Merge "lambda: Minor capture-variable/liberate-variable clean-up after post-merge reviews."
diff --git a/.gitignore b/.gitignore
index c4cf98b..4e806c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 JIT_ART
+**/__pycache__/**
diff --git a/Android.mk b/Android.mk
index b8ba9f2..fcf70ff 100644
--- a/Android.mk
+++ b/Android.mk
@@ -89,6 +89,7 @@
 include $(art_path)/tools/Android.mk
 include $(art_path)/tools/ahat/Android.mk
 include $(art_path)/tools/dexfuzz/Android.mk
+include $(art_path)/tools/dmtracedump/Android.mk
 include $(art_path)/sigchainlib/Android.mk
 
 
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index b507124..cd9d18d 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -34,6 +34,10 @@
 ART_BUILD_HOST_NDEBUG ?= true
 ART_BUILD_HOST_DEBUG ?= true
 
+# Set these to change the optimization level at which ART is built.
+ART_DEBUG_OPT_FLAG ?= -O2
+ART_NDEBUG_OPT_FLAG ?= -O3
+
 # Enable the static builds only for checkbuilds.
 ifneq (,$(filter checkbuild,$(MAKECMDGOALS)))
   ART_BUILD_HOST_STATIC ?= true
@@ -110,7 +114,8 @@
 else
 ART_TARGET_CLANG := false
 endif
-ART_TARGET_CLANG_arm :=
+# b/25130937
+ART_TARGET_CLANG_arm := false
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
 ART_TARGET_CLANG_mips64 :=
@@ -319,11 +324,11 @@
 
 # Cflags for non-debug ART and ART tools.
 art_non_debug_cflags := \
-  -O3
+  $(ART_NDEBUG_OPT_FLAG)
 
 # Cflags for debug ART and ART tools.
 art_debug_cflags := \
-  -O2 \
+  $(ART_DEBUG_OPT_FLAG) \
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
   -DVIXL_DEBUG \
   -UNDEBUG
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index a561c5f..c53479c 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -89,7 +89,11 @@
 HOST_CORE_DEX_FILES   := $(foreach jar,$(HOST_CORE_JARS),  $(call intermediates-dir-for,JAVA_LIBRARIES,$(jar),t,COMMON)/javalib.jar)
 TARGET_CORE_DEX_FILES := $(foreach jar,$(TARGET_CORE_JARS),$(call intermediates-dir-for,JAVA_LIBRARIES,$(jar), ,COMMON)/javalib.jar)
 
+ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
 # Classpath for Jack compilation: we only need core-libart.
-HOST_JACK_CLASSPATH   := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack)
-TARGET_JACK_CLASSPATH := $(abspath $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack)
+HOST_JACK_CLASSPATH_DEPENDENCIES   := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart-hostdex,t,COMMON)/classes.jack
+HOST_JACK_CLASSPATH                := $(foreach dep,$(HOST_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
+TARGET_JACK_CLASSPATH_DEPENDENCIES := $(call intermediates-dir-for,JAVA_LIBRARIES,core-libart, ,COMMON)/classes.jack
+TARGET_JACK_CLASSPATH              := $(foreach dep,$(TARGET_JACK_CLASSPATH_DEPENDENCIES),$(abspath $(dep)))
+endif
 endif # ART_ANDROID_COMMON_PATH_MK
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 2f43f5f..420db43 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -40,6 +40,9 @@
 # Do you want all tests, even those that are time consuming?
 ART_TEST_FULL ?= false
 
+# Do you want run-test to be quieter? Run-tests will only show output if they fail.
+ART_TEST_QUIET ?= true
+
 # Do you want default compiler tests run?
 ART_TEST_DEFAULT_COMPILER ?= true
 
@@ -116,12 +119,25 @@
         || (echo -e "$(1) \e[91mFAILED\e[0m" >&2 )))
 endef
 
+ifeq ($(ART_TEST_QUIET),true)
+  ART_TEST_ANNOUNCE_PASS := ( true )
+  ART_TEST_ANNOUNCE_RUN := ( true )
+  ART_TEST_ANNOUNCE_SKIP_FAILURE := ( true )
+  ART_TEST_ANNOUNCE_SKIP_BROKEN := ( true )
+else
+  # Note that the use of '=' and not ':=' is intentional, since these are actually functions.
+  ART_TEST_ANNOUNCE_PASS = ( echo -e "$(1) \e[92mPASSED\e[0m" )
+  ART_TEST_ANNOUNCE_RUN = ( echo -e "$(1) \e[95mRUNNING\e[0m")
+  ART_TEST_ANNOUNCE_SKIP_FAILURE = ( echo -e "$(1) \e[93mSKIPPING DUE TO EARLIER FAILURE\e[0m" )
+  ART_TEST_ANNOUNCE_SKIP_BROKEN = ( echo -e "$(1) \e[93mSKIPPING BROKEN TEST\e[0m" )
+endif
+
 # Define the command run on test success. $(1) is the name of the test. Executed by the shell.
 # The command checks prints "PASSED" then checks to see if this was a top-level make target (e.g.
 # "mm test-art-host-oat-HelloWorld32"), if it was then it does nothing, otherwise it creates a file
 # to be printed in the passing test summary.
 define ART_TEST_PASSED
-  ( echo -e "$(1) \e[92mPASSED\e[0m" && \
+  ( $(call ART_TEST_ANNOUNCE_PASS,$(1)) && \
     (echo $(MAKECMDGOALS) | grep -q $(1) || \
       (mkdir -p $(ART_HOST_TEST_DIR)/passed/ && touch $(ART_HOST_TEST_DIR)/passed/$(1))))
 endef
@@ -150,11 +166,11 @@
 define ART_TEST_SKIP
   ((echo $(ART_TEST_KNOWN_BROKEN) | grep -q -v $(1) \
      && ([ ! -d $(ART_HOST_TEST_DIR)/failed/ ] || [ $(ART_TEST_KEEP_GOING) = true ])\
-     && echo -e "$(1) \e[95mRUNNING\e[0m") \
+     && $(call ART_TEST_ANNOUNCE_RUN,$(1)) ) \
    || ((mkdir -p $(ART_HOST_TEST_DIR)/skipped/ && touch $(ART_HOST_TEST_DIR)/skipped/$(1) \
      && ([ -d $(ART_HOST_TEST_DIR)/failed/ ] \
-       && echo -e "$(1) \e[93mSKIPPING DUE TO EARLIER FAILURE\e[0m") \
-     || echo -e "$(1) \e[93mSKIPPING BROKEN TEST\e[0m") && false))
+       && $(call ART_TEST_ANNOUNCE_SKIP_FAILURE,$(1)) ) \
+     || $(call ART_TEST_ANNOUNCE_SKIP_BROKEN,$(1)) ) && false))
 endef
 
 # Create a build rule to create the dex file for a test.
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index 79f8f5e..953cfc0 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -18,6 +18,7 @@
 
 ART_CPPLINT := art/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
+ART_CPPLINT_FLAGS := --quiet
 ART_CPPLINT_SRC := $(shell find art -name "*.h" -o -name "*$(ART_CPP_EXTENSION)" | grep -v art/compiler/llvm/generated/ | grep -v art/runtime/elf\.h)
 
 # "mm cpplint-art" to verify we aren't regressing
@@ -39,8 +40,8 @@
 art_cpplint_touch := $$(OUT_CPPLINT)/$$(subst /,__,$$(art_cpplint_file))
 
 $$(art_cpplint_touch): $$(art_cpplint_file) $(ART_CPPLINT) art/build/Android.cpplint.mk
-	$(hide) $(ART_CPPLINT) $(ART_CPPLINT_FILTER) $$<
-	@mkdir -p $$(dir $$@)
+	$(hide) $(ART_CPPLINT) $(ART_CPPLINT_FLAGS) $(ART_CPPLINT_FILTER) $$<
+	$(hide) mkdir -p $$(dir $$@)
 	$(hide) touch $$@
 
 ART_CPPLINT_TARGETS += $$(art_cpplint_touch)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 9775f6a..ff41736 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -29,6 +29,7 @@
   GetMethodSignature \
   Instrumentation \
   Interfaces \
+  Lookup \
   Main \
   MultiDex \
   MultiDexModifiedSecondary \
@@ -65,6 +66,7 @@
 # Dex file dependencies for each gtest.
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
+ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
@@ -77,6 +79,7 @@
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
+ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
 
 # The elf writer test has dependencies on core.oat.
 ART_GTEST_elf_writer_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32)
@@ -203,7 +206,6 @@
   runtime/interpreter/safe_math_test.cc \
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
-  runtime/jit/jit_code_cache_test.cc \
   runtime/lambda/closure_test.cc \
   runtime/lambda/shorty_field_type_test.cc \
   runtime/leb128_test.cc \
@@ -220,6 +222,7 @@
   runtime/reference_table_test.cc \
   runtime/thread_pool_test.cc \
   runtime/transaction_test.cc \
+  runtime/type_lookup_table_test.cc \
   runtime/utf_test.cc \
   runtime/utils_test.cc \
   runtime/verifier/method_verifier_test.cc \
@@ -230,6 +233,7 @@
   runtime/jni_internal_test.cc \
   runtime/proxy_test.cc \
   runtime/reflection_test.cc \
+  compiler/compiled_method_test.cc \
   compiler/dex/gvn_dead_code_elimination_test.cc \
   compiler/dex/global_value_numbering_test.cc \
   compiler/dex/local_value_numbering_test.cc \
@@ -237,6 +241,7 @@
   compiler/dex/mir_optimization_test.cc \
   compiler/dex/type_inference_test.cc \
   compiler/dwarf/dwarf_test.cc \
+  compiler/driver/compiled_method_storage_test.cc \
   compiler/driver/compiler_driver_test.cc \
   compiler/elf_writer_test.cc \
   compiler/image_test.cc \
@@ -348,6 +353,7 @@
 
 COMPILER_GTEST_HOST_SRC_FILES_mips := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips) \
+  compiler/utils/mips/assembler_mips_test.cc \
 
 COMPILER_GTEST_HOST_SRC_FILES_mips64 := \
   $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 52df7de..f34b5ed 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -21,6 +21,7 @@
 #include "utils.h"
 #include <numeric>
 #include "gtest/gtest.h"
+#include "runtime/experimental_flags.h"
 
 #define EXPECT_NULL(expected) EXPECT_EQ(reinterpret_cast<const void*>(expected), \
                                         reinterpret_cast<void*>(nullptr));
@@ -529,22 +530,32 @@
   }
 }  // TEST_F
 
-/* -X[no]experimental-lambdas */
-TEST_F(CmdlineParserTest, TestExperimentalLambdas) {
+/* -Xexperimental:_ */
+TEST_F(CmdlineParserTest, TestExperimentalFlags) {
   // Off by default
-  EXPECT_SINGLE_PARSE_DEFAULT_VALUE(false,
+  EXPECT_SINGLE_PARSE_DEFAULT_VALUE(ExperimentalFlags::kNone,
                                     "",
-                                    M::ExperimentalLambdas);
+                                    M::Experimental);
 
   // Disabled explicitly
-  EXPECT_SINGLE_PARSE_VALUE(false,
-                            "-Xnoexperimental-lambdas",
-                            M::ExperimentalLambdas);
+  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kNone,
+                            "-Xexperimental:none",
+                            M::Experimental);
 
   // Enabled explicitly
-  EXPECT_SINGLE_PARSE_VALUE(true,
-                            "-Xexperimental-lambdas",
-                            M::ExperimentalLambdas);
+  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kLambdas,
+                            "-Xexperimental:lambdas",
+                            M::Experimental);
+  // Enabled explicitly
+  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kDefaultMethods,
+                            "-Xexperimental:default-methods",
+                            M::Experimental);
+
+  // Enabled both
+  EXPECT_SINGLE_PARSE_VALUE(ExperimentalFlags::kDefaultMethods | ExperimentalFlags::kLambdas,
+                            "-Xexperimental:default-methods "
+                            "-Xexperimental:lambdas",
+                            M::Experimental);
 }
 
 // -Xverify:_
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index a57b619..c594adb 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -28,6 +28,7 @@
 #include "jdwp/jdwp.h"
 #include "runtime/base/logging.h"
 #include "runtime/base/time_utils.h"
+#include "runtime/experimental_flags.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "profiler_options.h"
@@ -838,6 +839,23 @@
   static constexpr bool kCanParseBlankless = true;
 };
 
+template<>
+struct CmdlineType<ExperimentalFlags> : CmdlineTypeParser<ExperimentalFlags> {
+  Result ParseAndAppend(const std::string& option, ExperimentalFlags& existing) {
+    if (option == "none") {
+      existing = existing | ExperimentalFlags::kNone;
+    } else if (option == "lambdas") {
+      existing = existing | ExperimentalFlags::kLambdas;
+    } else if (option == "default-methods") {
+      existing = existing | ExperimentalFlags::kDefaultMethods;
+    } else {
+      return Result::Failure(std::string("Unknown option '") + option + "'");
+    }
+    return Result::SuccessNoValue();
+  }
+
+  static const char* Name() { return "ExperimentalFlags"; }
+};
 
 }  // namespace art
 #endif  // ART_CMDLINE_CMDLINE_TYPES_H_
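
For context, a minimal sketch of how the new ExperimentalFlags parser accumulates options: each -Xexperimental:<name> ORs one bit into the running value, so repeated options combine rather than overwrite each other. The Flags enum and ParseOne helper below are illustrative stand-ins, not the real ExperimentalFlags API from runtime/experimental_flags.h.

#include <cstdint>
#include <iostream>
#include <string>

// Illustrative stand-in for ExperimentalFlags (the real type lives in
// runtime/experimental_flags.h and is not shown in this diff).
enum class Flags : uint32_t {
  kNone = 0x0000,
  kLambdas = 0x0001,
  kDefaultMethods = 0x0002,
};

constexpr Flags operator|(Flags lhs, Flags rhs) {
  return static_cast<Flags>(static_cast<uint32_t>(lhs) | static_cast<uint32_t>(rhs));
}

// Mirrors the shape of ParseAndAppend above: OR one bit per recognized option.
bool ParseOne(const std::string& option, Flags* existing) {
  if (option == "none") {
    *existing = *existing | Flags::kNone;
  } else if (option == "lambdas") {
    *existing = *existing | Flags::kLambdas;
  } else if (option == "default-methods") {
    *existing = *existing | Flags::kDefaultMethods;
  } else {
    return false;  // Unknown option.
  }
  return true;
}

int main() {
  Flags flags = Flags::kNone;
  // Matches the "Enabled both" case in the parser test above.
  for (const char* opt : {"default-methods", "lambdas"}) {
    if (!ParseOne(opt, &flags)) {
      std::cerr << "Unknown option '" << opt << "'\n";
      return 1;
    }
  }
  std::cout << std::hex << static_cast<uint32_t>(flags) << "\n";  // Prints 3.
  return 0;
}
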
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 96e13ac..e74a68f 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -54,6 +54,7 @@
 	dex/verification_results.cc \
 	dex/vreg_analysis.cc \
 	dex/quick_compiler_callbacks.cc \
+	driver/compiled_method_storage.cc \
 	driver/compiler_driver.cc \
 	driver/compiler_options.cc \
 	driver/dex_compilation_unit.cc \
@@ -66,6 +67,7 @@
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
 	optimizing/code_generator_utils.cc \
+	optimizing/constant_area_fixups_x86.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
 	optimizing/graph_checker.cc \
@@ -77,6 +79,7 @@
 	optimizing/instruction_simplifier.cc \
 	optimizing/intrinsics.cc \
 	optimizing/licm.cc \
+	optimizing/load_store_elimination.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
 	optimizing/optimization.cc \
@@ -86,13 +89,13 @@
 	optimizing/primitive_type_propagation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
+	optimizing/sharpening.cc \
 	optimizing/side_effects_analysis.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
 	optimizing/ssa_phi_elimination.cc \
 	optimizing/stack_map_stream.cc \
 	trampolines/trampoline_compiler.cc \
-	utils/arena_bit_vector.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
 	buffered_output_stream.cc \
@@ -151,6 +154,8 @@
 	dex/quick/mips/target_mips.cc \
 	dex/quick/mips/utility_mips.cc \
 	jni/quick/mips/calling_convention_mips.cc \
+	optimizing/code_generator_mips.cc \
+	optimizing/intrinsics_mips.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
 
@@ -210,7 +215,8 @@
   dex/quick/arm64/arm64_lir.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips := \
-  dex/quick/mips/mips_lir.h
+  dex/quick/mips/mips_lir.h \
+  utils/mips/assembler_mips.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \
   $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips)
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 5e345db..6fd4575 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -51,7 +51,7 @@
     dwarf::WriteDebugFrameCIE(is64bit, dwarf::DW_EH_PE_absptr, dwarf::Reg(8),
                               initial_opcodes, kCFIFormat, &debug_frame_data_);
     std::vector<uintptr_t> debug_frame_patches;
-    dwarf::WriteDebugFrameFDE(is64bit, 0, 0, actual_asm.size(), &actual_cfi,
+    dwarf::WriteDebugFrameFDE(is64bit, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
                               kCFIFormat, &debug_frame_data_, &debug_frame_patches);
     ReformatCfi(Objdump(false, "-W"), &lines);
     // Pretty-print assembly.
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 1727657..c37ceca 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -32,6 +32,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
 #include "mirror/object-inl.h"
+#include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
 #include "utils.h"
@@ -53,22 +54,22 @@
                                                             method->GetDexMethodIndex()));
   }
   if (compiled_method != nullptr) {
-    const SwapVector<uint8_t>* code = compiled_method->GetQuickCode();
-    uint32_t code_size = code->size();
+    ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
+    uint32_t code_size = code.size();
     CHECK_NE(0u, code_size);
-    const SwapVector<uint8_t>* vmap_table = compiled_method->GetVmapTable();
-    uint32_t vmap_table_offset = vmap_table->empty() ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table->size();
-    const SwapVector<uint8_t>* mapping_table = compiled_method->GetMappingTable();
-    bool mapping_table_used = mapping_table != nullptr && !mapping_table->empty();
-    size_t mapping_table_size = mapping_table_used ? mapping_table->size() : 0U;
+    ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
+    uint32_t vmap_table_offset = vmap_table.empty() ? 0u
+        : sizeof(OatQuickMethodHeader) + vmap_table.size();
+    ArrayRef<const uint8_t> mapping_table = compiled_method->GetMappingTable();
+    bool mapping_table_used = !mapping_table.empty();
+    size_t mapping_table_size = mapping_table.size();
     uint32_t mapping_table_offset = !mapping_table_used ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table->size() + mapping_table_size;
-    const SwapVector<uint8_t>* gc_map = compiled_method->GetGcMap();
-    bool gc_map_used = gc_map != nullptr && !gc_map->empty();
-    size_t gc_map_size = gc_map_used ? gc_map->size() : 0U;
+        : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table_size;
+    ArrayRef<const uint8_t> gc_map = compiled_method->GetGcMap();
+    bool gc_map_used = !gc_map.empty();
+    size_t gc_map_size = gc_map.size();
     uint32_t gc_map_offset = !gc_map_used ? 0u
-        : sizeof(OatQuickMethodHeader) + vmap_table->size() + mapping_table_size + gc_map_size;
+        : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table_size + gc_map_size;
     OatQuickMethodHeader method_header(mapping_table_offset, vmap_table_offset, gc_map_offset,
                                        compiled_method->GetFrameSizeInBytes(),
                                        compiled_method->GetCoreSpillMask(),
@@ -76,25 +77,30 @@
 
     header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
     std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
-    size_t size = sizeof(method_header) + code_size + vmap_table->size() + mapping_table_size +
-        gc_map_size;
-    size_t code_offset = compiled_method->AlignCode(size - code_size);
-    size_t padding = code_offset - (size - code_size);
-    chunk->reserve(padding + size);
+    const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet());
+    const size_t size =
+        gc_map_size + mapping_table_size + vmap_table.size() + sizeof(method_header) + code_size;
+    chunk->reserve(size + max_padding);
     chunk->resize(sizeof(method_header));
     memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
-    chunk->insert(chunk->begin(), vmap_table->begin(), vmap_table->end());
+    chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
     if (mapping_table_used) {
-      chunk->insert(chunk->begin(), mapping_table->begin(), mapping_table->end());
+      chunk->insert(chunk->begin(), mapping_table.begin(), mapping_table.end());
     }
     if (gc_map_used) {
-      chunk->insert(chunk->begin(), gc_map->begin(), gc_map->end());
+      chunk->insert(chunk->begin(), gc_map.begin(), gc_map.end());
     }
+    chunk->insert(chunk->end(), code.begin(), code.end());
+    CHECK_EQ(chunk->size(), size);
+    const void* unaligned_code_ptr = chunk->data() + (size - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = compiled_method->AlignCode(offset) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(chunk->capacity(), chunk->size() + padding);
     chunk->insert(chunk->begin(), padding, 0);
-    chunk->insert(chunk->end(), code->begin(), code->end());
-    CHECK_EQ(padding + size, chunk->size());
-    const void* code_ptr = &(*chunk)[code_offset];
-    MakeExecutable(code_ptr, code->size());
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr, static_cast<const void*>(chunk->data() + (chunk->size() - code_size)));
+    MakeExecutable(code_ptr, code.size());
     const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                           compiled_method->GetInstructionSet());
     LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
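
A hedged sketch of the padding computation introduced above, assuming AlignCode() simply rounds an address up to the instruction-set alignment: padding is derived from the actual address the code would land at, and capacity is reserved up front so prepending the padding cannot reallocate the vector and invalidate the computed pointer. kAlignment and the local AlignCode() are assumptions for illustration only.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Assumed stand-in for CompiledMethod::AlignCode(): round up to a fixed alignment.
constexpr size_t kAlignment = 16;
uintptr_t AlignCode(uintptr_t offset) {
  return (offset + kAlignment - 1) & ~static_cast<uintptr_t>(kAlignment - 1);
}

int main() {
  std::vector<uint8_t> chunk(100, 0);        // Pretend header + maps + code were appended.
  const size_t code_size = 40;               // Pretend the last 40 bytes are the code.
  chunk.reserve(chunk.size() + kAlignment);  // Room for padding, so data() will not move.

  const uint8_t* unaligned_code_ptr = chunk.data() + (chunk.size() - code_size);
  uintptr_t offset = reinterpret_cast<uintptr_t>(unaligned_code_ptr);
  size_t padding = static_cast<size_t>(AlignCode(offset) - offset);

  // Prepending `padding` zero bytes shifts the code right by exactly `padding`,
  // so it now starts at an aligned address; reserve() above rules out reallocation.
  chunk.insert(chunk.begin(), padding, 0);
  const uint8_t* code_ptr = chunk.data() + (chunk.size() - code_size);
  assert(reinterpret_cast<uintptr_t>(code_ptr) % kAlignment == 0);
  (void)code_ptr;
  return 0;
}
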
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 74ef35e..9551d22 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -15,27 +15,22 @@
  */
 
 #include "compiled_method.h"
+
+#include "driver/compiled_method_storage.h"
 #include "driver/compiler_driver.h"
+#include "utils/swap_space.h"
 
 namespace art {
 
 CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-                           const ArrayRef<const uint8_t>& quick_code, bool owns_code_array)
-    : compiler_driver_(compiler_driver), instruction_set_(instruction_set),
-      owns_code_array_(owns_code_array), quick_code_(nullptr) {
-  if (owns_code_array_) {
-    // If we are supposed to own the code, don't deduplicate it.
-    quick_code_ = new SwapVector<uint8_t>(quick_code.begin(), quick_code.end(),
-                                          compiler_driver_->GetSwapSpaceAllocator());
-  } else {
-    quick_code_ = compiler_driver_->DeduplicateCode(quick_code);
-  }
+                           const ArrayRef<const uint8_t>& quick_code)
+    : compiler_driver_(compiler_driver),
+      instruction_set_(instruction_set),
+      quick_code_(compiler_driver_->GetCompiledMethodStorage()->DeduplicateCode(quick_code)) {
 }
 
 CompiledCode::~CompiledCode() {
-  if (owns_code_array_) {
-    delete quick_code_;
-  }
+  compiler_driver_->GetCompiledMethodStorage()->ReleaseCode(quick_code_);
 }
 
 bool CompiledCode::operator==(const CompiledCode& rhs) const {
@@ -104,59 +99,28 @@
   }
 }
 
-const std::vector<uint32_t>& CompiledCode::GetOatdataOffsetsToCompliledCodeOffset() const {
-  CHECK_NE(0U, oatdata_offsets_to_compiled_code_offset_.size());
-  return oatdata_offsets_to_compiled_code_offset_;
-}
-
-void CompiledCode::AddOatdataOffsetToCompliledCodeOffset(uint32_t offset) {
-  oatdata_offsets_to_compiled_code_offset_.push_back(offset);
-}
-
 CompiledMethod::CompiledMethod(CompilerDriver* driver,
                                InstructionSet instruction_set,
                                const ArrayRef<const uint8_t>& quick_code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
-                               DefaultSrcMap* src_mapping_table,
+                               const ArrayRef<const SrcMapElem>& src_mapping_table,
                                const ArrayRef<const uint8_t>& mapping_table,
                                const ArrayRef<const uint8_t>& vmap_table,
                                const ArrayRef<const uint8_t>& native_gc_map,
                                const ArrayRef<const uint8_t>& cfi_info,
                                const ArrayRef<const LinkerPatch>& patches)
-    : CompiledCode(driver, instruction_set, quick_code, !driver->DedupeEnabled()),
-      owns_arrays_(!driver->DedupeEnabled()),
+    : CompiledCode(driver, instruction_set, quick_code),
       frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask),
       fp_spill_mask_(fp_spill_mask),
-      patches_(patches.begin(), patches.end(), driver->GetSwapSpaceAllocator()) {
-  if (owns_arrays_) {
-    if (src_mapping_table == nullptr) {
-      src_mapping_table_ = new SwapSrcMap(driver->GetSwapSpaceAllocator());
-    } else {
-      src_mapping_table_ = new SwapSrcMap(src_mapping_table->begin(), src_mapping_table->end(),
-                                          driver->GetSwapSpaceAllocator());
-    }
-    mapping_table_ = mapping_table.empty() ?
-        nullptr : new SwapVector<uint8_t>(mapping_table.begin(), mapping_table.end(),
-                                          driver->GetSwapSpaceAllocator());
-    vmap_table_ = new SwapVector<uint8_t>(vmap_table.begin(), vmap_table.end(),
-                                          driver->GetSwapSpaceAllocator());
-    gc_map_ = native_gc_map.empty() ? nullptr :
-        new SwapVector<uint8_t>(native_gc_map.begin(), native_gc_map.end(),
-                                driver->GetSwapSpaceAllocator());
-    cfi_info_ = cfi_info.empty() ? nullptr :
-        new SwapVector<uint8_t>(cfi_info.begin(), cfi_info.end(), driver->GetSwapSpaceAllocator());
-  } else {
-    src_mapping_table_ = src_mapping_table == nullptr ?
-        driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>()) :
-        driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(*src_mapping_table));
-    mapping_table_ = mapping_table.empty() ?
-        nullptr : driver->DeduplicateMappingTable(mapping_table);
-    vmap_table_ = driver->DeduplicateVMapTable(vmap_table);
-    gc_map_ = native_gc_map.empty() ? nullptr : driver->DeduplicateGCMap(native_gc_map);
-    cfi_info_ = cfi_info.empty() ? nullptr : driver->DeduplicateCFIInfo(cfi_info);
-  }
+      src_mapping_table_(
+          driver->GetCompiledMethodStorage()->DeduplicateSrcMappingTable(src_mapping_table)),
+      mapping_table_(driver->GetCompiledMethodStorage()->DeduplicateMappingTable(mapping_table)),
+      vmap_table_(driver->GetCompiledMethodStorage()->DeduplicateVMapTable(vmap_table)),
+      gc_map_(driver->GetCompiledMethodStorage()->DeduplicateGCMap(native_gc_map)),
+      cfi_info_(driver->GetCompiledMethodStorage()->DeduplicateCFIInfo(cfi_info)),
+      patches_(driver->GetCompiledMethodStorage()->DeduplicateLinkerPatches(patches)) {
 }
 
 CompiledMethod* CompiledMethod::SwapAllocCompiledMethod(
@@ -166,13 +130,13 @@
     const size_t frame_size_in_bytes,
     const uint32_t core_spill_mask,
     const uint32_t fp_spill_mask,
-    DefaultSrcMap* src_mapping_table,
+    const ArrayRef<const SrcMapElem>& src_mapping_table,
     const ArrayRef<const uint8_t>& mapping_table,
     const ArrayRef<const uint8_t>& vmap_table,
     const ArrayRef<const uint8_t>& native_gc_map,
     const ArrayRef<const uint8_t>& cfi_info,
     const ArrayRef<const LinkerPatch>& patches) {
-  SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
+  SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator());
   CompiledMethod* ret = alloc.allocate(1);
   alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
                   fp_spill_mask, src_mapping_table, mapping_table, vmap_table, native_gc_map,
@@ -180,22 +144,20 @@
   return ret;
 }
 
-
-
 void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) {
-  SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
+  SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator());
   alloc.destroy(m);
   alloc.deallocate(m, 1);
 }
 
 CompiledMethod::~CompiledMethod() {
-  if (owns_arrays_) {
-    delete src_mapping_table_;
-    delete mapping_table_;
-    delete vmap_table_;
-    delete gc_map_;
-    delete cfi_info_;
-  }
+  CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
+  storage->ReleaseLinkerPatches(patches_);
+  storage->ReleaseCFIInfo(cfi_info_);
+  storage->ReleaseGCMap(gc_map_);
+  storage->ReleaseVMapTable(vmap_table_);
+  storage->ReleaseMappingTable(mapping_table_);
+  storage->ReleaseSrcMappingTable(src_mapping_table_);
 }
 
 }  // namespace art
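
To make the new ownership model concrete, here is a minimal sketch, under assumptions, of what a deduplicating storage with release-on-destruction could look like: distinct byte sequences are stored once, handed out to every CompiledMethod that needs them, and freed when the last holder releases them. DedupPool and its methods are hypothetical names, not the real CompiledMethodStorage interface.

#include <cstdint>
#include <map>
#include <vector>

class DedupPool {
 public:
  const std::vector<uint8_t>* Deduplicate(const std::vector<uint8_t>& data) {
    if (data.empty()) {
      return nullptr;  // Mirrors the "empty table yields no storage" convention above.
    }
    auto it = pool_.find(data);
    if (it == pool_.end()) {
      it = pool_.emplace(data, Entry{new std::vector<uint8_t>(data), 0u}).first;
    }
    ++it->second.ref_count;
    return it->second.copy;
  }

  void Release(const std::vector<uint8_t>* data) {
    if (data == nullptr) {
      return;
    }
    auto it = pool_.find(*data);
    if (it != pool_.end() && --it->second.ref_count == 0u) {
      delete it->second.copy;
      pool_.erase(it);
    }
  }

 private:
  struct Entry {
    std::vector<uint8_t>* copy;
    uint32_t ref_count;
  };
  std::map<std::vector<uint8_t>, Entry> pool_;
};

int main() {
  DedupPool pool;
  std::vector<uint8_t> vmap = {1, 2, 3};
  const std::vector<uint8_t>* a = pool.Deduplicate(vmap);  // First copy stored.
  const std::vector<uint8_t>* b = pool.Deduplicate(vmap);  // Same pointer handed back.
  // a == b: two compiled methods share one table until both release it.
  pool.Release(a);
  pool.Release(b);
  return 0;
}
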
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index a4d2387..15a4ba0 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -23,19 +23,20 @@
 
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "length_prefixed_array.h"
 #include "method_reference.h"
 #include "utils/array_ref.h"
-#include "utils/swap_space.h"
 
 namespace art {
 
 class CompilerDriver;
+class CompiledMethodStorage;
 
 class CompiledCode {
  public:
   // For Quick to supply an code blob
   CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-               const ArrayRef<const uint8_t>& quick_code, bool owns_code_array);
+               const ArrayRef<const uint8_t>& quick_code);
 
   virtual ~CompiledCode();
 
@@ -43,8 +44,8 @@
     return instruction_set_;
   }
 
-  const SwapVector<uint8_t>* GetQuickCode() const {
-    return quick_code_;
+  ArrayRef<const uint8_t> GetQuickCode() const {
+    return GetArray(quick_code_);
   }
 
   bool operator==(const CompiledCode& rhs) const;
@@ -66,41 +67,46 @@
   static const void* CodePointer(const void* code_pointer,
                                  InstructionSet instruction_set);
 
-  const std::vector<uint32_t>& GetOatdataOffsetsToCompliledCodeOffset() const;
-  void AddOatdataOffsetToCompliledCodeOffset(uint32_t offset);
+ protected:
+  template <typename T>
+  static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array) {
+    if (array == nullptr) {
+      return ArrayRef<const T>();
+    }
+    DCHECK_NE(array->size(), 0u);
+    return ArrayRef<const T>(&array->At(0), array->size());
+  }
+
+  CompilerDriver* GetCompilerDriver() {
+    return compiler_driver_;
+  }
 
  private:
   CompilerDriver* const compiler_driver_;
 
   const InstructionSet instruction_set_;
 
-  // If we own the code array (means that we free in destructor).
-  const bool owns_code_array_;
-
   // Used to store the PIC code for Quick.
-  SwapVector<uint8_t>* quick_code_;
-
-  // There are offsets from the oatdata symbol to where the offset to
-  // the compiled method will be found. These are computed by the
-  // OatWriter and then used by the ElfWriter to add relocations so
-  // that MCLinker can update the values to the location in the linked .so.
-  std::vector<uint32_t> oatdata_offsets_to_compiled_code_offset_;
+  const LengthPrefixedArray<uint8_t>* const quick_code_;
 };
 
 class SrcMapElem {
  public:
   uint32_t from_;
   int32_t to_;
-
-  // Lexicographical compare.
-  bool operator<(const SrcMapElem& other) const {
-    if (from_ != other.from_) {
-      return from_ < other.from_;
-    }
-    return to_ < other.to_;
-  }
 };
 
+inline bool operator<(const SrcMapElem& lhs, const SrcMapElem& rhs) {
+  if (lhs.from_ != rhs.from_) {
+    return lhs.from_ < rhs.from_;
+  }
+  return lhs.to_ < rhs.to_;
+}
+
+inline bool operator==(const SrcMapElem& lhs, const SrcMapElem& rhs) {
+  return lhs.from_ == rhs.from_ && lhs.to_ == rhs.to_;
+}
+
 template <class Allocator>
 class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> {
  public:
@@ -151,7 +157,6 @@
 };
 
 using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
-using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>;
 
 
 enum LinkerPatchType {
@@ -273,6 +278,9 @@
     uint32_t method_idx_;       // Method index for Call/Method patches.
     uint32_t type_idx_;         // Type index for Type patches.
     uint32_t element_offset_;   // Element offset in the dex cache arrays.
+    static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
+    static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
+    static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators");
   };
   union {
     uint32_t cmp2_;             // Used for relational operators.
@@ -313,7 +321,7 @@
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
-                 DefaultSrcMap* src_mapping_table,
+                 const ArrayRef<const SrcMapElem>& src_mapping_table,
                  const ArrayRef<const uint8_t>& mapping_table,
                  const ArrayRef<const uint8_t>& vmap_table,
                  const ArrayRef<const uint8_t>& native_gc_map,
@@ -329,7 +337,7 @@
       const size_t frame_size_in_bytes,
       const uint32_t core_spill_mask,
       const uint32_t fp_spill_mask,
-      DefaultSrcMap* src_mapping_table,
+      const ArrayRef<const SrcMapElem>& src_mapping_table,
       const ArrayRef<const uint8_t>& mapping_table,
       const ArrayRef<const uint8_t>& vmap_table,
       const ArrayRef<const uint8_t>& native_gc_map,
@@ -350,35 +358,31 @@
     return fp_spill_mask_;
   }
 
-  const SwapSrcMap& GetSrcMappingTable() const {
-    DCHECK(src_mapping_table_ != nullptr);
-    return *src_mapping_table_;
+  ArrayRef<const SrcMapElem> GetSrcMappingTable() const {
+    return GetArray(src_mapping_table_);
   }
 
-  SwapVector<uint8_t> const* GetMappingTable() const {
-    return mapping_table_;
+  ArrayRef<const uint8_t> GetMappingTable() const {
+    return GetArray(mapping_table_);
   }
 
-  const SwapVector<uint8_t>* GetVmapTable() const {
-    DCHECK(vmap_table_ != nullptr);
-    return vmap_table_;
+  ArrayRef<const uint8_t> GetVmapTable() const {
+    return GetArray(vmap_table_);
   }
 
-  SwapVector<uint8_t> const* GetGcMap() const {
-    return gc_map_;
+  ArrayRef<const uint8_t> GetGcMap() const {
+    return GetArray(gc_map_);
   }
 
-  const SwapVector<uint8_t>* GetCFIInfo() const {
-    return cfi_info_;
+  ArrayRef<const uint8_t> GetCFIInfo() const {
+    return GetArray(cfi_info_);
   }
 
   ArrayRef<const LinkerPatch> GetPatches() const {
-    return ArrayRef<const LinkerPatch>(patches_);
+    return GetArray(patches_);
   }
 
  private:
-  // Whether or not the arrays are owned by the compiled method or dedupe sets.
-  const bool owns_arrays_;
   // For quick code, the size of the activation used by the code.
   const size_t frame_size_in_bytes_;
   // For quick code, a bit mask describing spilled GPR callee-save registers.
@@ -386,19 +390,19 @@
   // For quick code, a bit mask describing spilled FPR callee-save registers.
   const uint32_t fp_spill_mask_;
   // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset.
-  SwapSrcMap* src_mapping_table_;
+  const LengthPrefixedArray<SrcMapElem>* const src_mapping_table_;
   // For quick code, a uleb128 encoded map from native PC offset to dex PC aswell as dex PC to
   // native PC offset. Size prefixed.
-  SwapVector<uint8_t>* mapping_table_;
+  const LengthPrefixedArray<uint8_t>* const mapping_table_;
   // For quick code, a uleb128 encoded map from GPR/FPR register to dex register. Size prefixed.
-  SwapVector<uint8_t>* vmap_table_;
+  const LengthPrefixedArray<uint8_t>* const vmap_table_;
   // For quick code, a map keyed by native PC indices to bitmaps describing what dalvik registers
   // are live.
-  SwapVector<uint8_t>* gc_map_;
+  const LengthPrefixedArray<uint8_t>* const gc_map_;
   // For quick code, a FDE entry for the debug_frame section.
-  SwapVector<uint8_t>* cfi_info_;
+  const LengthPrefixedArray<uint8_t>* const cfi_info_;
   // For quick code, linker patches needed by the method.
-  const SwapVector<LinkerPatch> patches_;
+  const LengthPrefixedArray<LinkerPatch>* const patches_;
 };
 
 }  // namespace art
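
As a rough sketch of the accessor pattern the header now uses, with simplified stand-ins for LengthPrefixedArray and ArrayRef (the real classes are not reproduced here): the stored pointer may be null when a table is absent, and GetArray() converts it into an empty, value-typed view so callers never need a null check.

#include <cassert>
#include <cstddef>
#include <cstdint>

template <typename T>
struct MiniLengthPrefixedArray {
  size_t size_;
  T data_[1];  // In the real class the payload is allocated inline after the length.
  size_t size() const { return size_; }
  const T& At(size_t i) const { return data_[i]; }
};

template <typename T>
struct MiniArrayRef {
  MiniArrayRef() : data(nullptr), count(0u) {}
  MiniArrayRef(const T* d, size_t c) : data(d), count(c) {}
  bool empty() const { return count == 0u; }
  const T* data;
  size_t count;
};

// Mirrors CompiledCode::GetArray(): null storage becomes an empty view.
template <typename T>
MiniArrayRef<T> GetArray(const MiniLengthPrefixedArray<T>* array) {
  if (array == nullptr) {
    return MiniArrayRef<T>();
  }
  return MiniArrayRef<T>(&array->At(0), array->size());
}

int main() {
  assert(GetArray<uint8_t>(nullptr).empty());  // Absent table, no null check needed by callers.
  return 0;
}
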
diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc
new file mode 100644
index 0000000..99ee875
--- /dev/null
+++ b/compiler/compiled_method_test.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "compiled_method.h"
+
+namespace art {
+
+TEST(CompiledMethod, SrcMapElemOperators) {
+  SrcMapElem elems[] = {
+      { 1u, -1 },
+      { 1u, 0 },
+      { 1u, 1 },
+      { 2u, -1 },
+      { 2u, 0 },    // Index 4.
+      { 2u, 1 },
+      { 2u, 0u },   // Index 6: Arbitrarily add a SrcMapElem identical to the one at index 4.
+  };
+
+  for (size_t i = 0; i != arraysize(elems); ++i) {
+    for (size_t j = 0; j != arraysize(elems); ++j) {
+      bool expected = (i != 6u ? i : 4u) == (j != 6u ? j : 4u);
+      EXPECT_EQ(expected, elems[i] == elems[j]) << i << " " << j;
+    }
+  }
+
+  for (size_t i = 0; i != arraysize(elems); ++i) {
+    for (size_t j = 0; j != arraysize(elems); ++j) {
+      bool expected = (i != 6u ? i : 4u) < (j != 6u ? j : 4u);
+      EXPECT_EQ(expected, elems[i] < elems[j]) << i << " " << j;
+    }
+  }
+}
+
+TEST(CompiledMethod, LinkerPatchOperators) {
+  const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1);
+  const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2);
+  LinkerPatch patches[] = {
+      LinkerPatch::MethodPatch(16u, dex_file1, 1000u),
+      LinkerPatch::MethodPatch(16u, dex_file1, 1001u),
+      LinkerPatch::MethodPatch(16u, dex_file2, 1000u),
+      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // Index 3.
+      LinkerPatch::CodePatch(16u, dex_file1, 1000u),
+      LinkerPatch::CodePatch(16u, dex_file1, 1001u),
+      LinkerPatch::CodePatch(16u, dex_file2, 1000u),
+      LinkerPatch::CodePatch(16u, dex_file2, 1001u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file1, 1000u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file1, 1001u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file2, 1000u),
+      LinkerPatch::RelativeCodePatch(16u, dex_file2, 1001u),
+      LinkerPatch::TypePatch(16u, dex_file1, 1000u),
+      LinkerPatch::TypePatch(16u, dex_file1, 1001u),
+      LinkerPatch::TypePatch(16u, dex_file2, 1000u),
+      LinkerPatch::TypePatch(16u, dex_file2, 1001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2001u),
+      LinkerPatch::MethodPatch(32u, dex_file1, 1000u),
+      LinkerPatch::MethodPatch(32u, dex_file1, 1001u),
+      LinkerPatch::MethodPatch(32u, dex_file2, 1000u),
+      LinkerPatch::MethodPatch(32u, dex_file2, 1001u),
+      LinkerPatch::CodePatch(32u, dex_file1, 1000u),
+      LinkerPatch::CodePatch(32u, dex_file1, 1001u),
+      LinkerPatch::CodePatch(32u, dex_file2, 1000u),
+      LinkerPatch::CodePatch(32u, dex_file2, 1001u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file1, 1000u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file1, 1001u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file2, 1000u),
+      LinkerPatch::RelativeCodePatch(32u, dex_file2, 1001u),
+      LinkerPatch::TypePatch(32u, dex_file1, 1000u),
+      LinkerPatch::TypePatch(32u, dex_file1, 1001u),
+      LinkerPatch::TypePatch(32u, dex_file2, 1000u),
+      LinkerPatch::TypePatch(32u, dex_file2, 1001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2000u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2001u),
+      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2001u),
+      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // Identical to the patch at index 3.
+  };
+  constexpr size_t last_index = arraysize(patches) - 1u;
+
+  for (size_t i = 0; i != arraysize(patches); ++i) {
+    for (size_t j = 0; j != arraysize(patches); ++j) {
+      bool expected = (i != last_index ? i : 3u) == (j != last_index ? j : 3u);
+      EXPECT_EQ(expected, patches[i] == patches[j]) << i << " " << j;
+    }
+  }
+
+  for (size_t i = 0; i != arraysize(patches); ++i) {
+    for (size_t j = 0; j != arraysize(patches); ++j) {
+      bool expected = (i != last_index ? i : 3u) < (j != last_index ? j : 3u);
+      EXPECT_EQ(expected, patches[i] < patches[j]) << i << " " << j;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 9b4dbe0..8788dc1 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -77,9 +77,8 @@
    * information.
    * @note This is used for backtrace information in generated code.
    */
-  virtual std::vector<uint8_t>* GetCallFrameInformationInitialization(const CompilerDriver& driver)
-      const {
-    UNUSED(driver);
+  virtual std::vector<uint8_t>* GetCallFrameInformationInitialization(
+      const CompilerDriver& driver ATTRIBUTE_UNUSED) const {
     return nullptr;
   }
 
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index ff7ddc1..4836041 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -356,7 +356,7 @@
         0,
         0,
         0,
-        nullptr,                                     // src_mapping_table
+        ArrayRef<const SrcMapElem>(),                // src_mapping_table
         ArrayRef<const uint8_t>(),                   // mapping_table
         ArrayRef<const uint8_t>(builder.GetData()),  // vmap_table
         ArrayRef<const uint8_t>(),                   // gc_map
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index 4de3410..445859c 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -18,6 +18,7 @@
 
 #include "gvn_dead_code_elimination.h"
 
+#include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 #include "base/macros.h"
 #include "base/allocator.h"
@@ -26,7 +27,6 @@
 #include "dex_instruction.h"
 #include "dex/mir_graph.h"
 #include "local_value_numbering.h"
-#include "utils/arena_bit_vector.h"
 
 namespace art {
 
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index bd00690..f98969e 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -797,6 +797,10 @@
   }
 }
 
+static constexpr int64_t shift_minus_1(size_t by) {
+  return static_cast<int64_t>(static_cast<uint64_t>(INT64_C(-1)) << by);
+}
+
 TEST_F(LocalValueNumberingTest, ConstWide) {
   static const MIRDef mirs[] = {
       // Core reg constants.
@@ -804,45 +808,45 @@
       DEF_CONST(Instruction::CONST_WIDE_16, 2u, 1),
       DEF_CONST(Instruction::CONST_WIDE_16, 4u, -1),
       DEF_CONST(Instruction::CONST_WIDE_32, 6u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 8u, -1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 8u, shift_minus_1(16)),
       DEF_CONST(Instruction::CONST_WIDE_32, 10u, (1 << 16) + 1),
       DEF_CONST(Instruction::CONST_WIDE_32, 12u, (1 << 16) - 1),
       DEF_CONST(Instruction::CONST_WIDE_32, 14u, -(1 << 16) + 1),
       DEF_CONST(Instruction::CONST_WIDE_32, 16u, -(1 << 16) - 1),
       DEF_CONST(Instruction::CONST_WIDE, 18u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 20u, INT64_C(-1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 20u, shift_minus_1(32)),
       DEF_CONST(Instruction::CONST_WIDE, 22u, (INT64_C(1) << 32) + 1),
       DEF_CONST(Instruction::CONST_WIDE, 24u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 26u, (INT64_C(-1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 28u, (INT64_C(-1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 26u, shift_minus_1(32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 28u, shift_minus_1(32) - 1),
       DEF_CONST(Instruction::CONST_WIDE_HIGH16, 30u, 1),       // Effectively 1 << 48.
       DEF_CONST(Instruction::CONST_WIDE_HIGH16, 32u, 0xffff),  // Effectively -1 << 48.
       DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(1) << 48) + 1),
       DEF_CONST(Instruction::CONST_WIDE, 36u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(-1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 38u, shift_minus_1(48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 40u, shift_minus_1(48) - 1),
       // FP reg constants.
       DEF_CONST(Instruction::CONST_WIDE_16, 42u, 0),
       DEF_CONST(Instruction::CONST_WIDE_16, 44u, 1),
       DEF_CONST(Instruction::CONST_WIDE_16, 46u, -1),
       DEF_CONST(Instruction::CONST_WIDE_32, 48u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 50u, -1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 50u, shift_minus_1(16)),
       DEF_CONST(Instruction::CONST_WIDE_32, 52u, (1 << 16) + 1),
       DEF_CONST(Instruction::CONST_WIDE_32, 54u, (1 << 16) - 1),
       DEF_CONST(Instruction::CONST_WIDE_32, 56u, -(1 << 16) + 1),
       DEF_CONST(Instruction::CONST_WIDE_32, 58u, -(1 << 16) - 1),
       DEF_CONST(Instruction::CONST_WIDE, 60u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 62u, INT64_C(-1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 62u, shift_minus_1(32)),
       DEF_CONST(Instruction::CONST_WIDE, 64u, (INT64_C(1) << 32) + 1),
       DEF_CONST(Instruction::CONST_WIDE, 66u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 68u, (INT64_C(-1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 70u, (INT64_C(-1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 68u, shift_minus_1(32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 70u, shift_minus_1(32) - 1),
       DEF_CONST(Instruction::CONST_WIDE_HIGH16, 72u, 1),       // Effectively 1 << 48.
       DEF_CONST(Instruction::CONST_WIDE_HIGH16, 74u, 0xffff),  // Effectively -1 << 48.
       DEF_CONST(Instruction::CONST_WIDE, 76u, (INT64_C(1) << 48) + 1),
       DEF_CONST(Instruction::CONST_WIDE, 78u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 80u, (INT64_C(-1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 82u, (INT64_C(-1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 80u, shift_minus_1(48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 82u, shift_minus_1(48) - 1),
   };
 
   PrepareMIRs(mirs);
@@ -868,7 +872,7 @@
       DEF_CONST(Instruction::CONST_4, 1u, 1),
       DEF_CONST(Instruction::CONST_4, 2u, -1),
       DEF_CONST(Instruction::CONST_16, 3u, 1 << 4),
-      DEF_CONST(Instruction::CONST_16, 4u, -1 << 4),
+      DEF_CONST(Instruction::CONST_16, 4u, shift_minus_1(4)),
       DEF_CONST(Instruction::CONST_16, 5u, (1 << 4) + 1),
       DEF_CONST(Instruction::CONST_16, 6u, (1 << 4) - 1),
       DEF_CONST(Instruction::CONST_16, 7u, -(1 << 4) + 1),
@@ -877,14 +881,14 @@
       DEF_CONST(Instruction::CONST_HIGH16, 10u, 0xffff),  // Effectively -1 << 16.
       DEF_CONST(Instruction::CONST, 11u, (1 << 16) + 1),
       DEF_CONST(Instruction::CONST, 12u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST, 13u, (-1 << 16) + 1),
-      DEF_CONST(Instruction::CONST, 14u, (-1 << 16) - 1),
+      DEF_CONST(Instruction::CONST, 13u, shift_minus_1(16) + 1),
+      DEF_CONST(Instruction::CONST, 14u, shift_minus_1(16) - 1),
       // FP reg constants.
       DEF_CONST(Instruction::CONST_4, 15u, 0),
       DEF_CONST(Instruction::CONST_4, 16u, 1),
       DEF_CONST(Instruction::CONST_4, 17u, -1),
       DEF_CONST(Instruction::CONST_16, 18u, 1 << 4),
-      DEF_CONST(Instruction::CONST_16, 19u, -1 << 4),
+      DEF_CONST(Instruction::CONST_16, 19u, shift_minus_1(4)),
       DEF_CONST(Instruction::CONST_16, 20u, (1 << 4) + 1),
       DEF_CONST(Instruction::CONST_16, 21u, (1 << 4) - 1),
       DEF_CONST(Instruction::CONST_16, 22u, -(1 << 4) + 1),
@@ -893,8 +897,8 @@
       DEF_CONST(Instruction::CONST_HIGH16, 25u, 0xffff),  // Effectively -1 << 16.
       DEF_CONST(Instruction::CONST, 26u, (1 << 16) + 1),
       DEF_CONST(Instruction::CONST, 27u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST, 28u, (-1 << 16) + 1),
-      DEF_CONST(Instruction::CONST, 29u, (-1 << 16) - 1),
+      DEF_CONST(Instruction::CONST, 28u, shift_minus_1(16) + 1),
+      DEF_CONST(Instruction::CONST, 29u, shift_minus_1(16) - 1),
       // null reference constant.
       DEF_CONST(Instruction::CONST_4, 30u, 0),
   };
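
For context on the new shift_minus_1() helper used throughout this test: left-shifting a negative signed value is undefined behavior in C++, so expressions like -1 << 16 are rewritten to shift in the unsigned domain and convert back. A small self-contained illustration (the helper is copied from the diff above; main() exists only for demonstration):

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr int64_t shift_minus_1(size_t by) {
  return static_cast<int64_t>(static_cast<uint64_t>(INT64_C(-1)) << by);
}

int main() {
  // int64_t bad = INT64_C(-1) << 32;  // Undefined behavior: left shift of a negative value.
  int64_t ok = shift_minus_1(32);      // Well defined: all high 32 bits set.
  assert(ok == static_cast<int64_t>(UINT64_C(0xFFFFFFFF00000000)));
  (void)ok;
  return 0;
}
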
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 4efe4af..b0972d9 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -511,9 +511,8 @@
 
 /* Process instructions with the kSwitch flag */
 BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
-                                       int width, int flags,
+                                       int width, int flags ATTRIBUTE_UNUSED,
                                        ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  UNUSED(flags);
   const uint16_t* switch_data =
       reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset +
           static_cast<int32_t>(insn->dalvikInsn.vB));
@@ -592,11 +591,15 @@
 }
 
 /* Process instructions with the kThrow flag */
-BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
-                                      int width, int flags, ArenaBitVector* try_block_addr,
-                                      const uint16_t* code_ptr, const uint16_t* code_end,
+BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block,
+                                      MIR* insn,
+                                      DexOffset cur_offset,
+                                      int width,
+                                      int flags ATTRIBUTE_UNUSED,
+                                      ArenaBitVector* try_block_addr,
+                                      const uint16_t* code_ptr,
+                                      const uint16_t* code_end,
                                       ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  UNUSED(flags);
   bool in_try_block = try_block_addr->IsBitSet(cur_offset);
   bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 097abdc..2da8a98 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "base/arena_bit_vector.h"
 #include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/scoped_arena_containers.h"
@@ -30,7 +31,6 @@
 #include "mir_method_info.h"
 #include "reg_location.h"
 #include "reg_storage.h"
-#include "utils/arena_bit_vector.h"
 
 namespace art {
 
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index 0def056..16414ef 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -53,10 +53,7 @@
    * @param data the PassDataHolder.
    * @return whether or not to execute the pass.
    */
-  virtual bool Gate(const PassDataHolder* data) const {
-    // Unused parameter.
-    UNUSED(data);
-
+  virtual bool Gate(const PassDataHolder* data ATTRIBUTE_UNUSED) const {
     // Base class says yes.
     return true;
   }
@@ -64,17 +61,13 @@
   /**
    * @brief Start of the pass: called before the Worker function.
    */
-  virtual void Start(PassDataHolder* data) const {
-    // Unused parameter.
-    UNUSED(data);
+  virtual void Start(PassDataHolder* data ATTRIBUTE_UNUSED) const {
   }
 
   /**
    * @brief End of the pass: called after the WalkBasicBlocks function.
    */
-  virtual void End(PassDataHolder* data) const {
-    // Unused parameter.
-    UNUSED(data);
+  virtual void End(PassDataHolder* data ATTRIBUTE_UNUSED) const {
   }
 
   /**
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index 8762b53..34a6f63 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -125,8 +125,7 @@
    * @brief Dispatch a patch.
    * Gives the ability to add logic when running the patch.
    */
-  virtual void DispatchPass(const Pass* pass) {
-    UNUSED(pass);
+  virtual void DispatchPass(const Pass* pass ATTRIBUTE_UNUSED) {
   }
 
   /** @brief List of passes: provides the order to execute the passes.
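
Many of the hunks above and below make the same mechanical change: dropping UNUSED(x); statements from function bodies in favor of ATTRIBUTE_UNUSED on the parameter itself. A minimal sketch of the two idioms, using stand-in macros rather than ART's actual definitions in base/macros.h:

// Stand-ins for ART's macros (assumed here, not copied from base/macros.h).
#define MY_ATTRIBUTE_UNUSED __attribute__((unused))
#define MY_UNUSED(x) ((void)(x))

// Old style: name the parameter, then explicitly discard it in the body.
int OldStyle(int used, int unused) {
  MY_UNUSED(unused);
  return used + 1;
}

// New style: annotate the parameter so -Wunused-parameter stays quiet,
// keeping the body free of bookkeeping statements.
int NewStyle(int used, int unused MY_ATTRIBUTE_UNUSED) {
  return used + 1;
}

int main() {
  return OldStyle(1, 2) == NewStyle(1, 2) ? 0 : 1;
}
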
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index db76cc6..b2bd6fa 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -216,8 +216,7 @@
 
 void ArmMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                   int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                                  RegisterClass dest_reg_class) {
-  UNUSED(dest_reg_class);
+                                  RegisterClass dest_reg_class ATTRIBUTE_UNUSED) {
   // TODO: Generalize the IT below to accept more than one-instruction loads.
   DCHECK(InexpensiveConstantInt(true_val));
   DCHECK(InexpensiveConstantInt(false_val));
@@ -239,8 +238,7 @@
   OpEndIT(it);
 }
 
-void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb);
+void ArmMir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
@@ -516,9 +514,8 @@
 };
 
 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
-bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
+bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED, bool is_div,
                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
-  UNUSED(dalvik_opcode);
   if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
     return false;
   }
@@ -728,16 +725,19 @@
   return true;
 }
 
-RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                  RegLocation rl_src2, bool is_div, int flags) {
-  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
+RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                  RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                  RegLocation rl_src2 ATTRIBUTE_UNUSED,
+                                  bool is_div ATTRIBUTE_UNUSED,
+                                  int flags ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
   UNREACHABLE();
 }
 
-RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
-                                     bool is_div) {
-  UNUSED(rl_dest, rl_src1, lit, is_div);
+RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                     RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                     int lit ATTRIBUTE_UNUSED,
+                                     bool is_div ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
   UNREACHABLE();
 }
@@ -1160,9 +1160,8 @@
 }
 
 void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
-                                               RegLocation rl_result, int lit,
+                                               RegLocation rl_result, int lit ATTRIBUTE_UNUSED,
                                                int first_bit, int second_bit) {
-  UNUSED(lit);
   OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
                    EncodeShift(kArmLsl, second_bit - first_bit));
   if (first_bit != 0) {
@@ -1257,9 +1256,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+void ArmMir2Lir::GenMulLong(Instruction::Code opcode ATTRIBUTE_UNUSED, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
-  UNUSED(opcode);
   /*
    * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
    * dest     = src1.lo * src2.lo;
@@ -1564,8 +1562,7 @@
 
 void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
-                                   int flags) {
-  UNUSED(flags);
+                                   int flags ATTRIBUTE_UNUSED) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   // Per spec, we only care about low 6 bits of shift amount.
   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 5f27338..355485e 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -987,8 +987,7 @@
   return count;
 }
 
-void ArmMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb);
+void ArmMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
   DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode));
   RegLocation rl_src[3];
   RegLocation rl_dest = mir_graph_->GetBadLoc();
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 062f7af..c31f46b 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -419,20 +419,26 @@
   return OpRegRegShift(op, r_dest_src1, r_src2, 0);
 }
 
-LIR* ArmMir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
-  UNUSED(r_dest, r_base, offset, move_type);
+LIR* ArmMir2Lir::OpMovRegMem(RegStorage r_dest ATTRIBUTE_UNUSED,
+                             RegStorage r_base ATTRIBUTE_UNUSED,
+                             int offset ATTRIBUTE_UNUSED,
+                             MoveType move_type ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
 }
 
-LIR* ArmMir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
-  UNUSED(r_base, offset, r_src, move_type);
+LIR* ArmMir2Lir::OpMovMemReg(RegStorage r_base ATTRIBUTE_UNUSED,
+                             int offset ATTRIBUTE_UNUSED,
+                             RegStorage r_src ATTRIBUTE_UNUSED,
+                             MoveType move_type ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
 }
 
-LIR* ArmMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
-  UNUSED(op, cc, r_dest, r_src);
+LIR* ArmMir2Lir::OpCondRegReg(OpKind op ATTRIBUTE_UNUSED,
+                              ConditionCode cc ATTRIBUTE_UNUSED,
+                              RegStorage r_dest ATTRIBUTE_UNUSED,
+                              RegStorage r_src ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
   UNREACHABLE();
 }
@@ -1243,14 +1249,17 @@
   return res;
 }
 
-LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
-  UNUSED(op, r_base, disp);
+LIR* ArmMir2Lir::OpMem(OpKind op ATTRIBUTE_UNUSED,
+                       RegStorage r_base ATTRIBUTE_UNUSED,
+                       int disp ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpMem for Arm";
   UNREACHABLE();
 }
 
-LIR* ArmMir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
-  UNUSED(trampoline);  // The address of the trampoline is already loaded into r_tgt.
+LIR* ArmMir2Lir::InvokeTrampoline(OpKind op,
+                                  RegStorage r_tgt,
+                                  // The address of the trampoline is already loaded into r_tgt.
+                                  QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
   return OpReg(op, r_tgt);
 }
 
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 31cf667..d92dea2 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -37,14 +37,12 @@
   return OpCondBranch(cond, target);
 }
 
-LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
-  UNUSED(ccode, guide);
+LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode ATTRIBUTE_UNUSED, const char* guide ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpIT for Arm64";
   UNREACHABLE();
 }
 
-void Arm64Mir2Lir::OpEndIT(LIR* it) {
-  UNUSED(it);
+void Arm64Mir2Lir::OpEndIT(LIR* it ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
 }
 
@@ -188,8 +186,7 @@
   GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
 }
 
-void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb);
+void Arm64Mir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
   // rl_src may be aliased with rl_result/rl_dest, so do compare early.
@@ -413,9 +410,11 @@
 };
 
 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
-bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                      RegLocation rl_src, RegLocation rl_dest, int lit) {
-  UNUSED(dalvik_opcode);
+bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
+                                      bool is_div,
+                                      RegLocation rl_src,
+                                      RegLocation rl_dest,
+                                      int lit) {
   if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
     return false;
   }
@@ -457,9 +456,11 @@
   return true;
 }
 
-bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
-                                        RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
-  UNUSED(dalvik_opcode);
+bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
+                                        bool is_div,
+                                        RegLocation rl_src,
+                                        RegLocation rl_dest,
+                                        int64_t lit) {
   if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
     return false;
   }
@@ -599,15 +600,17 @@
   return true;
 }
 
-bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
-  UNUSED(rl_src, rl_dest, lit);
+bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src ATTRIBUTE_UNUSED,
+                                RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                int lit ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
   UNREACHABLE();
 }
 
-RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
-                                       bool is_div) {
-  UNUSED(rl_dest, rl_src1, lit, is_div);
+RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                       RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                       int lit ATTRIBUTE_UNUSED,
+                                       bool is_div ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
   UNREACHABLE();
 }
@@ -626,9 +629,11 @@
   return rl_result;
 }
 
-RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                    RegLocation rl_src2, bool is_div, int flags) {
-  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                    RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                    RegLocation rl_src2 ATTRIBUTE_UNUSED,
+                                    bool is_div ATTRIBUTE_UNUSED,
+                                    int flags ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
   UNREACHABLE();
 }
@@ -963,14 +968,12 @@
   dex_cache_access_insns_.push_back(ldr);
 }
 
-LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
-  UNUSED(r_base, count);
+LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
   UNREACHABLE();
 }
 
-LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
-  UNUSED(r_base, count);
+LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
   UNREACHABLE();
 }
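Editor's note: both the arm and arm64 hunks above touch SmallLiteralDivRem, the reciprocal-multiply path for division by a small constant (Hacker's Delight, 10-4). The following is a hedged, stand-alone illustration of the arithmetic the magic_table encodes, shown for divisor 3; it is not the ART implementation.

    #include <cassert>
    #include <cstdint>

    // Divide a signed 32-bit value by 3 without a divide instruction.
    // 0x55555556 is ceil(2^32 / 3); the high half of the 64-bit product
    // approximates n / 3, and subtracting n >> 31 (arithmetic shift) adds 1
    // for negative n so the quotient truncates toward zero.
    int32_t DivideBy3(int32_t n) {
      int64_t product = static_cast<int64_t>(n) * INT64_C(0x55555556);
      int32_t high = static_cast<int32_t>(product >> 32);
      return high - (n >> 31);
    }

    int main() {
      assert(DivideBy3(7) == 2);
      assert(DivideBy3(-7) == -2);
      assert(DivideBy3(90) == 30);
      return 0;
    }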
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 6efa11e..691bfd9 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -881,8 +881,7 @@
   return count;
 }
 
-void Arm64Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb);
+void Arm64Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
   DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode));
   RegLocation rl_src[3];
   RegLocation rl_dest = mir_graph_->GetBadLoc();
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 483231f..58769ea 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -672,22 +672,26 @@
   }
 }
 
-LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset,
-                               MoveType move_type) {
-  UNUSED(r_dest, r_base, offset, move_type);
+LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest ATTRIBUTE_UNUSED,
+                               RegStorage r_base ATTRIBUTE_UNUSED,
+                               int offset ATTRIBUTE_UNUSED,
+                               MoveType move_type ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
 }
 
-LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src,
-                               MoveType move_type) {
-  UNUSED(r_base, offset, r_src, move_type);
+LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base ATTRIBUTE_UNUSED,
+                               int offset ATTRIBUTE_UNUSED,
+                               RegStorage r_src ATTRIBUTE_UNUSED,
+                               MoveType move_type ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
   return nullptr;
 }
 
-LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
-  UNUSED(op, cc, r_dest, r_src);
+LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op ATTRIBUTE_UNUSED,
+                                ConditionCode cc ATTRIBUTE_UNUSED,
+                                RegStorage r_dest ATTRIBUTE_UNUSED,
+                                RegStorage r_src ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
   UNREACHABLE();
 }
@@ -1381,14 +1385,15 @@
   return store;
 }
 
-LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  UNUSED(r_dest, r_src);
+LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest ATTRIBUTE_UNUSED,
+                               RegStorage r_src ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
   UNREACHABLE();
 }
 
-LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
-  UNUSED(op, r_base, disp);
+LIR* Arm64Mir2Lir::OpMem(OpKind op ATTRIBUTE_UNUSED,
+                         RegStorage r_base ATTRIBUTE_UNUSED,
+                         int disp ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpMem for Arm64";
   UNREACHABLE();
 }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index d5ac341..d68835a 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -16,7 +16,13 @@
 
 #include "mir_to_lir-inl.h"
 
+// Mac does not provide endian.h, so we'll use byte order agnostic code.
+#ifndef __APPLE__
+#include <endian.h>
+#endif
+
 #include "base/bit_vector-inl.h"
+#include "base/stringprintf.h"
 #include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -841,7 +847,7 @@
         references_buffer[i] = static_cast<uint8_t>(
             raw_storage[i / sizeof(raw_storage[0])] >> (8u * (i % sizeof(raw_storage[0]))));
       }
-      native_gc_map_builder.AddEntry(native_offset, &references_buffer[0]);
+      native_gc_map_builder.AddEntry(native_offset, references_buffer.data());
       prev_mir = mir;
     }
   }
@@ -987,8 +993,7 @@
 }
 
 /* Set up special LIR to mark a Dalvik byte-code instruction start for pretty printing */
-void Mir2Lir::MarkBoundary(DexOffset offset, const char* inst_str) {
-  UNUSED(offset);
+void Mir2Lir::MarkBoundary(DexOffset offset ATTRIBUTE_UNUSED, const char* inst_str) {
   // NOTE: only used for debug listings.
   NewLIR1(kPseudoDalvikByteCodeBoundary, WrapPointer(ArenaStrdup(inst_str)));
 }
@@ -1161,7 +1166,7 @@
       cu_->compiler_driver, cu_->instruction_set,
       ArrayRef<const uint8_t>(code_buffer_),
       frame_size_, core_spill_mask_, fp_spill_mask_,
-      &src_mapping_table_,
+      ArrayRef<const SrcMapElem>(src_mapping_table_),
       ArrayRef<const uint8_t>(encoded_mapping_table_),
       ArrayRef<const uint8_t>(vmap_encoder.GetData()),
       ArrayRef<const uint8_t>(native_gc_map_),
@@ -1353,8 +1358,8 @@
   return loc;
 }
 
-void Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb, mir);
+void Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb ATTRIBUTE_UNUSED,
+                                                  MIR* mir ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unknown MIR opcode not supported on this architecture";
   UNREACHABLE();
 }
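Editor's note: the codegen_util.cc hunk above guards the endian.h include for Mac builds, and the surrounding GC-map emission stays byte-order agnostic by extracting bytes with shifts rather than reinterpreting memory. A minimal sketch of that serialization style, stand-alone and not the ART routine:

    #include <cstddef>
    #include <cstdint>

    // Emit each 32-bit word least-significant byte first by shifting.
    // The output is identical on big- and little-endian hosts, so no
    // <endian.h> conversion helpers are needed at the call site.
    void WriteWordsLittleEndian(const uint32_t* words, size_t word_count, uint8_t* out) {
      for (size_t i = 0; i < word_count * sizeof(uint32_t); ++i) {
        out[i] = static_cast<uint8_t>(
            words[i / sizeof(uint32_t)] >> (8u * (i % sizeof(uint32_t))));
      }
    }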
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index af93aab..eaf2408 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -73,6 +73,7 @@
     false,  // kIntrinsicUnsafeGet
     false,  // kIntrinsicUnsafePut
     true,   // kIntrinsicSystemArrayCopyCharArray
+    true,   // kIntrinsicSystemArrayCopy
 };
 static_assert(arraysize(kIntrinsicIsStatic) == kInlineOpNop,
               "arraysize of kIntrinsicIsStatic unexpected");
@@ -121,6 +122,8 @@
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopy],
+              "SystemArrayCopy must be static");
 
 MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke) {
   MIR* insn = mir_graph->NewMIR();
@@ -326,6 +329,9 @@
     // kProtoCacheCharArrayICharArrayII_V
     { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt,
         kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt} },
+    // kProtoCacheObjectIObjectII_V
+    { kClassCacheVoid, 5, {kClassCacheJavaLangObject, kClassCacheInt,
+        kClassCacheJavaLangObject, kClassCacheInt, kClassCacheInt} },
     // kProtoCacheIICharArrayI_V
     { kClassCacheVoid, 4, { kClassCacheInt, kClassCacheInt, kClassCacheJavaLangCharArray,
         kClassCacheInt } },
@@ -481,6 +487,8 @@
 
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray,
               0),
+    INTRINSIC(JavaLangSystem, ArrayCopy, ObjectIObjectII_V , kIntrinsicSystemArrayCopy,
+              0),
 
     INTRINSIC(JavaLangInteger, RotateRight, II_I, kIntrinsicRotateRight, k32),
     INTRINSIC(JavaLangLong, RotateRight, JI_J, kIntrinsicRotateRight, k64),
@@ -653,6 +661,7 @@
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
     case kIntrinsicRotateLeft:
+    case kIntrinsicSystemArrayCopy:
       return false;   // not implemented in quick.
     default:
       LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
@@ -747,14 +756,7 @@
     return *class_index;
   }
 
-  const DexFile::StringId* string_id = dex_file->FindStringId(kClassCacheNames[index]);
-  if (string_id == nullptr) {
-    *class_index = kIndexNotFound;
-    return *class_index;
-  }
-  uint32_t string_index = dex_file->GetIndexForStringId(*string_id);
-
-  const DexFile::TypeId* type_id = dex_file->FindTypeId(string_index);
+  const DexFile::TypeId* type_id = dex_file->FindTypeId(kClassCacheNames[index]);
   if (type_id == nullptr) {
     *class_index = kIndexNotFound;
     return *class_index;
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 8458806..5ce110c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -259,6 +259,7 @@
       kProtoCacheObjectJ_Object,
       kProtoCacheObjectJObject_V,
       kProtoCacheCharArrayICharArrayII_V,
+      kProtoCacheObjectIObjectII_V,
       kProtoCacheIICharArrayI_V,
       kProtoCacheByteArrayIII_String,
       kProtoCacheIICharArray_String,
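Editor's note: the new kProtoCacheObjectIObjectII_V slot pairs with the proto-cache entry added in dex_file_method_inliner.cc above: void return and five parameters (Object, int, Object, int, int), which is the signature of java.lang.System.arraycopy. A rough sketch of what such an entry carries; the type and field names here are invented for illustration and are not the inliner's real declarations.

    // Hypothetical shape of a proto-cache entry; indices refer into the
    // class-name cache (kClassCacheVoid, kClassCacheJavaLangObject, ...).
    struct ProtoCacheEntrySketch {
      int return_type;     // kClassCacheVoid for the arraycopy signature
      int param_count;     // 5
      int param_types[5];  // {Object, int, Object, int, int}
    };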
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2a1d644..2b60a51 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -2102,15 +2102,15 @@
 }
 
 /* Call out to helper assembly routine that will null check obj and then lock it. */
-void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
-  UNUSED(opt_flags);  // TODO: avoid null check with specialized non-null helper.
+void Mir2Lir::GenMonitorEnter(int opt_flags ATTRIBUTE_UNUSED, RegLocation rl_src) {
+  // TODO: avoid null check with specialized non-null helper.
   FlushAllRegs();
   CallRuntimeHelperRegLocation(kQuickLockObject, rl_src, true);
 }
 
 /* Call out to helper assembly routine that will null check obj and then unlock it. */
-void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  UNUSED(opt_flags);  // TODO: avoid null check with specialized non-null helper.
+void Mir2Lir::GenMonitorExit(int opt_flags ATTRIBUTE_UNUSED, RegLocation rl_src) {
+  // TODO: avoid null check with specialized non-null helper.
   FlushAllRegs();
   CallRuntimeHelperRegLocation(kQuickUnlockObject, rl_src, true);
 }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 3c5c2fe..422d82f 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -521,10 +521,9 @@
  * kArg1 here rather than the standard GenDalvikArgs.
  */
 static int NextVCallInsn(CompilationUnit* cu, CallInfo* info,
-                         int state, const MethodReference& target_method,
+                         int state, const MethodReference& target_method ATTRIBUTE_UNUSED,
                          uint32_t method_idx, uintptr_t, uintptr_t,
                          InvokeType) {
-  UNUSED(target_method);
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
   /*
    * This is the fast path in which the target virtual method is
@@ -607,10 +606,12 @@
   return state + 1;
 }
 
-static int NextInvokeInsnSP(CompilationUnit* cu, CallInfo* info,
-                            QuickEntrypointEnum trampoline, int state,
-                            const MethodReference& target_method, uint32_t method_idx) {
-  UNUSED(info, method_idx);
+static int NextInvokeInsnSP(CompilationUnit* cu,
+                            CallInfo* info ATTRIBUTE_UNUSED,
+                            QuickEntrypointEnum trampoline,
+                            int state,
+                            const MethodReference& target_method,
+                            uint32_t method_idx ATTRIBUTE_UNUSED) {
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
 
   /*
@@ -1266,35 +1267,31 @@
   return true;
 }
 
-bool Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
+bool Mir2Lir::GenInlinedReverseBits(CallInfo* info ATTRIBUTE_UNUSED, OpSize size ATTRIBUTE_UNUSED) {
   // Currently implemented only for ARM64.
-  UNUSED(info, size);
   return false;
 }
 
-bool Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
+bool Mir2Lir::GenInlinedMinMaxFP(CallInfo* info ATTRIBUTE_UNUSED,
+                                 bool is_min ATTRIBUTE_UNUSED,
+                                 bool is_double ATTRIBUTE_UNUSED) {
   // Currently implemented only for ARM64.
-  UNUSED(info, is_min, is_double);
   return false;
 }
 
-bool Mir2Lir::GenInlinedCeil(CallInfo* info) {
-  UNUSED(info);
+bool Mir2Lir::GenInlinedCeil(CallInfo* info ATTRIBUTE_UNUSED) {
   return false;
 }
 
-bool Mir2Lir::GenInlinedFloor(CallInfo* info) {
-  UNUSED(info);
+bool Mir2Lir::GenInlinedFloor(CallInfo* info ATTRIBUTE_UNUSED) {
   return false;
 }
 
-bool Mir2Lir::GenInlinedRint(CallInfo* info) {
-  UNUSED(info);
+bool Mir2Lir::GenInlinedRint(CallInfo* info ATTRIBUTE_UNUSED) {
   return false;
 }
 
-bool Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
-  UNUSED(info, is_double);
+bool Mir2Lir::GenInlinedRound(CallInfo* info ATTRIBUTE_UNUSED, bool is_double ATTRIBUTE_UNUSED) {
   return false;
 }
 
@@ -1328,8 +1325,7 @@
   return true;
 }
 
-bool Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
-  UNUSED(info);
+bool Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info ATTRIBUTE_UNUSED) {
   return false;
 }
 
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
index 3e9fb96..c425fc8 100644
--- a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
+++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
@@ -28,8 +28,8 @@
 // When we are generating the CFI code, we do not know the instruction offsets,
 // so this class stores the LIR references and patches the instruction stream later.
 class LazyDebugFrameOpCodeWriter FINAL
-    : public DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> {
-  typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base;
+    : public DebugFrameOpCodeWriter<ArenaVector<uint8_t>> {
+  typedef DebugFrameOpCodeWriter<ArenaVector<uint8_t>> Base;
  public:
   // This method is implicitly called by the opcode writers.
   virtual void ImplicitlyAdvancePC() OVERRIDE {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 8863c05..4a736f3d 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -32,9 +32,10 @@
 
 namespace art {
 
-bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special) {
+bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb ATTRIBUTE_UNUSED,
+                                 MIR* mir ATTRIBUTE_UNUSED,
+                                 const InlineMethod& special ATTRIBUTE_UNUSED) {
   // TODO
-  UNUSED(bb, mir, special);
   return false;
 }
 
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 45fd1a9..52706df 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -115,17 +115,17 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                             int32_t constant) {
+void MipsMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                             RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                             int32_t constant ATTRIBUTE_UNUSED) {
   // TODO: need mips implementation.
-  UNUSED(rl_dest, rl_src1, constant);
   LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in mips";
 }
 
-void MipsMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                              int64_t constant) {
+void MipsMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                              RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                              int64_t constant ATTRIBUTE_UNUSED) {
   // TODO: need mips implementation.
-  UNUSED(rl_dest, rl_src1, constant);
   LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in mips";
 }
 
@@ -254,8 +254,10 @@
   StoreValue(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) {
-  UNUSED(bb, mir, gt_bias, is_double);
+void MipsMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb ATTRIBUTE_UNUSED,
+                                      MIR* mir ATTRIBUTE_UNUSED,
+                                      bool gt_bias ATTRIBUTE_UNUSED,
+                                      bool is_double ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Need codegen for fused fp cmp branch";
 }
 
@@ -288,9 +290,10 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-bool MipsMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
+bool MipsMir2Lir::GenInlinedMinMax(CallInfo* info ATTRIBUTE_UNUSED,
+                                   bool is_min ATTRIBUTE_UNUSED,
+                                   bool is_long ATTRIBUTE_UNUSED) {
   // TODO: need Mips implementation.
-  UNUSED(info, is_min, is_long);
   return false;
 }
 
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 1099303..8ca53ea 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -279,8 +279,7 @@
 
 void MipsMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                                   RegisterClass dest_reg_class) {
-  UNUSED(dest_reg_class);
+                                   RegisterClass dest_reg_class ATTRIBUTE_UNUSED) {
   // Implement as a branch-over.
   // TODO: Conditional move?
   LoadConstant(rs_dest, true_val);
@@ -290,13 +289,12 @@
   ne_branchover->target = target_label;
 }
 
-void MipsMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb, mir);
+void MipsMir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Need codegen for select";
 }
 
-void MipsMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb, mir);
+void MipsMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb ATTRIBUTE_UNUSED,
+                                        MIR* mir ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Need codegen for fused long cmp branch";
 }
 
@@ -327,39 +325,40 @@
   return rl_result;
 }
 
-RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                                   bool is_div, int flags) {
-  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
+RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                   RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                   RegLocation rl_src2 ATTRIBUTE_UNUSED,
+                                   bool is_div ATTRIBUTE_UNUSED,
+                                   int flags ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRem for Mips";
   UNREACHABLE();
 }
 
-RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
-                                      bool is_div) {
-  UNUSED(rl_dest, rl_src1, lit, is_div);
+RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                      RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                      int lit ATTRIBUTE_UNUSED,
+                                      bool is_div ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for Mips";
   UNREACHABLE();
 }
 
-bool MipsMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
-  UNUSED(info, is_long, is_object);
+bool MipsMir2Lir::GenInlinedCas(CallInfo* info ATTRIBUTE_UNUSED,
+                                bool is_long ATTRIBUTE_UNUSED,
+                                bool is_object ATTRIBUTE_UNUSED) {
   return false;
 }
 
-bool MipsMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
-  UNUSED(info);
+bool MipsMir2Lir::GenInlinedAbsFloat(CallInfo* info ATTRIBUTE_UNUSED) {
   // TODO: add Mips implementation.
   return false;
 }
 
-bool MipsMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
-  UNUSED(info);
+bool MipsMir2Lir::GenInlinedAbsDouble(CallInfo* info ATTRIBUTE_UNUSED) {
   // TODO: add Mips implementation.
   return false;
 }
 
-bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info) {
-  UNUSED(info);
+bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info ATTRIBUTE_UNUSED) {
   return false;
 }
 
@@ -408,27 +407,26 @@
   return true;
 }
 
-void MipsMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
-  UNUSED(reg, target);
+void MipsMir2Lir::OpPcRelLoad(RegStorage reg ATTRIBUTE_UNUSED, LIR* target ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips";
   UNREACHABLE();
 }
 
-LIR* MipsMir2Lir::OpVldm(RegStorage r_base, int count) {
-  UNUSED(r_base, count);
+LIR* MipsMir2Lir::OpVldm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpVldm for Mips";
   UNREACHABLE();
 }
 
-LIR* MipsMir2Lir::OpVstm(RegStorage r_base, int count) {
-  UNUSED(r_base, count);
+LIR* MipsMir2Lir::OpVstm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpVstm for Mips";
   UNREACHABLE();
 }
 
-void MipsMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                                int first_bit, int second_bit) {
-  UNUSED(lit);
+void MipsMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
+                                                RegLocation rl_result,
+                                                int lit ATTRIBUTE_UNUSED,
+                                                int first_bit,
+                                                int second_bit) {
   RegStorage t_reg = AllocTemp();
   OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
   OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
@@ -462,27 +460,28 @@
   return OpCmpImmBranch(c_code, reg, 0, target);
 }
 
-bool MipsMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
-  UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit);
-  LOG(FATAL) << "Unexpected use of smallLiteralDive in Mips";
+bool MipsMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
+                                     bool is_div ATTRIBUTE_UNUSED,
+                                     RegLocation rl_src ATTRIBUTE_UNUSED,
+                                     RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                     int lit ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unexpected use of smallLiteralDivRem in Mips";
   UNREACHABLE();
 }
 
-bool MipsMir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
-  UNUSED(rl_src, rl_dest, lit);
+bool MipsMir2Lir::EasyMultiply(RegLocation rl_src ATTRIBUTE_UNUSED,
+                               RegLocation rl_dest ATTRIBUTE_UNUSED,
+                               int lit ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of easyMultiply in Mips";
   UNREACHABLE();
 }
 
-LIR* MipsMir2Lir::OpIT(ConditionCode cond, const char* guide) {
-  UNUSED(cond, guide);
+LIR* MipsMir2Lir::OpIT(ConditionCode cond ATTRIBUTE_UNUSED, const char* guide ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpIT in Mips";
   UNREACHABLE();
 }
 
-void MipsMir2Lir::OpEndIT(LIR* it) {
-  UNUSED(it);
+void MipsMir2Lir::OpEndIT(LIR* it ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpEndIT in Mips";
 }
 
@@ -621,9 +620,12 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                RegLocation rl_src2, bool is_div, int flags) {
-  UNUSED(opcode);
+void MipsMir2Lir::GenDivRemLong(Instruction::Code opcode ATTRIBUTE_UNUSED,
+                                RegLocation rl_dest,
+                                RegLocation rl_src1,
+                                RegLocation rl_src2,
+                                bool is_div,
+                                int flags) {
   // TODO: Implement easy div/rem?
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
@@ -855,9 +857,11 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                    RegLocation rl_src1, RegLocation rl_shift, int flags) {
-  UNUSED(flags);
+void MipsMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
+                                    RegLocation rl_dest,
+                                    RegLocation rl_src1,
+                                    RegLocation rl_shift,
+                                    int flags ATTRIBUTE_UNUSED) {
   if (!cu_->target64) {
     // Default implementation is just to ignore the constant case.
     GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index ec2475a..372fe2b 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -103,18 +103,15 @@
   return ((value == 0) || IsUint<16>(value) || IsInt<16>(value));
 }
 
-bool MipsMir2Lir::InexpensiveConstantFloat(int32_t value) {
-  UNUSED(value);
+bool MipsMir2Lir::InexpensiveConstantFloat(int32_t value ATTRIBUTE_UNUSED) {
   return false;  // TUNING
 }
 
-bool MipsMir2Lir::InexpensiveConstantLong(int64_t value) {
-  UNUSED(value);
+bool MipsMir2Lir::InexpensiveConstantLong(int64_t value ATTRIBUTE_UNUSED) {
   return false;  // TUNING
 }
 
-bool MipsMir2Lir::InexpensiveConstantDouble(int64_t value) {
-  UNUSED(value);
+bool MipsMir2Lir::InexpensiveConstantDouble(int64_t value ATTRIBUTE_UNUSED) {
   return false;  // TUNING
 }
 
@@ -520,21 +517,26 @@
   return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
 }
 
-LIR* MipsMir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset,
-                              MoveType move_type) {
-  UNUSED(r_dest, r_base, offset, move_type);
+LIR* MipsMir2Lir::OpMovRegMem(RegStorage r_dest ATTRIBUTE_UNUSED,
+                              RegStorage r_base ATTRIBUTE_UNUSED,
+                              int offset ATTRIBUTE_UNUSED,
+                              MoveType move_type ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
 }
 
-LIR* MipsMir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
-  UNUSED(r_base, offset, r_src, move_type);
+LIR* MipsMir2Lir::OpMovMemReg(RegStorage r_base ATTRIBUTE_UNUSED,
+                              int offset ATTRIBUTE_UNUSED,
+                              RegStorage r_src ATTRIBUTE_UNUSED,
+                              MoveType move_type ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
 }
 
-LIR* MipsMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
-  UNUSED(op, cc, r_dest, r_src);
+LIR* MipsMir2Lir::OpCondRegReg(OpKind op ATTRIBUTE_UNUSED,
+                               ConditionCode cc ATTRIBUTE_UNUSED,
+                               RegStorage r_dest ATTRIBUTE_UNUSED,
+                               RegStorage r_src ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for MIPS";
   UNREACHABLE();
 }
@@ -1031,14 +1033,14 @@
   return store;
 }
 
-LIR* MipsMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
-  UNUSED(op, r_base, disp);
+LIR* MipsMir2Lir::OpMem(OpKind op ATTRIBUTE_UNUSED,
+                        RegStorage r_base ATTRIBUTE_UNUSED,
+                        int disp ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpMem for MIPS";
   UNREACHABLE();
 }
 
-LIR* MipsMir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
-  UNUSED(cc, target);
+LIR* MipsMir2Lir::OpCondBranch(ConditionCode cc ATTRIBUTE_UNUSED, LIR* target ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpCondBranch for MIPS";
   UNREACHABLE();
 }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index c50246d..8da3863 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1411,8 +1411,7 @@
       rl.ref ? RefCheck::kCheckRef : RefCheck::kCheckNotRef, FPCheck::kIgnoreFP, fail, report);
 }
 
-size_t Mir2Lir::GetInstructionOffset(LIR* lir) {
-  UNUSED(lir);
+size_t Mir2Lir::GetInstructionOffset(LIR* lir ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Unsupported GetInstructionOffset()";
   UNREACHABLE();
 }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4e3aab2..a0db1e8 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1463,8 +1463,7 @@
     virtual bool InexpensiveConstantFloat(int32_t value) = 0;
     virtual bool InexpensiveConstantLong(int64_t value) = 0;
     virtual bool InexpensiveConstantDouble(int64_t value) = 0;
-    virtual bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
-      UNUSED(opcode);
+    virtual bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode ATTRIBUTE_UNUSED) {
       return InexpensiveConstantInt(value);
     }
 
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 18c2e55..24daf2f 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -67,7 +67,6 @@
       false,
       false,
       nullptr,
-      new PassManagerOptions(),
       nullptr,
       false);
     VerificationResults verification_results(&compiler_options);
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 1cd742a..6673ea8 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -37,6 +37,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "elf_writer_quick.h"
+#include "experimental_flags.h"
 #include "jni/quick/jni_compiler.h"
 #include "mir_to_lir.h"
 #include "mirror/object.h"
@@ -523,7 +524,8 @@
     // All opcodes are supported no matter what. Usually not the case
     // since experimental opcodes are not implemented in the quick compiler.
     return true;
-  } else if (LIKELY(!Runtime::Current()->AreExperimentalLambdasEnabled())) {
+  } else if (LIKELY(!Runtime::Current()->
+                      AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas))) {
     // Experimental opcodes are disabled.
     //
     // If all unsupported opcodes are experimental we don't need to do scanning.
@@ -849,8 +851,8 @@
       InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
 }
 
-Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) {
-  UNUSED(compilation_unit);
+Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu,
+                                         void* compilation_unit ATTRIBUTE_UNUSED) {
   Mir2Lir* mir_to_lir = nullptr;
   switch (cu->instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
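Editor's note: the quick_compiler.cc hunk above replaces the dedicated AreExperimentalLambdasEnabled() accessor with a per-feature query, AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas). A rough sketch of that bit-flag pattern follows; the names and values are assumptions, not the real art::ExperimentalFlags.

    #include <cstdint>

    // One bit per experimental feature; querying is a mask test, so new
    // features only add an enumerator instead of another runtime accessor.
    enum class ExperimentalFlagsSketch : uint32_t {
      kNone    = 0x0,
      kLambdas = 0x1,
    };

    constexpr bool AreFlagsEnabledSketch(ExperimentalFlagsSketch enabled,
                                         ExperimentalFlagsSketch requested) {
      return (static_cast<uint32_t>(enabled) & static_cast<uint32_t>(requested)) != 0;
    }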
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 8ec86fa..dceb118 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -18,6 +18,7 @@
 
 #include "mir_to_lir-inl.h"
 
+#include "base/stringprintf.h"
 #include "dex/compiler_ir.h"
 #include "dex/dataflow_iterator-inl.h"
 #include "dex/mir_graph.h"
@@ -320,15 +321,13 @@
 }
 
 // TODO: this is Thumb2 only.  Remove when DoPromotion refactored.
-RegStorage Mir2Lir::AllocPreservedDouble(int s_reg) {
-  UNUSED(s_reg);
+RegStorage Mir2Lir::AllocPreservedDouble(int s_reg ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedDouble";
   UNREACHABLE();
 }
 
 // TODO: this is Thumb2 only.  Remove when DoPromotion refactored.
-RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) {
-  UNUSED(s_reg);
+RegStorage Mir2Lir::AllocPreservedSingle(int s_reg ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedSingle";
   UNREACHABLE();
 }
@@ -1553,8 +1552,7 @@
   return (lowSreg == INVALID_SREG) ? INVALID_SREG : lowSreg + 1;
 }
 
-bool Mir2Lir::LiveOut(int s_reg) {
-  UNUSED(s_reg);
+bool Mir2Lir::LiveOut(int s_reg ATTRIBUTE_UNUSED) {
   // For now.
   return true;
 }
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 12523ac..e5d3841 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -21,6 +21,7 @@
 #include "dex/compiler_ir.h"
 #include "dex/quick/mir_to_lir.h"
 #include "oat.h"
+#include "oat_quick_method_header.h"
 #include "utils.h"
 #include "x86_lir.h"
 
@@ -1629,8 +1630,8 @@
  * instruction.  In those cases we will try to substitute a new code
  * sequence or request that the trace be shortened and retried.
  */
-AssemblerStatus X86Mir2Lir::AssembleInstructions(LIR* first_lir_insn, CodeOffset start_addr) {
-  UNUSED(start_addr);
+AssemblerStatus X86Mir2Lir::AssembleInstructions(LIR* first_lir_insn,
+                                                 CodeOffset start_addr ATTRIBUTE_UNUSED) {
   LIR *lir;
   AssemblerStatus res = kSuccess;  // Assume success
 
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 8e81746..b11d41c 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -124,17 +124,17 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
-                                            int32_t constant) {
+void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                            RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                            int32_t constant ATTRIBUTE_UNUSED) {
   // TODO: need x86 implementation.
-  UNUSED(rl_dest, rl_src1, constant);
   LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
 }
 
-void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
-                                             int64_t constant) {
+void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                             RegLocation rl_src1 ATTRIBUTE_UNUSED,
+                                             int64_t constant ATTRIBUTE_UNUSED) {
   // TODO: need x86 implementation.
-  UNUSED(rl_dest, rl_src1, constant);
   LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
 }
 
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index ecd23e9..a8706c3 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -270,8 +270,7 @@
   }
 }
 
-void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb);
+void X86Mir2Lir::GenSelect(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir) {
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
@@ -597,8 +596,10 @@
   shift = (is_long) ? p - 64 : p - 32;
 }
 
-RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
-  UNUSED(rl_dest, reg_lo, lit, is_div);
+RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                     RegStorage reg_lo ATTRIBUTE_UNUSED,
+                                     int lit ATTRIBUTE_UNUSED,
+                                     bool is_div ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
   UNREACHABLE();
 }
@@ -766,16 +767,19 @@
   return rl_result;
 }
 
-RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
-                                  bool is_div) {
-  UNUSED(rl_dest, reg_lo, reg_hi, is_div);
+RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                  RegStorage reg_lo ATTRIBUTE_UNUSED,
+                                  RegStorage reg_hi ATTRIBUTE_UNUSED,
+                                  bool is_div ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of GenDivRem for x86";
   UNREACHABLE();
 }
 
-RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                  RegLocation rl_src2, bool is_div, int flags) {
-  UNUSED(rl_dest);
+RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                  RegLocation rl_src1,
+                                  RegLocation rl_src2,
+                                  bool is_div,
+                                  int flags) {
   // We have to use fixed registers, so flush all the temps.
 
   // Prepare for explicit register usage.
@@ -1449,22 +1453,21 @@
   }
 }
 
-LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
-  UNUSED(r_base, count);
+LIR* X86Mir2Lir::OpVldm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpVldm for x86";
   UNREACHABLE();
 }
 
-LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
-  UNUSED(r_base, count);
+LIR* X86Mir2Lir::OpVstm(RegStorage r_base ATTRIBUTE_UNUSED, int count ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpVstm for x86";
   UNREACHABLE();
 }
 
 void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
-                                               RegLocation rl_result, int lit,
-                                               int first_bit, int second_bit) {
-  UNUSED(lit);
+                                               RegLocation rl_result,
+                                               int lit ATTRIBUTE_UNUSED,
+                                               int first_bit,
+                                               int second_bit) {
   RegStorage t_reg = AllocTemp();
   OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
   OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
@@ -1595,27 +1598,28 @@
   return OpCondBranch(c_code, target);
 }
 
-bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
-  UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit);
-  LOG(FATAL) << "Unexpected use of smallLiteralDive in x86";
+bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode ATTRIBUTE_UNUSED,
+                                    bool is_div ATTRIBUTE_UNUSED,
+                                    RegLocation rl_src ATTRIBUTE_UNUSED,
+                                    RegLocation rl_dest ATTRIBUTE_UNUSED,
+                                    int lit ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unexpected use of smallLiteralDivRem in x86";
   UNREACHABLE();
 }
 
-bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
-  UNUSED(rl_src, rl_dest, lit);
+bool X86Mir2Lir::EasyMultiply(RegLocation rl_src ATTRIBUTE_UNUSED,
+                              RegLocation rl_dest ATTRIBUTE_UNUSED,
+                              int lit ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of easyMultiply in x86";
   UNREACHABLE();
 }
 
-LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
-  UNUSED(cond, guide);
+LIR* X86Mir2Lir::OpIT(ConditionCode cond ATTRIBUTE_UNUSED, const char* guide ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpIT in x86";
   UNREACHABLE();
 }
 
-void X86Mir2Lir::OpEndIT(LIR* it) {
-  UNUSED(it);
+void X86Mir2Lir::OpEndIT(LIR* it ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of OpEndIT in x86";
   UNREACHABLE();
 }
@@ -1634,8 +1638,10 @@
   }
 }
 
-void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
-  UNUSED(sreg);
+void X86Mir2Lir::GenImulMemImm(RegStorage dest,
+                               int sreg ATTRIBUTE_UNUSED,
+                               int displacement,
+                               int val) {
   // All memory accesses below reference dalvik regs.
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
 
@@ -2548,9 +2554,11 @@
   }
 }
 
-RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                          RegLocation rl_src, int shift_amount, int flags) {
-  UNUSED(flags);
+RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
+                                          RegLocation rl_dest,
+                                          RegLocation rl_src,
+                                          int shift_amount,
+                                          int flags ATTRIBUTE_UNUSED) {
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   if (cu_->target64) {
     OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index d9571c5..e977ebf 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -50,7 +50,6 @@
         false,
         false,
         nullptr,
-        new PassManagerOptions(),
         nullptr,
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index c62cd47..75f3fef 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -31,6 +31,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
 #include "oat.h"
+#include "oat_quick_method_header.h"
 #include "x86_lir.h"
 
 namespace art {
@@ -254,8 +255,7 @@
                        : RegStorage32FromSpecialTargetRegister_Target32[reg];
 }
 
-RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
-  UNUSED(reg);
+RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Do not use this function!!!";
   UNREACHABLE();
 }
@@ -861,8 +861,7 @@
 }
 
 // Not used in x86(-64)
-RegStorage X86Mir2Lir::LoadHelper(QuickEntrypointEnum trampoline) {
-  UNUSED(trampoline);
+RegStorage X86Mir2Lir::LoadHelper(QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unexpected use of LoadHelper in x86";
   UNREACHABLE();
 }
@@ -2323,13 +2322,11 @@
   }
 }
 
-void X86Mir2Lir::GenPackedArrayGet(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb, mir);
+void X86Mir2Lir::GenPackedArrayGet(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Extended opcode kMirOpPackedArrayGet not supported.";
 }
 
-void X86Mir2Lir::GenPackedArrayPut(BasicBlock* bb, MIR* mir) {
-  UNUSED(bb, mir);
+void X86Mir2Lir::GenPackedArrayPut(BasicBlock* bb ATTRIBUTE_UNUSED, MIR* mir ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Extended opcode kMirOpPackedArrayPut not supported.";
 }
 
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index b16ae98..61354df 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -57,8 +57,7 @@
   return res;
 }
 
-bool X86Mir2Lir::InexpensiveConstantInt(int32_t value) {
-  UNUSED(value);
+bool X86Mir2Lir::InexpensiveConstantInt(int32_t value ATTRIBUTE_UNUSED) {
   return true;
 }
 
@@ -66,8 +65,7 @@
   return value == 0;
 }
 
-bool X86Mir2Lir::InexpensiveConstantLong(int64_t value) {
-  UNUSED(value);
+bool X86Mir2Lir::InexpensiveConstantLong(int64_t value ATTRIBUTE_UNUSED) {
   return true;
 }
 
@@ -942,9 +940,14 @@
   return store;
 }
 
-LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target, LIR** compare) {
-  UNUSED(temp_reg);  // Comparison performed directly with memory.
+LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond,
+                                   // Comparison performed directly with memory.
+                                   RegStorage temp_reg ATTRIBUTE_UNUSED,
+                                   RegStorage base_reg,
+                                   int offset,
+                                   int check_value,
+                                   LIR* target,
+                                   LIR** compare) {
   LIR* inst = NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(),
       offset, check_value);
   if (compare != nullptr) {
@@ -1114,8 +1117,11 @@
   return loc;
 }
 
-LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
-  UNUSED(r_tgt);  // Call to absolute memory location doesn't need a temporary target register.
+LIR* X86Mir2Lir::InvokeTrampoline(OpKind op,
+                                  // Call to absolute memory location doesn't
+                                  // need a temporary target register.
+                                  RegStorage r_tgt ATTRIBUTE_UNUSED,
+                                  QuickEntrypointEnum trampoline) {
   if (cu_->target64) {
     return OpThreadMem(op, GetThreadOffset<8>(trampoline));
   } else {
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 6f2b234..65b0ad6 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -34,7 +34,6 @@
       verified_methods_(),
       rejected_classes_lock_("compiler rejected classes lock"),
       rejected_classes_() {
-  UNUSED(compiler_options);
 }
 
 VerificationResults::~VerificationResults() {
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
new file mode 100644
index 0000000..bc5c6ca
--- /dev/null
+++ b/compiler/driver/compiled_method_storage.cc
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <ostream>
+
+#include "compiled_method_storage.h"
+
+#include "base/logging.h"
+#include "compiled_method.h"
+#include "thread-inl.h"
+#include "utils.h"
+#include "utils/dedupe_set-inl.h"
+#include "utils/swap_space.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+template <typename T>
+const LengthPrefixedArray<T>* CopyArray(SwapSpace* swap_space, const ArrayRef<const T>& array) {
+  DCHECK(!array.empty());
+  SwapAllocator<uint8_t> allocator(swap_space);
+  void* storage = allocator.allocate(LengthPrefixedArray<T>::ComputeSize(array.size()));
+  LengthPrefixedArray<T>* array_copy = new(storage) LengthPrefixedArray<T>(array.size());
+  std::copy(array.begin(), array.end(), array_copy->begin());
+  return array_copy;
+}
+
+template <typename T>
+void ReleaseArray(SwapSpace* swap_space, const LengthPrefixedArray<T>* array) {
+  SwapAllocator<uint8_t> allocator(swap_space);
+  size_t size = LengthPrefixedArray<T>::ComputeSize(array->size());
+  array->~LengthPrefixedArray<T>();
+  allocator.deallocate(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(array)), size);
+}
+
+}  // anonymous namespace
+
+template <typename T, typename DedupeSetType>
+inline const LengthPrefixedArray<T>* CompiledMethodStorage::AllocateOrDeduplicateArray(
+    const ArrayRef<const T>& data,
+    DedupeSetType* dedupe_set) {
+  if (data.empty()) {
+    return nullptr;
+  } else if (!DedupeEnabled()) {
+    return CopyArray(swap_space_.get(), data);
+  } else {
+    return dedupe_set->Add(Thread::Current(), data);
+  }
+}
+
+template <typename T>
+inline void CompiledMethodStorage::ReleaseArrayIfNotDeduplicated(
+    const LengthPrefixedArray<T>* array) {
+  if (array != nullptr && !DedupeEnabled()) {
+    ReleaseArray(swap_space_.get(), array);
+  }
+}
+
+template <typename ContentType>
+class CompiledMethodStorage::DedupeHashFunc {
+ private:
+  static constexpr bool kUseMurmur3Hash = true;
+
+ public:
+  size_t operator()(const ArrayRef<ContentType>& array) const {
+    const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data());
+    // TODO: More reasonable assertion.
+    // static_assert(IsPowerOfTwo(sizeof(ContentType)),
+    //    "ContentType is not power of two, don't know whether array layout is as assumed");
+    uint32_t len = sizeof(ContentType) * array.size();
+    if (kUseMurmur3Hash) {
+      static constexpr uint32_t c1 = 0xcc9e2d51;
+      static constexpr uint32_t c2 = 0x1b873593;
+      static constexpr uint32_t r1 = 15;
+      static constexpr uint32_t r2 = 13;
+      static constexpr uint32_t m = 5;
+      static constexpr uint32_t n = 0xe6546b64;
+
+      uint32_t hash = 0;
+
+      const int nblocks = len / 4;
+      typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
+      const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data);
+      int i;
+      for (i = 0; i < nblocks; i++) {
+        uint32_t k = blocks[i];
+        k *= c1;
+        k = (k << r1) | (k >> (32 - r1));
+        k *= c2;
+
+        hash ^= k;
+        hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
+      }
+
+      const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
+      uint32_t k1 = 0;
+
+      switch (len & 3) {
+        case 3:
+          k1 ^= tail[2] << 16;
+          FALLTHROUGH_INTENDED;
+        case 2:
+          k1 ^= tail[1] << 8;
+          FALLTHROUGH_INTENDED;
+        case 1:
+          k1 ^= tail[0];
+
+          k1 *= c1;
+          k1 = (k1 << r1) | (k1 >> (32 - r1));
+          k1 *= c2;
+          hash ^= k1;
+      }
+
+      hash ^= len;
+      hash ^= (hash >> 16);
+      hash *= 0x85ebca6b;
+      hash ^= (hash >> 13);
+      hash *= 0xc2b2ae35;
+      hash ^= (hash >> 16);
+
+      return hash;
+    } else {
+      size_t hash = 0x811c9dc5;
+      for (uint32_t i = 0; i < len; ++i) {
+        hash = (hash * 16777619) ^ data[i];
+      }
+      hash += hash << 13;
+      hash ^= hash >> 7;
+      hash += hash << 3;
+      hash ^= hash >> 17;
+      hash += hash << 5;
+      return hash;
+    }
+  }
+};
+
+template <typename T>
+class CompiledMethodStorage::LengthPrefixedArrayAlloc {
+ public:
+  explicit LengthPrefixedArrayAlloc(SwapSpace* swap_space)
+      : swap_space_(swap_space) {
+  }
+
+  const LengthPrefixedArray<T>* Copy(const ArrayRef<const T>& array) {
+    return CopyArray(swap_space_, array);
+  }
+
+  void Destroy(const LengthPrefixedArray<T>* array) {
+    ReleaseArray(swap_space_, array);
+  }
+
+ private:
+  SwapSpace* const swap_space_;
+};
+
+CompiledMethodStorage::CompiledMethodStorage(int swap_fd)
+    : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
+      dedupe_enabled_(true),
+      dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_src_mapping_table_("dedupe source mapping table",
+                                LengthPrefixedArrayAlloc<SrcMapElem>(swap_space_.get())),
+      dedupe_mapping_table_("dedupe mapping table",
+                            LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_vmap_table_("dedupe vmap table",
+                         LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_gc_map_("dedupe gc map", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
+      dedupe_linker_patches_("dedupe cfi info",
+                             LengthPrefixedArrayAlloc<LinkerPatch>(swap_space_.get())) {
+}
+
+CompiledMethodStorage::~CompiledMethodStorage() {
+  // All done by member destructors.
+}
+
+void CompiledMethodStorage::DumpMemoryUsage(std::ostream& os, bool extended) const {
+  if (swap_space_.get() != nullptr) {
+    os << " swap=" << PrettySize(swap_space_->GetSize());
+  }
+  if (extended) {
+    Thread* self = Thread::Current();
+    os << "\nCode dedupe: " << dedupe_code_.DumpStats(self);
+    os << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats(self);
+    os << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats(self);
+    os << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats(self);
+    os << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats(self);
+  }
+}
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCode(
+    const ArrayRef<const uint8_t>& code) {
+  return AllocateOrDeduplicateArray(code, &dedupe_code_);
+}
+
+void CompiledMethodStorage::ReleaseCode(const LengthPrefixedArray<uint8_t>* code) {
+  ReleaseArrayIfNotDeduplicated(code);
+}
+
+const LengthPrefixedArray<SrcMapElem>* CompiledMethodStorage::DeduplicateSrcMappingTable(
+    const ArrayRef<const SrcMapElem>& src_map) {
+  return AllocateOrDeduplicateArray(src_map, &dedupe_src_mapping_table_);
+}
+
+void CompiledMethodStorage::ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map) {
+  ReleaseArrayIfNotDeduplicated(src_map);
+}
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateMappingTable(
+    const ArrayRef<const uint8_t>& table) {
+  return AllocateOrDeduplicateArray(table, &dedupe_mapping_table_);
+}
+
+void CompiledMethodStorage::ReleaseMappingTable(const LengthPrefixedArray<uint8_t>* table) {
+  ReleaseArrayIfNotDeduplicated(table);
+}
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
+    const ArrayRef<const uint8_t>& table) {
+  return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_);
+}
+
+void CompiledMethodStorage::ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table) {
+  ReleaseArrayIfNotDeduplicated(table);
+}
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateGCMap(
+    const ArrayRef<const uint8_t>& gc_map) {
+  return AllocateOrDeduplicateArray(gc_map, &dedupe_gc_map_);
+}
+
+void CompiledMethodStorage::ReleaseGCMap(const LengthPrefixedArray<uint8_t>* gc_map) {
+  ReleaseArrayIfNotDeduplicated(gc_map);
+}
+
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo(
+    const ArrayRef<const uint8_t>& cfi_info) {
+  return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_);
+}
+
+void CompiledMethodStorage::ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info) {
+  ReleaseArrayIfNotDeduplicated(cfi_info);
+}
+
+const LengthPrefixedArray<LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches(
+    const ArrayRef<const LinkerPatch>& linker_patches) {
+  return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_);
+}
+
+void CompiledMethodStorage::ReleaseLinkerPatches(
+    const LengthPrefixedArray<LinkerPatch>* linker_patches) {
+  ReleaseArrayIfNotDeduplicated(linker_patches);
+}
+
+}  // namespace art
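The hash used by DedupeHashFunc above is the 32-bit MurmurHash3 routine with a zero seed, applied to the raw bytes of each table. A standalone sketch of the same computation (helper name is illustrative, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Minimal sketch of the MurmurHash3 x86_32 variant used by DedupeHashFunc:
// zero seed, memcpy for unaligned-safe block loads, standard finalizer.
static uint32_t Murmur3Sketch(const uint8_t* data, uint32_t len) {
  const uint32_t c1 = 0xcc9e2d51, c2 = 0x1b873593;
  uint32_t hash = 0;
  uint32_t i = 0;
  for (; i + 4 <= len; i += 4) {
    uint32_t k;
    std::memcpy(&k, data + i, 4);  // unaligned-safe load of one block
    k *= c1; k = (k << 15) | (k >> 17); k *= c2;
    hash ^= k;
    hash = ((hash << 13) | (hash >> 19)) * 5 + 0xe6546b64;
  }
  uint32_t k1 = 0;
  switch (len & 3) {
    case 3: k1 ^= data[i + 2] << 16;  // fall through
    case 2: k1 ^= data[i + 1] << 8;   // fall through
    case 1: k1 ^= data[i];
            k1 *= c1; k1 = (k1 << 15) | (k1 >> 17); k1 *= c2;
            hash ^= k1;
  }
  hash ^= len;
  hash ^= hash >> 16; hash *= 0x85ebca6b;
  hash ^= hash >> 13; hash *= 0xc2b2ae35;
  hash ^= hash >> 16;
  return hash;
}

int main() {
  const uint8_t code[] = {1, 2, 3};
  std::printf("hash=%08x\n", Murmur3Sketch(code, sizeof(code)));
}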
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
new file mode 100644
index 0000000..ef10b67
--- /dev/null
+++ b/compiler/driver/compiled_method_storage.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
+#define ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
+
+#include <iosfwd>
+#include <memory>
+
+#include "base/macros.h"
+#include "length_prefixed_array.h"
+#include "utils/array_ref.h"
+#include "utils/dedupe_set.h"
+#include "utils/swap_space.h"
+
+namespace art {
+
+class LinkerPatch;
+class SrcMapElem;
+
+class CompiledMethodStorage {
+ public:
+  explicit CompiledMethodStorage(int swap_fd);
+  ~CompiledMethodStorage();
+
+  void DumpMemoryUsage(std::ostream& os, bool extended) const;
+
+  void SetDedupeEnabled(bool dedupe_enabled) {
+    dedupe_enabled_ = dedupe_enabled;
+  }
+  bool DedupeEnabled() const {
+    return dedupe_enabled_;
+  }
+
+  SwapAllocator<void> GetSwapSpaceAllocator() {
+    return SwapAllocator<void>(swap_space_.get());
+  }
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
+  void ReleaseCode(const LengthPrefixedArray<uint8_t>* code);
+
+  const LengthPrefixedArray<SrcMapElem>* DeduplicateSrcMappingTable(
+      const ArrayRef<const SrcMapElem>& src_map);
+  void ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map);
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateMappingTable(const ArrayRef<const uint8_t>& table);
+  void ReleaseMappingTable(const LengthPrefixedArray<uint8_t>* table);
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
+  void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateGCMap(const ArrayRef<const uint8_t>& gc_map);
+  void ReleaseGCMap(const LengthPrefixedArray<uint8_t>* gc_map);
+
+  const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
+  void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info);
+
+  const LengthPrefixedArray<LinkerPatch>* DeduplicateLinkerPatches(
+      const ArrayRef<const LinkerPatch>& linker_patches);
+  void ReleaseLinkerPatches(const LengthPrefixedArray<LinkerPatch>* linker_patches);
+
+ private:
+  template <typename T, typename DedupeSetType>
+  const LengthPrefixedArray<T>* AllocateOrDeduplicateArray(const ArrayRef<const T>& data,
+                                                           DedupeSetType* dedupe_set);
+
+  template <typename T>
+  void ReleaseArrayIfNotDeduplicated(const LengthPrefixedArray<T>* array);
+
+  // Deduplication data structures.
+  template <typename ContentType>
+  class DedupeHashFunc;
+
+  template <typename T>
+  class LengthPrefixedArrayAlloc;
+
+  template <typename T>
+  using ArrayDedupeSet = DedupeSet<ArrayRef<const T>,
+                                   LengthPrefixedArray<T>,
+                                   LengthPrefixedArrayAlloc<T>,
+                                   size_t,
+                                   DedupeHashFunc<const T>,
+                                   4>;
+
+  // Swap pool used for native allocations. May be file-backed. Needs to be first,
+  // as the dedupe sets below rely on it.
+  std::unique_ptr<SwapSpace> swap_space_;
+
+  bool dedupe_enabled_;
+
+  ArrayDedupeSet<uint8_t> dedupe_code_;
+  ArrayDedupeSet<SrcMapElem> dedupe_src_mapping_table_;
+  ArrayDedupeSet<uint8_t> dedupe_mapping_table_;
+  ArrayDedupeSet<uint8_t> dedupe_vmap_table_;
+  ArrayDedupeSet<uint8_t> dedupe_gc_map_;
+  ArrayDedupeSet<uint8_t> dedupe_cfi_info_;
+  ArrayDedupeSet<LinkerPatch> dedupe_linker_patches_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_
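Note the intended call pattern for the class above: every Deduplicate* call is paired with a Release*, and Release* is a no-op while deduplication is enabled because the dedupe sets own the arrays. A minimal sketch, buildable only inside the ART tree (function name is illustrative):

#include <cstdint>

#include "driver/compiled_method_storage.h"
#include "utils/array_ref.h"

namespace art {

// Sketch only: identical inputs return the same LengthPrefixedArray pointer
// while DedupeEnabled() is true, and ReleaseCode() then leaves ownership with
// the dedupe set.
void DedupeSketch() {
  CompiledMethodStorage storage(/* swap_fd */ -1);  // -1: no swap file backing
  const uint8_t raw[] = {1u, 2u, 3u};
  ArrayRef<const uint8_t> code(raw);
  const LengthPrefixedArray<uint8_t>* first = storage.DeduplicateCode(code);
  const LengthPrefixedArray<uint8_t>* second = storage.DeduplicateCode(code);
  // first == second: the second request was deduplicated.
  storage.ReleaseCode(first);
  storage.ReleaseCode(second);
}

}  // namespace art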
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
new file mode 100644
index 0000000..c6dbd24
--- /dev/null
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "compiled_method_storage.h"
+#include "compiled_method.h"
+#include "compiler_driver.h"
+#include "compiler_options.h"
+#include "dex/verification_results.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+
+namespace art {
+
+TEST(CompiledMethodStorage, Deduplicate) {
+  CompilerOptions compiler_options;
+  VerificationResults verification_results(&compiler_options);
+  DexFileToMethodInlinerMap method_inliner_map;
+  CompilerDriver driver(&compiler_options,
+                        &verification_results,
+                        &method_inliner_map,
+                        Compiler::kOptimizing, kNone,
+                        nullptr,
+                        false,
+                        nullptr,
+                        nullptr,
+                        nullptr,
+                        1u,
+                        false,
+                        false,
+                        "",
+                        false,
+                        nullptr,
+                        -1,
+                        "");
+  CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
+
+  ASSERT_TRUE(storage->DedupeEnabled());  // The default.
+
+  const uint8_t raw_code1[] = { 1u, 2u, 3u };
+  const uint8_t raw_code2[] = { 4u, 3u, 2u, 1u };
+  ArrayRef<const uint8_t> code[] = {
+      ArrayRef<const uint8_t>(raw_code1),
+      ArrayRef<const uint8_t>(raw_code2),
+  };
+  const SrcMapElem raw_src_map1[] = { { 1u, 2u }, { 3u, 4u }, { 5u, 6u } };
+  const SrcMapElem raw_src_map2[] = { { 8u, 7u }, { 6u, 5u }, { 4u, 3u }, { 2u, 1u } };
+  ArrayRef<const SrcMapElem> src_map[] = {
+      ArrayRef<const SrcMapElem>(raw_src_map1),
+      ArrayRef<const SrcMapElem>(raw_src_map2),
+  };
+  const uint8_t raw_mapping_table1[] = { 5, 6, 7 };
+  const uint8_t raw_mapping_table2[] = { 7, 6, 5, 4 };
+  ArrayRef<const uint8_t> mapping_table[] = {
+      ArrayRef<const uint8_t>(raw_mapping_table1),
+      ArrayRef<const uint8_t>(raw_mapping_table2),
+  };
+  const uint8_t raw_vmap_table1[] = { 2, 4, 6 };
+  const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 };
+  ArrayRef<const uint8_t> vmap_table[] = {
+      ArrayRef<const uint8_t>(raw_vmap_table1),
+      ArrayRef<const uint8_t>(raw_vmap_table2),
+  };
+  const uint8_t raw_gc_map1[] = { 9, 8, 7 };
+  const uint8_t raw_gc_map2[] = { 6, 7, 8, 9 };
+  ArrayRef<const uint8_t> gc_map[] = {
+      ArrayRef<const uint8_t>(raw_gc_map1),
+      ArrayRef<const uint8_t>(raw_gc_map2),
+  };
+  const uint8_t raw_cfi_info1[] = { 1, 3, 5 };
+  const uint8_t raw_cfi_info2[] = { 8, 6, 4, 2 };
+  ArrayRef<const uint8_t> cfi_info[] = {
+      ArrayRef<const uint8_t>(raw_cfi_info1),
+      ArrayRef<const uint8_t>(raw_cfi_info2),
+  };
+  const LinkerPatch raw_patches1[] = {
+      LinkerPatch::CodePatch(0u, nullptr, 1u),
+      LinkerPatch::MethodPatch(4u, nullptr, 1u),
+  };
+  const LinkerPatch raw_patches2[] = {
+      LinkerPatch::CodePatch(0u, nullptr, 1u),
+      LinkerPatch::MethodPatch(4u, nullptr, 2u),
+  };
+  ArrayRef<const LinkerPatch> patches[] = {
+      ArrayRef<const LinkerPatch>(raw_patches1),
+      ArrayRef<const LinkerPatch>(raw_patches2),
+  };
+
+  std::vector<CompiledMethod*> compiled_methods;
+  compiled_methods.reserve(1u << 7);
+  for (auto&& c : code) {
+    for (auto&& s : src_map) {
+      for (auto&& m : mapping_table) {
+        for (auto&& v : vmap_table) {
+          for (auto&& g : gc_map) {
+            for (auto&& f : cfi_info) {
+              for (auto&& p : patches) {
+                compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod(
+                        &driver, kNone, c, 0u, 0u, 0u, s, m, v, g, f, p));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  constexpr size_t code_bit = 1u << 6;
+  constexpr size_t src_map_bit = 1u << 5;
+  constexpr size_t mapping_table_bit = 1u << 4;
+  constexpr size_t vmap_table_bit = 1u << 3;
+  constexpr size_t gc_map_bit = 1u << 2;
+  constexpr size_t cfi_info_bit = 1u << 1;
+  constexpr size_t patches_bit = 1u << 0;
+  CHECK_EQ(compiled_methods.size(), 1u << 7);
+  for (size_t i = 0; i != compiled_methods.size(); ++i) {
+    for (size_t j = 0; j != compiled_methods.size(); ++j) {
+      CompiledMethod* lhs = compiled_methods[i];
+      CompiledMethod* rhs = compiled_methods[j];
+      bool same_code = ((i ^ j) & code_bit) == 0u;
+      bool same_src_map = ((i ^ j) & src_map_bit) == 0u;
+      bool same_mapping_table = ((i ^ j) & mapping_table_bit) == 0u;
+      bool same_vmap_table = ((i ^ j) & vmap_table_bit) == 0u;
+      bool same_gc_map = ((i ^ j) & gc_map_bit) == 0u;
+      bool same_cfi_info = ((i ^ j) & cfi_info_bit) == 0u;
+      bool same_patches = ((i ^ j) & patches_bit) == 0u;
+      ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data())
+          << i << " " << j;
+      ASSERT_EQ(same_src_map, lhs->GetSrcMappingTable().data() == rhs->GetSrcMappingTable().data())
+          << i << " " << j;
+      ASSERT_EQ(same_mapping_table, lhs->GetMappingTable().data() == rhs->GetMappingTable().data())
+          << i << " " << j;
+      ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data())
+          << i << " " << j;
+      ASSERT_EQ(same_gc_map, lhs->GetGcMap().data() == rhs->GetGcMap().data())
+          << i << " " << j;
+      ASSERT_EQ(same_cfi_info, lhs->GetCFIInfo().data() == rhs->GetCFIInfo().data())
+          << i << " " << j;
+      ASSERT_EQ(same_patches, lhs->GetPatches().data() == rhs->GetPatches().data())
+          << i << " " << j;
+    }
+  }
+  for (CompiledMethod* method : compiled_methods) {
+    CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&driver, method);
+  }
+}
+
+}  // namespace art
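The exhaustive pairwise checks above rest on the index encoding: bit k of a method's position in compiled_methods records which of the two candidate inputs was used for table k, so two methods should share a deduplicated table exactly when that bit of i ^ j is zero. A tiny standalone illustration of the check (indices are made up):

#include <cstdio>

int main() {
  // 0x55 = 1010101b and 0x54 = 1010100b differ only in bit 0 (the linker
  // patches input), so every other table is expected to be pointer-identical.
  const unsigned i = 0x55, j = 0x54;
  const unsigned code_bit = 1u << 6;
  const unsigned patches_bit = 1u << 0;
  std::printf("same code:    %d\n", ((i ^ j) & code_bit) == 0u);     // prints 1
  std::printf("same patches: %d\n", ((i ^ j) & patches_bit) == 0u);  // prints 0
}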
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index e535afd..14ba81d 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -187,15 +187,11 @@
         // Search dex file for localized ssb index, may fail if member's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
         std::string temp;
-        const DexFile::StringId* string_id =
-            dex_file->FindStringId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
-        if (string_id != nullptr) {
-          const DexFile::TypeId* type_id =
-             dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
-          if (type_id != nullptr) {
-            // medium path, needs check of static storage base being initialized
-            storage_idx = dex_file->GetIndexForTypeId(*type_id);
-          }
+        const DexFile::TypeId* type_id =
+           dex_file->FindTypeId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
+        if (type_id != nullptr) {
+          // medium path, needs check of static storage base being initialized
+          storage_idx = dex_file->GetIndexForTypeId(*type_id);
         }
       }
       if (storage_idx != DexFile::kDexNoIndex) {
@@ -370,7 +366,9 @@
           nullptr, kVirtual);
     } else {
       StackHandleScope<1> hs(soa.Self());
-      auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(*devirt_target->dex_file)));
+      auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
+          *devirt_target->dex_file,
+          class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
       called_method = class_linker->ResolveMethod(
           *devirt_target->dex_file, devirt_target->dex_method_index, target_dex_cache,
           class_loader, nullptr, kVirtual);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f74b079..d055b37 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -348,9 +348,8 @@
                                const std::string& dump_cfg_file_name, bool dump_cfg_append,
                                CumulativeLogger* timer, int swap_fd,
                                const std::string& profile_file)
-    : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
-      swap_space_allocator_(new SwapAllocator<void>(swap_space_.get())),
-      profile_present_(false), compiler_options_(compiler_options),
+    : profile_present_(false),
+      compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
@@ -369,7 +368,6 @@
       had_hard_verifier_failure_(false),
       thread_count_(thread_count),
       stats_(new AOTCompilationStats),
-      dedupe_enabled_(true),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
       dump_cfg_file_name_(dump_cfg_file_name),
@@ -377,12 +375,8 @@
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
-      dedupe_code_("dedupe code", *swap_space_allocator_),
-      dedupe_src_mapping_table_("dedupe source mapping table", *swap_space_allocator_),
-      dedupe_mapping_table_("dedupe mapping table", *swap_space_allocator_),
-      dedupe_vmap_table_("dedupe vmap table", *swap_space_allocator_),
-      dedupe_gc_map_("dedupe gc map", *swap_space_allocator_),
-      dedupe_cfi_info_("dedupe cfi info", *swap_space_allocator_) {
+      dex_files_for_oat_file_(nullptr),
+      compiled_method_storage_(swap_fd) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
@@ -402,36 +396,6 @@
   }
 }
 
-SwapVector<uint8_t>* CompilerDriver::DeduplicateCode(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_code_.Add(Thread::Current(), code);
-}
-
-SwapSrcMap* CompilerDriver::DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_src_mapping_table_.Add(Thread::Current(), src_map);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateMappingTable(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_mapping_table_.Add(Thread::Current(), code);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateVMapTable(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_vmap_table_.Add(Thread::Current(), code);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateGCMap(const ArrayRef<const uint8_t>& code) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_gc_map_.Add(Thread::Current(), code);
-}
-
-SwapVector<uint8_t>* CompilerDriver::DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info) {
-  DCHECK(dedupe_enabled_);
-  return dedupe_cfi_info_.Add(Thread::Current(), cfi_info);
-}
-
 CompilerDriver::~CompilerDriver() {
   Thread* self = Thread::Current();
   {
@@ -447,6 +411,7 @@
   compiler_->UnInit();
 }
 
+
 #define CREATE_TRAMPOLINE(type, abi, offset) \
     if (Is64BitInstructionSet(instruction_set_)) { \
       return CreateTrampoline64(instruction_set_, abi, \
@@ -456,14 +421,6 @@
                                 type ## _ENTRYPOINT_OFFSET(4, offset)); \
     }
 
-const std::vector<uint8_t>* CompilerDriver::CreateInterpreterToInterpreterBridge() const {
-  CREATE_TRAMPOLINE(INTERPRETER, kInterpreterAbi, pInterpreterToInterpreterBridge)
-}
-
-const std::vector<uint8_t>* CompilerDriver::CreateInterpreterToCompiledCodeBridge() const {
-  CREATE_TRAMPOLINE(INTERPRETER, kInterpreterAbi, pInterpreterToCompiledCodeBridge)
-}
-
 const std::vector<uint8_t>* CompilerDriver::CreateJniDlsymLookup() const {
   CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookup)
 }
@@ -602,7 +559,7 @@
     }
   } else if ((access_flags & kAccAbstract) != 0) {
     // Abstract methods don't have code.
-  } else {
+  } else if (Runtime::Current()->IsAotCompiler()) {
     const VerifiedMethod* verified_method =
         driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
     bool compile = compilation_enabled &&
@@ -641,6 +598,13 @@
               ? dex_to_dex_compilation_level
               : optimizer::DexToDexCompilationLevel::kRequired);
     }
+  } else {
+    // This is for the JIT compiler, which has already ensured the class is verified.
+    // We can go straight to compiling.
+    DCHECK(Runtime::Current()->UseJit());
+    compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
+                                                     class_def_idx, method_idx, class_loader,
+                                                     dex_file, dex_cache);
   }
   if (kTimeCompileMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
@@ -733,6 +697,9 @@
 }
 
 CompiledMethod* CompilerDriver::CompileArtMethod(Thread* self, ArtMethod* method) {
+  DCHECK_EQ(method,
+            method->GetInterfaceMethodIfProxy(
+                Runtime::Current()->GetClassLinker()->GetImagePointerSize()));
   const uint32_t method_idx = method->GetDexMethodIndex();
   const uint32_t access_flags = method->GetAccessFlags();
   const InvokeType invoke_type = method->GetInvokeType();
@@ -961,7 +928,9 @@
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
       StackHandleScope<2> hs2(self);
-      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(*dex_file)));
+      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(
+          *dex_file,
+          Runtime::Current()->GetLinearAlloc())));
       Handle<mirror::Class> klass(hs2.NewHandle(
           class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
                                     NullHandle<mirror::ClassLoader>())));
@@ -1406,8 +1375,7 @@
 }
 
 DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) {
-  // Currently only image dex caches have fixed array layout.
-  return IsImage() && GetSupportBootImageFixup()
+  return ContainsElement(GetDexFilesForOatFile(), dex_file)
       ? DexCacheArraysLayout(GetInstructionSetPointerSize(instruction_set_), dex_file)
       : DexCacheArraysLayout();
 }
@@ -2018,9 +1986,11 @@
     ClassLinker* class_linker = manager_->GetClassLinker();
     const DexFile& dex_file = *manager_->GetDexFile();
     StackHandleScope<2> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->RegisterDexFile(dex_file)));
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(manager_->GetClassLoader())));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
+        dex_file,
+        class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
     mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
 
     if (klass == nullptr) {
@@ -2639,16 +2609,7 @@
   oss << " native alloc=" << PrettySize(allocated_space) << " free="
       << PrettySize(free_space);
 #endif
-  if (swap_space_.get() != nullptr) {
-    oss << " swap=" << PrettySize(swap_space_->GetSize());
-  }
-  if (extended) {
-    oss << "\nCode dedupe: " << dedupe_code_.DumpStats();
-    oss << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats();
-    oss << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats();
-    oss << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats();
-    oss << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats();
-  }
+  compiled_method_storage_.DumpMemoryUsage(oss, extended);
   return oss.str();
 }
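GetDexCacheArraysLayout above now keys off membership in the oat file's dex file list instead of IsImage(); ContainsElement from ART's stl_util.h is essentially a std::find wrapper, so the new condition behaves like this standalone sketch (names are illustrative):

#include <algorithm>
#include <vector>

// Illustrative stand-in for ContainsElement(GetDexFilesForOatFile(), dex_file):
// fixed-layout dex cache arrays are only used for dex files that will be written
// into the oat file currently being produced.
template <typename Container, typename T>
bool ContainsElementSketch(const Container& container, const T& value) {
  return std::find(container.begin(), container.end(), value) != container.end();
}

int main() {
  std::vector<int> dex_files_for_oat_file = {1, 2, 3};
  return ContainsElementSketch(dex_files_for_oat_file, 2) ? 0 : 1;  // 0: found
}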
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 11e782f..4ed4dc6 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -30,6 +30,7 @@
 #include "class_reference.h"
 #include "compiler.h"
 #include "dex_file.h"
+#include "driver/compiled_method_storage.h"
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
@@ -39,9 +40,7 @@
 #include "safe_map.h"
 #include "thread_pool.h"
 #include "utils/array_ref.h"
-#include "utils/dedupe_set.h"
 #include "utils/dex_cache_arrays_layout.h"
-#include "utils/swap_space.h"
 
 namespace art {
 
@@ -80,8 +79,6 @@
   kQuickAbi
 };
 
-static constexpr bool kUseMurmur3Hash = true;
-
 class CompilerDriver {
  public:
   // Create a compiler targeting the requested "instruction_set".
@@ -105,7 +102,20 @@
 
   ~CompilerDriver();
 
-  void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files,
+  // Set dex files that will be stored in the oat file after being compiled.
+  void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) {
+    dex_files_for_oat_file_ = &dex_files;
+  }
+
+  // Get dex files that will be stored in the oat file after being compiled.
+  ArrayRef<const DexFile* const> GetDexFilesForOatFile() const {
+    return (dex_files_for_oat_file_ != nullptr)
+        ? ArrayRef<const DexFile* const>(*dex_files_for_oat_file_)
+        : ArrayRef<const DexFile* const>();
+  }
+
+  void CompileAll(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
@@ -155,10 +165,6 @@
   }
 
   // Generate the trampolines that are invoked by unresolved direct methods.
-  const std::vector<uint8_t>* CreateInterpreterToInterpreterBridge() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateInterpreterToCompiledCodeBridge() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
   const std::vector<uint8_t>* CreateJniDlsymLookup() const
       SHARED_REQUIRES(Locks::mutator_lock_);
   const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const
@@ -392,10 +398,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  SwapAllocator<void>& GetSwapSpaceAllocator() {
-    return *swap_space_allocator_.get();
-  }
-
   bool WriteElf(const std::string& android_root,
                 bool is_host,
                 const std::vector<const DexFile*>& dex_files,
@@ -435,10 +437,10 @@
   }
 
   void SetDedupeEnabled(bool dedupe_enabled) {
-    dedupe_enabled_ = dedupe_enabled;
+    compiled_method_storage_.SetDedupeEnabled(dedupe_enabled);
   }
   bool DedupeEnabled() const {
-    return dedupe_enabled_;
+    return compiled_method_storage_.DedupeEnabled();
   }
 
   // Checks if class specified by type_idx is one of the image_classes_
@@ -459,13 +461,6 @@
                                        uint16_t class_def_idx,
                                        const DexFile& dex_file) const;
 
-  SwapVector<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
-  SwapSrcMap* DeduplicateSrcMappingTable(const ArrayRef<SrcMapElem>& src_map);
-  SwapVector<uint8_t>* DeduplicateMappingTable(const ArrayRef<const uint8_t>& code);
-  SwapVector<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& code);
-  SwapVector<uint8_t>* DeduplicateGCMap(const ArrayRef<const uint8_t>& code);
-  SwapVector<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
-
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
 
@@ -483,6 +478,10 @@
     return compiler_kind_;
   }
 
+  CompiledMethodStorage* GetCompiledMethodStorage() {
+    return &compiled_method_storage_;
+  }
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -603,11 +602,6 @@
                       ThreadPool* thread_pool, TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first
-  // as other fields rely on this.
-  std::unique_ptr<SwapSpace> swap_space_;
-  std::unique_ptr<SwapAllocator<void> > swap_space_allocator_;
-
   ProfileFile profile_file_;
   bool profile_present_;
 
@@ -667,7 +661,6 @@
   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
 
-  bool dedupe_enabled_;
   bool dump_stats_;
   const bool dump_passes_;
   const std::string dump_cfg_file_name_;
@@ -682,93 +675,10 @@
 
   bool support_boot_image_fixup_;
 
-  // DeDuplication data structures, these own the corresponding byte arrays.
-  template <typename ContentType>
-  class DedupeHashFunc {
-   public:
-    size_t operator()(const ArrayRef<ContentType>& array) const {
-      const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data());
-      static_assert(IsPowerOfTwo(sizeof(ContentType)),
-          "ContentType is not power of two, don't know whether array layout is as assumed");
-      uint32_t len = sizeof(ContentType) * array.size();
-      if (kUseMurmur3Hash) {
-        static constexpr uint32_t c1 = 0xcc9e2d51;
-        static constexpr uint32_t c2 = 0x1b873593;
-        static constexpr uint32_t r1 = 15;
-        static constexpr uint32_t r2 = 13;
-        static constexpr uint32_t m = 5;
-        static constexpr uint32_t n = 0xe6546b64;
+  // List of dex files that will be stored in the oat file.
+  const std::vector<const DexFile*>* dex_files_for_oat_file_;
 
-        uint32_t hash = 0;
-
-        const int nblocks = len / 4;
-        typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
-        const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data);
-        int i;
-        for (i = 0; i < nblocks; i++) {
-          uint32_t k = blocks[i];
-          k *= c1;
-          k = (k << r1) | (k >> (32 - r1));
-          k *= c2;
-
-          hash ^= k;
-          hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
-        }
-
-        const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4);
-        uint32_t k1 = 0;
-
-        switch (len & 3) {
-          case 3:
-            k1 ^= tail[2] << 16;
-            FALLTHROUGH_INTENDED;
-          case 2:
-            k1 ^= tail[1] << 8;
-            FALLTHROUGH_INTENDED;
-          case 1:
-            k1 ^= tail[0];
-
-            k1 *= c1;
-            k1 = (k1 << r1) | (k1 >> (32 - r1));
-            k1 *= c2;
-            hash ^= k1;
-        }
-
-        hash ^= len;
-        hash ^= (hash >> 16);
-        hash *= 0x85ebca6b;
-        hash ^= (hash >> 13);
-        hash *= 0xc2b2ae35;
-        hash ^= (hash >> 16);
-
-        return hash;
-      } else {
-        size_t hash = 0x811c9dc5;
-        for (uint32_t i = 0; i < len; ++i) {
-          hash = (hash * 16777619) ^ data[i];
-        }
-        hash += hash << 13;
-        hash ^= hash >> 7;
-        hash += hash << 3;
-        hash ^= hash >> 17;
-        hash += hash << 5;
-        return hash;
-      }
-    }
-  };
-
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_code_;
-  DedupeSet<ArrayRef<SrcMapElem>,
-            SwapSrcMap, size_t, DedupeHashFunc<SrcMapElem>, 4> dedupe_src_mapping_table_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_mapping_table_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_vmap_table_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_gc_map_;
-  DedupeSet<ArrayRef<const uint8_t>,
-            SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_cfi_info_;
+  CompiledMethodStorage compiled_method_storage_;
 
   friend class CompileClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
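The new dex_files_for_oat_file_ member is a non-owning pointer, so whoever calls SetDexFilesForOatFile() must keep the vector alive for the driver's lifetime, and GetDexFilesForOatFile() degrades to an empty ArrayRef when nothing was set. A standalone mirror of that pattern (types simplified to int for illustration):

#include <cstdio>
#include <vector>

// Mirrors the setter/getter pair above: store a non-owning pointer, hand back
// an empty view when it was never set. The real code returns an
// ArrayRef<const DexFile* const> rather than a copy.
class OatInputsSketch {
 public:
  void SetInputs(const std::vector<int>& inputs) { inputs_ = &inputs; }
  std::vector<int> GetInputs() const {
    return inputs_ != nullptr ? *inputs_ : std::vector<int>();
  }

 private:
  const std::vector<int>* inputs_ = nullptr;  // not owned; caller keeps it alive
};

int main() {
  OatInputsSketch sketch;
  std::printf("%zu\n", sketch.GetInputs().size());  // 0: nothing set yet
  std::vector<int> inputs = {10, 20};
  sketch.SetInputs(inputs);
  std::printf("%zu\n", sketch.GetInputs().size());  // 2
}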
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 3f5a1ea..a24c8a3 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -16,6 +16,8 @@
 
 #include "compiler_options.h"
 
+#include <fstream>
+
 #include "dex/pass_manager.h"
 
 namespace art {
@@ -27,8 +29,8 @@
       small_method_threshold_(kDefaultSmallMethodThreshold),
       tiny_method_threshold_(kDefaultTinyMethodThreshold),
       num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
-      inline_depth_limit_(kDefaultInlineDepthLimit),
-      inline_max_code_units_(kDefaultInlineMaxCodeUnits),
+      inline_depth_limit_(kUnsetInlineDepthLimit),
+      inline_max_code_units_(kUnsetInlineMaxCodeUnits),
       include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
@@ -38,7 +40,7 @@
       implicit_suspend_checks_(false),
       compile_pic_(false),
       verbose_methods_(nullptr),
-      pass_manager_options_(new PassManagerOptions),
+      pass_manager_options_(),
       abort_on_hard_verifier_failure_(false),
       init_failure_output_(nullptr) {
 }
@@ -65,7 +67,6 @@
                                  bool implicit_suspend_checks,
                                  bool compile_pic,
                                  const std::vector<std::string>* verbose_methods,
-                                 PassManagerOptions* pass_manager_options,
                                  std::ostream* init_failure_output,
                                  bool abort_on_hard_verifier_failure
                                  ) :  // NOLINT(whitespace/parens)
@@ -86,9 +87,155 @@
     implicit_suspend_checks_(implicit_suspend_checks),
     compile_pic_(compile_pic),
     verbose_methods_(verbose_methods),
-    pass_manager_options_(pass_manager_options),
+    pass_manager_options_(),
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
     init_failure_output_(init_failure_output) {
 }
 
+void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--huge-method-max", &huge_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseLargeMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--large-method-max", &large_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseSmallMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--small-method-max", &small_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseTinyMethodMax(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--tiny-method-max", &tiny_method_threshold_, Usage);
+}
+
+void CompilerOptions::ParseNumDexMethods(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage);
+}
+
+void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage);
+}
+
+void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
+  ParseUintOption(option, "--inline-max-code-units=", &inline_max_code_units_, Usage);
+}
+
+void CompilerOptions::ParseDisablePasses(const StringPiece& option,
+                                         UsageFn Usage ATTRIBUTE_UNUSED) {
+  DCHECK(option.starts_with("--disable-passes="));
+  const std::string disable_passes = option.substr(strlen("--disable-passes=")).data();
+  pass_manager_options_.SetDisablePassList(disable_passes);
+}
+
+void CompilerOptions::ParsePrintPasses(const StringPiece& option,
+                                       UsageFn Usage ATTRIBUTE_UNUSED) {
+  DCHECK(option.starts_with("--print-passes="));
+  const std::string print_passes = option.substr(strlen("--print-passes=")).data();
+  pass_manager_options_.SetPrintPassList(print_passes);
+}
+
+void CompilerOptions::ParseDumpCfgPasses(const StringPiece& option,
+                                         UsageFn Usage ATTRIBUTE_UNUSED) {
+  DCHECK(option.starts_with("--dump-cfg-passes="));
+  const std::string dump_passes_string = option.substr(strlen("--dump-cfg-passes=")).data();
+  pass_manager_options_.SetDumpPassList(dump_passes_string);
+}
+
+void CompilerOptions::ParsePassOptions(const StringPiece& option,
+                                       UsageFn Usage ATTRIBUTE_UNUSED) {
+  DCHECK(option.starts_with("--pass-options="));
+  const std::string pass_options = option.substr(strlen("--pass-options=")).data();
+  pass_manager_options_.SetOverriddenPassOptions(pass_options);
+}
+
+void CompilerOptions::ParseDumpInitFailures(const StringPiece& option,
+                                            UsageFn Usage ATTRIBUTE_UNUSED) {
+  DCHECK(option.starts_with("--dump-init-failures="));
+  std::string file_name = option.substr(strlen("--dump-init-failures=")).data();
+  init_failure_output_.reset(new std::ofstream(file_name));
+  if (init_failure_output_.get() == nullptr) {
+    LOG(ERROR) << "Failed to allocate ofstream";
+  } else if (init_failure_output_->fail()) {
+    LOG(ERROR) << "Failed to open " << file_name << " for writing the initialization "
+               << "failures.";
+    init_failure_output_.reset();
+  }
+}
+
+bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usage) {
+  if (option.starts_with("--compiler-filter=")) {
+    const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
+    if (strcmp(compiler_filter_string, "verify-none") == 0) {
+      compiler_filter_ = CompilerOptions::kVerifyNone;
+    } else if (strcmp(compiler_filter_string, "interpret-only") == 0) {
+      compiler_filter_ = CompilerOptions::kInterpretOnly;
+    } else if (strcmp(compiler_filter_string, "verify-at-runtime") == 0) {
+      compiler_filter_ = CompilerOptions::kVerifyAtRuntime;
+    } else if (strcmp(compiler_filter_string, "space") == 0) {
+      compiler_filter_ = CompilerOptions::kSpace;
+    } else if (strcmp(compiler_filter_string, "balanced") == 0) {
+      compiler_filter_ = CompilerOptions::kBalanced;
+    } else if (strcmp(compiler_filter_string, "speed") == 0) {
+      compiler_filter_ = CompilerOptions::kSpeed;
+    } else if (strcmp(compiler_filter_string, "everything") == 0) {
+      compiler_filter_ = CompilerOptions::kEverything;
+    } else if (strcmp(compiler_filter_string, "time") == 0) {
+      compiler_filter_ = CompilerOptions::kTime;
+    } else {
+      Usage("Unknown --compiler-filter value %s", compiler_filter_string);
+    }
+  } else if (option == "--compile-pic") {
+    compile_pic_ = true;
+  } else if (option.starts_with("--huge-method-max=")) {
+    ParseHugeMethodMax(option, Usage);
+  } else if (option.starts_with("--large-method-max=")) {
+    ParseLargeMethodMax(option, Usage);
+  } else if (option.starts_with("--small-method-max=")) {
+    ParseSmallMethodMax(option, Usage);
+  } else if (option.starts_with("--tiny-method-max=")) {
+    ParseTinyMethodMax(option, Usage);
+  } else if (option.starts_with("--num-dex-methods=")) {
+    ParseNumDexMethods(option, Usage);
+  } else if (option.starts_with("--inline-depth-limit=")) {
+    ParseInlineDepthLimit(option, Usage);
+  } else if (option.starts_with("--inline-max-code-units=")) {
+    ParseInlineMaxCodeUnits(option, Usage);
+  } else if (option == "--generate-debug-info" || option == "-g") {
+    generate_debug_info_ = true;
+  } else if (option == "--no-generate-debug-info") {
+    generate_debug_info_ = false;
+  } else if (option == "--debuggable") {
+    debuggable_ = true;
+    generate_debug_info_ = true;
+  } else if (option.starts_with("--top-k-profile-threshold=")) {
+    ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
+  } else if (option == "--include-patch-information") {
+    include_patch_information_ = true;
+  } else if (option == "--no-include-patch-information") {
+    include_patch_information_ = false;
+  } else if (option == "--abort-on-hard-verifier-error") {
+    abort_on_hard_verifier_failure_ = true;
+  } else if (option == "--print-pass-names") {
+    pass_manager_options_.SetPrintPassNames(true);
+  } else if (option.starts_with("--disable-passes=")) {
+    ParseDisablePasses(option, Usage);
+  } else if (option.starts_with("--print-passes=")) {
+    ParsePrintPasses(option, Usage);
+  } else if (option == "--print-all-passes") {
+    pass_manager_options_.SetPrintAllPasses();
+  } else if (option.starts_with("--dump-cfg-passes=")) {
+    ParseDumpCfgPasses(option, Usage);
+  } else if (option == "--print-pass-options") {
+    pass_manager_options_.SetPrintPassOptions(true);
+  } else if (option.starts_with("--pass-options=")) {
+    ParsePassOptions(option, Usage);
+  } else if (option.starts_with("--dump-init-failures=")) {
+    ParseDumpInitFailures(option, Usage);
+  } else {
+    // Option not recognized.
+    return false;
+  }
+  return true;
+}
+
 }  // namespace art
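ParseCompilerOption above is a straight prefix dispatch over the raw argument, with the numeric options funnelling through ParseUintOption. The shape of that parsing, as a self-contained sketch that leaves out the ART Usage() error reporting (helper names are illustrative):

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

// Illustrative "--name=value" parsing in the style of the dispatch above; the
// real code reports malformed values through the UsageFn instead of ignoring them.
static bool ParseUintSketch(const std::string& option, const char* prefix, size_t* out) {
  const size_t prefix_len = std::strlen(prefix);
  if (option.compare(0, prefix_len, prefix) != 0) {
    return false;  // not this option
  }
  *out = std::strtoul(option.c_str() + prefix_len, nullptr, 10);
  return true;
}

int main() {
  size_t inline_max_code_units = 20;  // default
  ParseUintSketch("--inline-max-code-units=32", "--inline-max-code-units=", &inline_max_code_units);
  std::printf("inline_max_code_units=%zu\n", inline_max_code_units);  // 32
}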
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 18f215d..e6acab4 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -22,12 +22,12 @@
 #include <vector>
 
 #include "base/macros.h"
+#include "dex/pass_manager.h"
 #include "globals.h"
+#include "utils.h"
 
 namespace art {
 
-class PassManagerOptions;
-
 class CompilerOptions FINAL {
  public:
   enum CompilerFilter {
@@ -53,6 +53,8 @@
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
   static const size_t kDefaultInlineMaxCodeUnits = 20;
+  static constexpr size_t kUnsetInlineDepthLimit = -1;
+  static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
 
   // Default inlining settings when the space filter is used.
   static constexpr size_t kSpaceFilterInlineDepthLimit = 3;
@@ -78,7 +80,6 @@
                   bool implicit_suspend_checks,
                   bool compile_pic,
                   const std::vector<std::string>* verbose_methods,
-                  PassManagerOptions* pass_manager_options,
                   std::ostream* init_failure_output,
                   bool abort_on_hard_verifier_failure);
 
@@ -200,47 +201,64 @@
   }
 
   std::ostream* GetInitFailureOutput() const {
-    return init_failure_output_;
+    return init_failure_output_.get();
   }
 
   const PassManagerOptions* GetPassManagerOptions() const {
-    return pass_manager_options_.get();
+    return &pass_manager_options_;
   }
 
   bool AbortOnHardVerifierFailure() const {
     return abort_on_hard_verifier_failure_;
   }
 
+  bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
+
  private:
+  void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
+  void ParsePassOptions(const StringPiece& option, UsageFn Usage);
+  void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
+  void ParsePrintPasses(const StringPiece& option, UsageFn Usage);
+  void ParseDisablePasses(const StringPiece& option, UsageFn Usage);
+  void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage);
+  void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage);
+  void ParseNumDexMethods(const StringPiece& option, UsageFn Usage);
+  void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseLargeMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseHugeMethodMax(const StringPiece& option, UsageFn Usage);
+
   CompilerFilter compiler_filter_;
-  const size_t huge_method_threshold_;
-  const size_t large_method_threshold_;
-  const size_t small_method_threshold_;
-  const size_t tiny_method_threshold_;
-  const size_t num_dex_methods_threshold_;
-  const size_t inline_depth_limit_;
-  const size_t inline_max_code_units_;
-  const bool include_patch_information_;
+  size_t huge_method_threshold_;
+  size_t large_method_threshold_;
+  size_t small_method_threshold_;
+  size_t tiny_method_threshold_;
+  size_t num_dex_methods_threshold_;
+  size_t inline_depth_limit_;
+  size_t inline_max_code_units_;
+  bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
-  const double top_k_profile_threshold_;
-  const bool debuggable_;
-  const bool generate_debug_info_;
-  const bool implicit_null_checks_;
-  const bool implicit_so_checks_;
-  const bool implicit_suspend_checks_;
-  const bool compile_pic_;
+  double top_k_profile_threshold_;
+  bool debuggable_;
+  bool generate_debug_info_;
+  bool implicit_null_checks_;
+  bool implicit_so_checks_;
+  bool implicit_suspend_checks_;
+  bool compile_pic_;
 
   // Vector of methods to have verbose output enabled for.
-  const std::vector<std::string>* const verbose_methods_;
+  const std::vector<std::string>* verbose_methods_;
 
-  std::unique_ptr<PassManagerOptions> pass_manager_options_;
+  PassManagerOptions pass_manager_options_;
 
   // Abort compilation with an error if we find a class that fails verification with a hard
   // failure.
-  const bool abort_on_hard_verifier_failure_;
+  bool abort_on_hard_verifier_failure_;
 
   // Log initialization of initialization failures to this stream if not null.
-  std::ostream* const init_failure_output_;
+  std::unique_ptr<std::ostream> init_failure_output_;
+
+  friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
 };
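The kUnset* constants above rely on -1 converting to the largest size_t value, which keeps "not specified on the command line" distinguishable from every real limit, including the defaults. For example:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  constexpr size_t kUnsetInlineDepthLimit = -1;  // wraps to SIZE_MAX
  constexpr size_t kDefaultInlineDepthLimit = 3;
  std::printf("%d\n", kUnsetInlineDepthLimit == SIZE_MAX);                  // 1
  std::printf("%d\n", kUnsetInlineDepthLimit != kDefaultInlineDepthLimit);  // 1
}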
diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h
index d8077d5..60241f7 100644
--- a/compiler/dwarf/debug_frame_opcode_writer.h
+++ b/compiler/dwarf/debug_frame_opcode_writer.h
@@ -31,8 +31,10 @@
 //  * Choose the most compact encoding of a given opcode.
 //  * Keep track of current state and convert absolute values to deltas.
 //  * Divide by header-defined factors as appropriate.
-template<typename Allocator = std::allocator<uint8_t> >
-class DebugFrameOpCodeWriter : private Writer<Allocator> {
+template<typename Vector = std::vector<uint8_t> >
+class DebugFrameOpCodeWriter : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   // To save space, DWARF divides most offsets by header-defined factors.
   // They are used in integer divisions, so we make them constants.
@@ -288,11 +290,12 @@
 
   void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; }
 
-  using Writer<Allocator>::data;
+  using Writer<Vector>::data;
 
   DebugFrameOpCodeWriter(bool enabled = true,
-                         const Allocator& alloc = Allocator())
-      : Writer<Allocator>(&opcodes_),
+                         const typename Vector::allocator_type& alloc =
+                             typename Vector::allocator_type())
+      : Writer<Vector>(&opcodes_),
         enabled_(enabled),
         opcodes_(alloc),
         current_cfa_offset_(0),
@@ -318,7 +321,7 @@
   }
 
   bool enabled_;  // If disabled all writes are no-ops.
-  std::vector<uint8_t, Allocator> opcodes_;
+  Vector opcodes_;
   int current_cfa_offset_;
   int current_pc_;
   bool uses_dwarf3_features_;
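Parameterizing the DWARF writers on the whole vector type, rather than on an allocator, lets the same writer target either a plain std::vector<uint8_t> or an allocator-specialized vector (such as a SwapVector) without repeating the allocator at every use site; the static_assert pins the element type. A reduced sketch of the shape (WriterSketch is illustrative, not the ART dwarf::Writer):

#include <cstdint>
#include <type_traits>
#include <vector>

template <typename Vector = std::vector<uint8_t>>
class WriterSketch {
  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");

 public:
  explicit WriterSketch(Vector* out) : out_(out) {}
  void PushUint8(uint8_t value) { out_->push_back(value); }

 private:
  Vector* const out_;  // not owned
};

int main() {
  std::vector<uint8_t> buffer;
  WriterSketch<> writer(&buffer);  // default Vector = std::vector<uint8_t>
  writer.PushUint8(0x42);
  return buffer.size() == 1 ? 0 : 1;
}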
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index f5b9ca5..d9b367b 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -29,9 +29,11 @@
 
 // 32-bit FNV-1a hash function which we use to find duplicate abbreviations.
 // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-template< typename Allocator >
+template <typename Vector>
 struct FNVHash {
-  size_t operator()(const std::vector<uint8_t, Allocator>& v) const {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+  size_t operator()(const Vector& v) const {
     uint32_t hash = 2166136261u;
     for (size_t i = 0; i < v.size(); i++) {
       hash = (hash ^ v[i]) * 16777619u;
@@ -52,8 +54,10 @@
  *     EndTag();
  *   EndTag();
  */
-template< typename Allocator = std::allocator<uint8_t> >
-class DebugInfoEntryWriter FINAL : private Writer<Allocator> {
+template <typename Vector = std::vector<uint8_t>>
+class DebugInfoEntryWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   // Start debugging information entry.
   void StartTag(Tag tag, Children children) {
@@ -176,12 +180,13 @@
     return patch_locations_;
   }
 
-  using Writer<Allocator>::data;
+  using Writer<Vector>::data;
 
   DebugInfoEntryWriter(bool is64bitArch,
-                       std::vector<uint8_t, Allocator>* debug_abbrev,
-                       const Allocator& alloc = Allocator())
-      : Writer<Allocator>(&entries_),
+                       Vector* debug_abbrev,
+                       const typename Vector::allocator_type& alloc =
+                           typename Vector::allocator_type())
+      : Writer<Vector>(&entries_),
         debug_abbrev_(debug_abbrev),
         current_abbrev_(alloc),
         abbrev_codes_(alloc),
@@ -221,7 +226,7 @@
                                                   NextAbbrevCode()));
     int abbrev_code = it.first->second;
     if (UNLIKELY(it.second)) {  // Inserted new entry.
-      const std::vector<uint8_t, Allocator>& abbrev = it.first->first;
+      const Vector& abbrev = it.first->first;
       debug_abbrev_.Pop();  // Remove abbrev table terminator.
       debug_abbrev_.PushUleb128(abbrev_code);
       debug_abbrev_.PushData(abbrev.data(), abbrev.size());
@@ -234,13 +239,13 @@
 
  private:
   // Fields for writing and deduplication of abbrevs.
-  Writer<Allocator> debug_abbrev_;
-  std::vector<uint8_t, Allocator> current_abbrev_;
-  std::unordered_map<std::vector<uint8_t, Allocator>, int,
-                     FNVHash<Allocator> > abbrev_codes_;
+  Writer<Vector> debug_abbrev_;
+  Vector current_abbrev_;
+  std::unordered_map<Vector, int,
+                     FNVHash<Vector> > abbrev_codes_;
 
   // Fields for writing of debugging information entries.
-  std::vector<uint8_t, Allocator> entries_;
+  Vector entries_;
   bool is64bit_;
   int depth_ = 0;
   size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
diff --git a/compiler/dwarf/debug_line_opcode_writer.h b/compiler/dwarf/debug_line_opcode_writer.h
index bdc25e4..201f0b4 100644
--- a/compiler/dwarf/debug_line_opcode_writer.h
+++ b/compiler/dwarf/debug_line_opcode_writer.h
@@ -30,8 +30,10 @@
 //  * Choose the most compact encoding of a given opcode.
 //  * Keep track of current state and convert absolute values to deltas.
 //  * Divide by header-defined factors as appropriate.
-template<typename Allocator = std::allocator<uint8_t>>
-class DebugLineOpCodeWriter FINAL : private Writer<Allocator> {
+template<typename Vector = std::vector<uint8_t>>
+class DebugLineOpCodeWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   static constexpr int kOpcodeBase = 13;
   static constexpr bool kDefaultIsStmt = true;
@@ -212,12 +214,13 @@
     return patch_locations_;
   }
 
-  using Writer<Allocator>::data;
+  using Writer<Vector>::data;
 
   DebugLineOpCodeWriter(bool use64bitAddress,
                         int codeFactorBits,
-                        const Allocator& alloc = Allocator())
-      : Writer<Allocator>(&opcodes_),
+                        const typename Vector::allocator_type& alloc =
+                            typename Vector::allocator_type())
+      : Writer<Vector>(&opcodes_),
         opcodes_(alloc),
         uses_dwarf3_features_(false),
         use_64bit_address_(use64bitAddress),
@@ -234,7 +237,7 @@
     return offset >> code_factor_bits_;
   }
 
-  std::vector<uint8_t, Allocator> opcodes_;
+  Vector opcodes_;
   bool uses_dwarf3_features_;
   bool use_64bit_address_;
   int code_factor_bits_;
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index a07d27c..3ba380e 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -126,7 +126,7 @@
                      initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 28 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(),
+  WriteDebugFrameFDE(is64bit, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
                      kCFIFormat, &debug_frame_data_, &debug_frame_patches);
 
   EXPECT_EQ(expected_patches, debug_frame_patches);
@@ -142,7 +142,8 @@
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 32 };  // NOLINT
   WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     opcodes.data(), kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+                     ArrayRef<const uint8_t>(*opcodes.data()),
+                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
   DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
 
   EXPECT_EQ(expected_patches, debug_frame_patches);
@@ -179,7 +180,8 @@
                      initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
   WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     opcodes.data(), kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+                     ArrayRef<const uint8_t>(*opcodes.data()),
+                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
 
   CheckObjdumpOutput(is64bit, "-W");
 }
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index ae57755..f3fba4b 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -25,6 +25,7 @@
 #include "dwarf/dwarf_constants.h"
 #include "dwarf/register.h"
 #include "dwarf/writer.h"
+#include "utils/array_ref.h"
 
 namespace art {
 namespace dwarf {
@@ -36,21 +37,23 @@
 // In particular, it is not related to machine architecture.
 
 // Write common information entry (CIE) to .debug_frame or .eh_frame section.
-template<typename Allocator>
+template<typename Vector>
 void WriteDebugFrameCIE(bool is64bit,
                         ExceptionHeaderValueApplication address_type,
                         Reg return_address_register,
-                        const DebugFrameOpCodeWriter<Allocator>& opcodes,
+                        const DebugFrameOpCodeWriter<Vector>& opcodes,
                         CFIFormat format,
                         std::vector<uint8_t>* debug_frame) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
   Writer<> writer(debug_frame);
   size_t cie_header_start_ = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF);  // CIE id.
   writer.PushUint8(1);   // Version.
   writer.PushString("zR");
-  writer.PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor);
-  writer.PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor);
+  writer.PushUleb128(DebugFrameOpCodeWriter<Vector>::kCodeAlignmentFactor);
+  writer.PushSleb128(DebugFrameOpCodeWriter<Vector>::kDataAlignmentFactor);
   writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
   writer.PushUleb128(1);  // z: Augmentation data size.
   if (is64bit) {
@@ -68,16 +71,16 @@
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4);  // R: Pointer encoding.
     }
   }
-  writer.PushData(opcodes.data());
+  writer.PushData(*opcodes.data());
   writer.Pad(is64bit ? 8 : 4);
   writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4);
 }
 
 // Write frame description entry (FDE) to .debug_frame or .eh_frame section.
-template<typename Allocator>
+inline
 void WriteDebugFrameFDE(bool is64bit, size_t cie_offset,
                         uint64_t initial_address, uint64_t address_range,
-                        const std::vector<uint8_t, Allocator>* opcodes,
+                        const ArrayRef<const uint8_t>& opcodes,
                         CFIFormat format,
                         std::vector<uint8_t>* debug_frame,
                         std::vector<uintptr_t>* debug_frame_patches) {
@@ -107,11 +110,13 @@
 }
 
 // Write compilation unit (CU) to .debug_info section.
-template<typename Allocator>
+template<typename Vector>
 void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
-                      const DebugInfoEntryWriter<Allocator>& entries,
+                      const DebugInfoEntryWriter<Vector>& entries,
                       std::vector<uint8_t>* debug_info,
                       std::vector<uintptr_t>* debug_info_patches) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
   Writer<> writer(debug_info);
   size_t start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
@@ -119,7 +124,7 @@
   writer.PushUint32(debug_abbrev_offset);
   writer.PushUint8(entries.Is64bit() ? 8 : 4);
   size_t entries_offset = writer.data()->size();
-  writer.PushData(entries.data());
+  writer.PushData(*entries.data());
   writer.UpdateUint32(start, writer.data()->size() - start - 4);
   // Copy patch locations and make them relative to .debug_info section.
   for (uintptr_t patch_location : entries.GetPatchLocations()) {
@@ -135,12 +140,14 @@
 };
 
 // Write line table to .debug_line section.
-template<typename Allocator>
+template<typename Vector>
 void WriteDebugLineTable(const std::vector<std::string>& include_directories,
                          const std::vector<FileEntry>& files,
-                         const DebugLineOpCodeWriter<Allocator>& opcodes,
+                         const DebugLineOpCodeWriter<Vector>& opcodes,
                          std::vector<uint8_t>* debug_line,
                          std::vector<uintptr_t>* debug_line_patches) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
   Writer<> writer(debug_line);
   size_t header_start = writer.data()->size();
   writer.PushUint32(0);  // Section-length placeholder.
@@ -151,13 +158,13 @@
   size_t header_length_pos = writer.data()->size();
   writer.PushUint32(0);  // Header-length placeholder.
   writer.PushUint8(1 << opcodes.GetCodeFactorBits());
-  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0);
-  writer.PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase);
-  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange);
-  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase);
-  static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = {
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kDefaultIsStmt ? 1 : 0);
+  writer.PushInt8(DebugLineOpCodeWriter<Vector>::kLineBase);
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kLineRange);
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kOpcodeBase);
+  static const int opcode_lengths[DebugLineOpCodeWriter<Vector>::kOpcodeBase] = {
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 };
-  for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) {
+  for (int i = 1; i < DebugLineOpCodeWriter<Vector>::kOpcodeBase; i++) {
     writer.PushUint8(opcode_lengths[i]);
   }
   for (const std::string& directory : include_directories) {
@@ -173,7 +180,7 @@
   writer.PushUint8(0);  // Terminate file list.
   writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4);
   size_t opcodes_offset = writer.data()->size();
-  writer.PushData(opcodes.data());
+  writer.PushData(*opcodes.data());
   writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4);
   // Copy patch locations and make them relative to .debug_line section.
   for (uintptr_t patch_location : opcodes.GetPatchLocations()) {
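
WriteDebugFrameFDE above drops its container template parameter in favor of ArrayRef<const uint8_t>, a non-owning view over the opcode bytes. As a rough illustration of why that removes the need for a template, here is a hypothetical, minimal stand-in for such a view (not ART's utils/array_ref.h):

#include <cstddef>
#include <cstdint>
#include <vector>

class ConstByteRef {
 public:
  ConstByteRef() : data_(nullptr), size_(0u) {}
  explicit ConstByteRef(const std::vector<uint8_t>& v) : data_(v.data()), size_(v.size()) {}

  const uint8_t* data() const { return data_; }
  size_t size() const { return size_; }
  bool empty() const { return size_ == 0u; }

 private:
  const uint8_t* data_;  // Non-owning; the caller keeps the buffer alive.
  size_t size_;
};

// Any byte container can then be passed through the same non-template signature,
// e.g. void WriteFde(..., ConstByteRef opcodes, ...);
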
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index e703aee..00b9dfa 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DWARF_WRITER_H_
 #define ART_COMPILER_DWARF_WRITER_H_
 
+#include <type_traits>
 #include <vector>
 #include "base/bit_utils.h"
 #include "base/logging.h"
@@ -26,8 +27,10 @@
 namespace dwarf {
 
 // The base class for all DWARF writers.
-template<typename Allocator = std::allocator<uint8_t>>
+template <typename Vector = std::vector<uint8_t>>
 class Writer {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   void PushUint8(int value) {
     DCHECK_GE(value, 0);
@@ -116,9 +119,11 @@
     data_->insert(data_->end(), p, p + size);
   }
 
-  template<typename Allocator2>
-  void PushData(const std::vector<uint8_t, Allocator2>* buffer) {
-    data_->insert(data_->end(), buffer->begin(), buffer->end());
+  template<typename Vector2>
+  void PushData(const Vector2& buffer) {
+    static_assert(std::is_same<typename std::add_const<typename Vector::value_type>::type,
+                               const uint8_t>::value, "Invalid value type");
+    data_->insert(data_->end(), buffer.begin(), buffer.end());
   }
 
   void UpdateUint32(size_t offset, uint32_t value) {
@@ -155,14 +160,14 @@
     data_->resize(RoundUp(data_->size(), alignment), 0);
   }
 
-  const std::vector<uint8_t, Allocator>* data() const {
+  const Vector* data() const {
     return data_;
   }
 
-  explicit Writer(std::vector<uint8_t, Allocator>* buffer) : data_(buffer) { }
+  explicit Writer(Vector* buffer) : data_(buffer) { }
 
  private:
-  std::vector<uint8_t, Allocator>* data_;
+  Vector* const data_;
 
   DISALLOW_COPY_AND_ASSIGN(Writer);
 };
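
The hunks above switch the writers from an Allocator template parameter to a Vector (container) parameter, with a static_assert pinning the element type to uint8_t. A minimal sketch of that pattern, assuming a stand-alone ByteWriter rather than the ART class hierarchy:

#include <cstdint>
#include <type_traits>
#include <vector>

template <typename Vector = std::vector<uint8_t>>
class ByteWriter {
  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");

 public:
  explicit ByteWriter(Vector* buffer) : data_(buffer) {}

  void PushUint8(uint8_t value) { data_->push_back(value); }

  // Append another byte container; only begin()/end() are required of it.
  template <typename Vector2>
  void PushData(const Vector2& buffer) {
    data_->insert(data_->end(), buffer.begin(), buffer.end());
  }

  const Vector* data() const { return data_; }

 private:
  Vector* const data_;  // The writer never owns the buffer.
};
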
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index c10ffeb..3a9e312 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -182,8 +182,8 @@
   WriteDebugFrameCIE(isa, address_type, format, debug_frame);
   for (const OatWriter::DebugInfo& mi : method_infos) {
     if (!mi.deduped_) {  // Only one FDE per unique address.
-      const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo();
-      if (opcodes != nullptr) {
+      ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
+      if (!opcodes.empty()) {
         address_to_fde_offset_map.emplace(mi.low_pc_, debug_frame->size());
         WriteDebugFrameFDE(Is64BitInstructionSet(isa), cie_offset,
                            mi.low_pc_, mi.high_pc_ - mi.low_pc_,
diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h
index 45e3fc5..2ef7f1a 100644
--- a/compiler/gc_map_builder.h
+++ b/compiler/gc_map_builder.h
@@ -26,14 +26,16 @@
 
 class GcMapBuilder {
  public:
-  template <typename Alloc>
-  GcMapBuilder(std::vector<uint8_t, Alloc>* table, size_t entries, uint32_t max_native_offset,
+  template <typename Vector>
+  GcMapBuilder(Vector* table, size_t entries, uint32_t max_native_offset,
                size_t references_width)
       : entries_(entries), references_width_(entries != 0u ? references_width : 0u),
         native_offset_width_(entries != 0 && max_native_offset != 0
                              ? sizeof(max_native_offset) - CLZ(max_native_offset) / 8u
                              : 0u),
         in_use_(entries) {
+    static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
     // Resize table and set up header.
     table->resize((EntryWidth() * entries) + sizeof(uint32_t));
     table_ = table->data();
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e31a7a..fd6cd82 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -76,6 +76,7 @@
       for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
         dex_file->EnableWrite();
       }
+      compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
       compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
 
       t.NewTiming("WriteElf");
@@ -96,8 +97,10 @@
   ASSERT_TRUE(dup_oat.get() != nullptr);
 
   {
-    bool success_image =
-        writer->Write(image_file.GetFilename(), dup_oat->GetPath(), dup_oat->GetPath());
+    bool success_image = writer->Write(kInvalidImageFd,
+                                       image_file.GetFilename(),
+                                       dup_oat->GetPath(),
+                                       dup_oat->GetPath());
     ASSERT_TRUE(success_image);
     bool success_fixup = ElfWriter::Fixup(dup_oat.get(), writer->GetOatDataBegin());
     ASSERT_TRUE(success_fixup);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d9f8fcb..af2a4f9 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -55,6 +55,7 @@
 #include "mirror/string-inl.h"
 #include "oat.h"
 #include "oat_file.h"
+#include "oat_file_manager.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
@@ -121,13 +122,12 @@
   return true;
 }
 
-bool ImageWriter::Write(const std::string& image_filename,
+bool ImageWriter::Write(int image_fd,
+                        const std::string& image_filename,
                         const std::string& oat_filename,
                         const std::string& oat_location) {
   CHECK(!image_filename.empty());
 
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-
   std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
   if (oat_file.get() == nullptr) {
     PLOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location;
@@ -141,7 +141,8 @@
     oat_file->Erase();
     return false;
   }
-  CHECK_EQ(class_linker->RegisterOatFile(oat_file_), oat_file_);
+  Runtime::Current()->GetOatFileManager().RegisterOatFile(
+      std::unique_ptr<const OatFile>(oat_file_));
 
   interpreter_to_interpreter_bridge_offset_ =
       oat_file_->GetOatHeader().GetInterpreterToInterpreterBridgeOffset();
@@ -178,10 +179,13 @@
     LOG(ERROR) << "Failed to flush and close oat file " << oat_filename << " for " << oat_location;
     return false;
   }
-
-  std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
-  ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  if (image_file.get() == nullptr) {
+  std::unique_ptr<File> image_file;
+  if (image_fd != kInvalidImageFd) {
+    image_file.reset(new File(image_fd, image_filename, unix_file::kCheckSafeUsage));
+  } else {
+    image_file.reset(OS::CreateEmptyFile(image_filename.c_str()));
+  }
+  if (image_file == nullptr) {
     LOG(ERROR) << "Failed to open image file " << image_filename;
     return false;
   }
@@ -192,6 +196,7 @@
   }
 
   // Write out the image + fields + methods.
+  ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   const auto write_count = image_header->GetImageSize();
   if (!image_file->WriteFully(image_->Begin(), write_count)) {
     PLOG(ERROR) << "Failed to write image file " << image_filename;
@@ -200,7 +205,8 @@
   }
 
   // Write out the image bitmap at the page aligned start of the image end.
-  const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
+  const ImageSection& bitmap_section = image_header->GetImageSection(
+      ImageHeader::kSectionImageBitmap);
   CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
   if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()),
                          bitmap_section.Size(), bitmap_section.Offset())) {
@@ -796,7 +802,7 @@
                   offset, kNativeObjectRelocationTypeArtFieldArray });
           offset += header_size;
           // Forward individual fields so that we can quickly find where they belong.
-          for (size_t i = 0, count = cur_fields->Length(); i < count; ++i) {
+          for (size_t i = 0, count = cur_fields->size(); i < count; ++i) {
             // Need to forward arrays separate of fields.
             ArtField* field = &cur_fields->At(i);
             auto it2 = native_object_relocations_.find(field);
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index e235bc4..7a2febc 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -41,6 +41,8 @@
 
 namespace art {
 
+static constexpr int kInvalidImageFd = -1;
+
 // Write a Space built during compilation for use during execution.
 class ImageWriter FINAL {
  public:
@@ -89,7 +91,11 @@
 
   uint8_t* GetOatFileBegin() const;
 
-  bool Write(const std::string& image_filename, const std::string& oat_filename,
+  // If image_fd is not kInvalidImageFd, then we use that for the file. Otherwise we open
+  // image_filename.
+  bool Write(int image_fd,
+             const std::string& image_filename,
+             const std::string& oat_filename,
              const std::string& oat_location)
       REQUIRES(!Locks::mutator_lock_);
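
ImageWriter::Write now selects its output from either an already-open descriptor or a filename, with kInvalidImageFd (-1) as the sentinel. A small sketch of that selection, assuming a POSIX environment; OpenImageOutput is an illustrative name, not the ART API:

#include <fcntl.h>
#include <string>

static constexpr int kInvalidImageFd = -1;

// Adopt the caller-supplied descriptor if there is one, otherwise create the file by name.
int OpenImageOutput(int image_fd, const std::string& image_filename) {
  if (image_fd != kInvalidImageFd) {
    return image_fd;
  }
  return open(image_filename.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
}
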
 
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index b6a40a2..d520208 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -19,6 +19,7 @@
 #include "art_method-inl.h"
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
+#include "base/stringpiece.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "compiler_callbacks.h"
@@ -29,6 +30,7 @@
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "oat_file-inl.h"
+#include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "thread_list.h"
 #include "verifier/method_verifier-inl.h"
@@ -61,9 +63,18 @@
   return jit_compiler->CompileMethod(self, method);
 }
 
+// Callers of this method assume it has NO_RETURN.
+NO_RETURN static void Usage(const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  std::string error;
+  StringAppendV(&error, fmt, ap);
+  LOG(FATAL) << error;
+  va_end(ap);
+  exit(EXIT_FAILURE);
+}
+
 JitCompiler::JitCompiler() : total_time_(0) {
-  auto* pass_manager_options = new PassManagerOptions;
-  pass_manager_options->SetDisablePassList("GVN,DCE,GVNCleanup");
   compiler_options_.reset(new CompilerOptions(
       CompilerOptions::kDefaultCompilerFilter,
       CompilerOptions::kDefaultHugeMethodThreshold,
@@ -82,11 +93,43 @@
       /* implicit_suspend_checks */ false,
       /* pic */ true,  // TODO: Support non-PIC in optimizing.
       /* verbose_methods */ nullptr,
-      pass_manager_options,
       /* init_failure_output */ nullptr,
       /* abort_on_hard_verifier_failure */ false));
+  for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
+    compiler_options_->ParseCompilerOption(argument, Usage);
+  }
   const InstructionSet instruction_set = kRuntimeISA;
-  instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
+  for (const StringPiece option : Runtime::Current()->GetCompilerOptions()) {
+    VLOG(compiler) << "JIT compiler option " << option;
+    std::string error_msg;
+    if (option.starts_with("--instruction-set-variant=")) {
+      StringPiece str = option.substr(strlen("--instruction-set-variant=")).data();
+      VLOG(compiler) << "JIT instruction set variant " << str;
+      instruction_set_features_.reset(InstructionSetFeatures::FromVariant(
+          instruction_set, str.as_string(), &error_msg));
+      if (instruction_set_features_ == nullptr) {
+        LOG(WARNING) << "Error parsing " << option << " message=" << error_msg;
+      }
+    } else if (option.starts_with("--instruction-set-features=")) {
+      StringPiece str = option.substr(strlen("--instruction-set-features=")).data();
+      VLOG(compiler) << "JIT instruction set features " << str;
+      if (instruction_set_features_.get() == nullptr) {
+        instruction_set_features_.reset(InstructionSetFeatures::FromVariant(
+            instruction_set, "default", &error_msg));
+        if (instruction_set_features_ == nullptr) {
+          LOG(WARNING) << "Error parsing " << option << " message=" << error_msg;
+        }
+      }
+      instruction_set_features_.reset(
+          instruction_set_features_->AddFeaturesFromString(str.as_string(), &error_msg));
+      if (instruction_set_features_ == nullptr) {
+        LOG(WARNING) << "Error parsing " << option << " message=" << error_msg;
+      }
+    }
+  }
+  if (instruction_set_features_ == nullptr) {
+    instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
+  }
   cumulative_logger_.reset(new CumulativeLogger("jit times"));
   verification_results_.reset(new VerificationResults(compiler_options_.get()));
   method_inliner_map_.reset(new DexFileToMethodInlinerMap);
@@ -126,61 +169,66 @@
   StackHandleScope<2> hs(self);
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
-  if (runtime->GetJit()->GetCodeCache()->ContainsMethod(method)) {
+
+  // Check if the method is already compiled.
+  if (runtime->GetJit()->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
     VLOG(jit) << "Already compiled " << PrettyMethod(method);
-    return true;  // Already compiled
+    return true;
   }
+
+  // Don't compile the method if we are supposed to be deoptimized.
+  if (runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) {
+    return false;
+  }
+
+  // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
-  {
-    TimingLogger::ScopedTiming t2("Initializing", &logger);
-    if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-      VLOG(jit) << "JIT failed to initialize " << PrettyMethod(method);
-      return false;
-    }
+  if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
+    VLOG(jit) << "JIT failed to initialize " << PrettyMethod(method);
+    return false;
   }
-  const DexFile* dex_file = h_class->GetDexCache()->GetDexFile();
-  MethodReference method_ref(dex_file, method->GetDexMethodIndex());
-  // Only verify if we don't already have verification results.
-  if (verification_results_->GetVerifiedMethod(method_ref) == nullptr) {
-    TimingLogger::ScopedTiming t2("Verifying", &logger);
-    std::string error;
-    if (verifier::MethodVerifier::VerifyMethod(method, true, &error) ==
-        verifier::MethodVerifier::kHardFailure) {
-      VLOG(jit) << "Not compile method " << PrettyMethod(method)
-          << " due to verification failure " << error;
-      return false;
-    }
-  }
+
+  // Do the compilation.
   CompiledMethod* compiled_method = nullptr;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
-    compiled_method = compiler_driver_->CompileArtMethod(self, method);
+    // If we get a request to compile a proxy method, we pass the actual Java method
+    // of that proxy method, as the compiler does not expect a proxy method.
+    ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+    compiled_method = compiler_driver_->CompileArtMethod(self, method_to_compile);
   }
+
+  // Trim maps to reduce memory usage.
+  // TODO: measure how much this increases compile time.
   {
     TimingLogger::ScopedTiming t2("TrimMaps", &logger);
-    // Trim maps to reduce memory usage, TODO: measure how much this increases compile time.
     runtime->GetArenaPool()->TrimMaps();
   }
+
+  // Check if we failed compiling.
   if (compiled_method == nullptr) {
     return false;
   }
+
   total_time_ += NanoTime() - start_time;
-  // Don't add the method if we are supposed to be deoptimized.
   bool result = false;
-  if (!runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) {
-    const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(method);
-    if (code != nullptr) {
-      // Already have some compiled code, just use this instead of linking.
-      // TODO: Fix recompilation.
-      method->SetEntryPointFromQuickCompiledCode(code);
+  const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(method);
+
+  if (code != nullptr) {
+    // Already have some compiled code, just use this instead of linking.
+    // TODO: Fix recompilation.
+    method->SetEntryPointFromQuickCompiledCode(code);
+    result = true;
+  } else {
+    TimingLogger::ScopedTiming t2("LinkCode", &logger);
+    if (AddToCodeCache(method, compiled_method)) {
       result = true;
-    } else {
-      TimingLogger::ScopedTiming t2("MakeExecutable", &logger);
-      result = MakeExecutable(compiled_method, method);
     }
   }
+
   // Remove the compiled method to save memory.
-  compiler_driver_->RemoveCompiledMethod(method_ref);
+  compiler_driver_->RemoveCompiledMethod(
+      MethodReference(h_class->GetDexCache()->GetDexFile(), method->GetDexMethodIndex()));
   runtime->GetJit()->AddTimingLogger(logger);
   return result;
 }
@@ -189,122 +237,71 @@
   return callbacks_.get();
 }
 
-uint8_t* JitCompiler::WriteMethodHeaderAndCode(const CompiledMethod* compiled_method,
-                                               uint8_t* reserve_begin, uint8_t* reserve_end,
-                                               const uint8_t* mapping_table,
-                                               const uint8_t* vmap_table,
-                                               const uint8_t* gc_map) {
-  reserve_begin += sizeof(OatQuickMethodHeader);
-  reserve_begin = reinterpret_cast<uint8_t*>(
-      compiled_method->AlignCode(reinterpret_cast<uintptr_t>(reserve_begin)));
-  const auto* quick_code = compiled_method->GetQuickCode();
-  CHECK_LE(reserve_begin, reserve_end);
-  CHECK_LE(quick_code->size(), static_cast<size_t>(reserve_end - reserve_begin));
-  auto* code_ptr = reserve_begin;
-  OatQuickMethodHeader* method_header = reinterpret_cast<OatQuickMethodHeader*>(code_ptr) - 1;
-  // Construct the header last.
-  const auto frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
-  const auto core_spill_mask = compiled_method->GetCoreSpillMask();
-  const auto fp_spill_mask = compiled_method->GetFpSpillMask();
-  const auto code_size = quick_code->size();
-  CHECK_NE(code_size, 0U);
-  std::copy(quick_code->data(), quick_code->data() + code_size, code_ptr);
-  // After we are done writing we need to update the method header.
-  // Write out the method header last.
-  method_header = new(method_header) OatQuickMethodHeader(
-      (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
-      (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
-      (gc_map == nullptr) ? 0 : code_ptr - gc_map,
-      frame_size_in_bytes,
-      core_spill_mask,
-      fp_spill_mask,
-      code_size);
-  // Return the code ptr.
-  return code_ptr;
-}
-
-bool JitCompiler::AddToCodeCache(ArtMethod* method, const CompiledMethod* compiled_method,
-                                 OatFile::OatMethod* out_method) {
+bool JitCompiler::AddToCodeCache(ArtMethod* method,
+                                 const CompiledMethod* compiled_method) {
   Runtime* runtime = Runtime::Current();
   JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-  const auto* quick_code = compiled_method->GetQuickCode();
-  if (quick_code == nullptr) {
+  auto const quick_code = compiled_method->GetQuickCode();
+  if (quick_code.empty()) {
     return false;
   }
-  const auto code_size = quick_code->size();
+  const auto code_size = quick_code.size();
   Thread* const self = Thread::Current();
-  const uint8_t* base = code_cache->CodeCachePtr();
-  auto* const mapping_table = compiled_method->GetMappingTable();
-  auto* const vmap_table = compiled_method->GetVmapTable();
-  auto* const gc_map = compiled_method->GetGcMap();
+  auto const mapping_table = compiled_method->GetMappingTable();
+  auto const vmap_table = compiled_method->GetVmapTable();
+  auto const gc_map = compiled_method->GetGcMap();
   uint8_t* mapping_table_ptr = nullptr;
   uint8_t* vmap_table_ptr = nullptr;
   uint8_t* gc_map_ptr = nullptr;
 
-  if (mapping_table != nullptr) {
+  if (!mapping_table.empty()) {
     // Write out pre-header stuff.
     mapping_table_ptr = code_cache->AddDataArray(
-        self, mapping_table->data(), mapping_table->data() + mapping_table->size());
+        self, mapping_table.data(), mapping_table.data() + mapping_table.size());
     if (mapping_table_ptr == nullptr) {
       return false;  // Out of data cache.
     }
   }
 
-  if (vmap_table != nullptr) {
+  if (!vmap_table.empty()) {
     vmap_table_ptr = code_cache->AddDataArray(
-        self, vmap_table->data(), vmap_table->data() + vmap_table->size());
+        self, vmap_table.data(), vmap_table.data() + vmap_table.size());
     if (vmap_table_ptr == nullptr) {
       return false;  // Out of data cache.
     }
   }
 
-  if (gc_map != nullptr) {
+  if (!gc_map.empty()) {
     gc_map_ptr = code_cache->AddDataArray(
-        self, gc_map->data(), gc_map->data() + gc_map->size());
+        self, gc_map.data(), gc_map.data() + gc_map.size());
     if (gc_map_ptr == nullptr) {
       return false;  // Out of data cache.
     }
   }
 
-  // Don't touch this until you protect / unprotect the code.
-  const size_t reserve_size = sizeof(OatQuickMethodHeader) + quick_code->size() + 32;
-  uint8_t* const code_reserve = code_cache->ReserveCode(self, reserve_size);
-  if (code_reserve == nullptr) {
+  uint8_t* const code = code_cache->CommitCode(self,
+                                               method,
+                                               mapping_table_ptr,
+                                               vmap_table_ptr,
+                                               gc_map_ptr,
+                                               compiled_method->GetFrameSizeInBytes(),
+                                               compiled_method->GetCoreSpillMask(),
+                                               compiled_method->GetFpSpillMask(),
+                                               compiled_method->GetQuickCode().data(),
+                                               compiled_method->GetQuickCode().size());
+
+  if (code == nullptr) {
     return false;
   }
-  auto* code_ptr = WriteMethodHeaderAndCode(
-      compiled_method, code_reserve, code_reserve + reserve_size, mapping_table_ptr,
-      vmap_table_ptr, gc_map_ptr);
-
-  __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
-                          reinterpret_cast<char*>(code_ptr + quick_code->size()));
 
   const size_t thumb_offset = compiled_method->CodeDelta();
-  const uint32_t code_offset = code_ptr - base + thumb_offset;
-  *out_method = OatFile::OatMethod(base, code_offset);
-  DCHECK_EQ(out_method->GetGcMap(), gc_map_ptr);
-  DCHECK_EQ(out_method->GetMappingTable(), mapping_table_ptr);
-  DCHECK_EQ(out_method->GetVmapTable(), vmap_table_ptr);
-  DCHECK_EQ(out_method->GetFrameSizeInBytes(), compiled_method->GetFrameSizeInBytes());
-  DCHECK_EQ(out_method->GetCoreSpillMask(), compiled_method->GetCoreSpillMask());
-  DCHECK_EQ(out_method->GetFpSpillMask(), compiled_method->GetFpSpillMask());
-  VLOG(jit)  << "JIT added " << PrettyMethod(method) << "@" << method << " ccache_size="
-      << PrettySize(code_cache->CodeCacheSize()) << ": " << reinterpret_cast<void*>(code_ptr)
-      << "," << reinterpret_cast<void*>(code_ptr + code_size);
-  return true;
-}
-
-bool JitCompiler::MakeExecutable(CompiledMethod* compiled_method, ArtMethod* method) {
-  CHECK(method != nullptr);
-  CHECK(compiled_method != nullptr);
-  OatFile::OatMethod oat_method(nullptr, 0);
-  if (!AddToCodeCache(method, compiled_method, &oat_method)) {
-    return false;
-  }
-  // TODO: Flush instruction cache.
-  oat_method.LinkMethod(method);
-  CHECK(Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method))
-      << PrettyMethod(method);
+  const uint32_t code_offset = sizeof(OatQuickMethodHeader) + thumb_offset;
+  VLOG(jit)
+      << "JIT added "
+      << PrettyMethod(method) << "@" << method
+      << " ccache_size=" << PrettySize(code_cache->CodeCacheSize()) << ": "
+      << reinterpret_cast<void*>(code + code_offset)
+      << "," << reinterpret_cast<void*>(code + code_offset + code_size);
   return true;
 }
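
The constructor above now feeds the runtime's compiler options through prefix matching on --instruction-set-variant= and --instruction-set-features=. A simplified sketch of that parsing, using hypothetical names and plain std::string instead of StringPiece:

#include <string>
#include <vector>

void ParseIsaOptions(const std::vector<std::string>& options,
                     std::string* variant,
                     std::string* features) {
  static const std::string kVariantPrefix = "--instruction-set-variant=";
  static const std::string kFeaturesPrefix = "--instruction-set-features=";
  for (const std::string& option : options) {
    if (option.compare(0, kVariantPrefix.size(), kVariantPrefix) == 0) {
      *variant = option.substr(kVariantPrefix.size());     // e.g. "cortex-a53"
    } else if (option.compare(0, kFeaturesPrefix.size(), kFeaturesPrefix) == 0) {
      *features = option.substr(kFeaturesPrefix.size());   // e.g. "div,lpae"
    }
  }
}
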
 
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index ef68caa..913a6d0 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -39,10 +39,6 @@
   virtual ~JitCompiler();
   bool CompileMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  // This is in the compiler since the runtime doesn't have access to the compiled method
-  // structures.
-  bool AddToCodeCache(ArtMethod* method, const CompiledMethod* compiled_method,
-                      OatFile::OatMethod* out_method) SHARED_REQUIRES(Locks::mutator_lock_);
   CompilerCallbacks* GetCompilerCallbacks() const;
   size_t GetTotalCompileTime() const {
     return total_time_;
@@ -58,11 +54,12 @@
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
 
-  explicit JitCompiler();
-  uint8_t* WriteMethodHeaderAndCode(
-      const CompiledMethod* compiled_method, uint8_t* reserve_begin, uint8_t* reserve_end,
-      const uint8_t* mapping_table, const uint8_t* vmap_table, const uint8_t* gc_map);
-  bool MakeExecutable(CompiledMethod* compiled_method, ArtMethod* method)
+  JitCompiler();
+
+  // This is in the compiler since the runtime doesn't have access to the compiled method
+  // structures.
+  bool AddToCodeCache(ArtMethod* method,
+                      const CompiledMethod* compiled_method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(JitCompiler);
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index 8b5fdc3..16b4386 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -325,156 +325,146 @@
 // 0x0000007f: .cfi_def_cfa_offset: 128
 
 static constexpr uint8_t expected_asm_kMips[] = {
-    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB8, 0xAF,
-    0x34, 0x00, 0xAF, 0xAF, 0x30, 0x00, 0xAE, 0xAF, 0x2C, 0x00, 0xAD, 0xAF,
-    0x28, 0x00, 0xAC, 0xAF, 0x24, 0x00, 0xAB, 0xAF, 0x20, 0x00, 0xAA, 0xAF,
-    0x1C, 0x00, 0xA9, 0xAF, 0x18, 0x00, 0xA8, 0xAF, 0x00, 0x00, 0xA4, 0xAF,
-    0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xAC, 0xE7, 0x4C, 0x00, 0xA6, 0xAF,
-    0x50, 0x00, 0xA7, 0xAF, 0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27,
-    0x18, 0x00, 0xA8, 0x8F, 0x1C, 0x00, 0xA9, 0x8F, 0x20, 0x00, 0xAA, 0x8F,
-    0x24, 0x00, 0xAB, 0x8F, 0x28, 0x00, 0xAC, 0x8F, 0x2C, 0x00, 0xAD, 0x8F,
-    0x30, 0x00, 0xAE, 0x8F, 0x34, 0x00, 0xAF, 0x8F, 0x38, 0x00, 0xB8, 0x8F,
-    0x3C, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03,
-    0x00, 0x00, 0x00, 0x00,
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xBE, 0xAF,
+    0x34, 0x00, 0xB7, 0xAF, 0x30, 0x00, 0xB6, 0xAF, 0x2C, 0x00, 0xB5, 0xAF,
+    0x28, 0x00, 0xB4, 0xAF, 0x24, 0x00, 0xB3, 0xAF, 0x20, 0x00, 0xB2, 0xAF,
+    0x00, 0x00, 0xA4, 0xAF, 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xAC, 0xE7,
+    0x4C, 0x00, 0xA6, 0xAF, 0x50, 0x00, 0xA7, 0xAF, 0xE0, 0xFF, 0xBD, 0x27,
+    0x20, 0x00, 0xBD, 0x27, 0x20, 0x00, 0xB2, 0x8F, 0x24, 0x00, 0xB3, 0x8F,
+    0x28, 0x00, 0xB4, 0x8F, 0x2C, 0x00, 0xB5, 0x8F, 0x30, 0x00, 0xB6, 0x8F,
+    0x34, 0x00, 0xB7, 0x8F, 0x38, 0x00, 0xBE, 0x8F, 0x3C, 0x00, 0xBF, 0x8F,
+    0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
 static constexpr uint8_t expected_cfi_kMips[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02, 0x44, 0x8F, 0x03,
-    0x44, 0x8E, 0x04, 0x44, 0x8D, 0x05, 0x44, 0x8C, 0x06, 0x44, 0x8B, 0x07,
-    0x44, 0x8A, 0x08, 0x44, 0x89, 0x09, 0x44, 0x88, 0x0A, 0x58, 0x0E, 0x60,
-    0x44, 0x0E, 0x40, 0x0A, 0x44, 0xC8, 0x44, 0xC9, 0x44, 0xCA, 0x44, 0xCB,
-    0x44, 0xCC, 0x44, 0xCD, 0x44, 0xCE, 0x44, 0xCF, 0x44, 0xD8, 0x44, 0xDF,
-    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x97, 0x03,
+    0x44, 0x96, 0x04, 0x44, 0x95, 0x05, 0x44, 0x94, 0x06, 0x44, 0x93, 0x07,
+    0x44, 0x92, 0x08, 0x58, 0x0E, 0x60, 0x44, 0x0E, 0x40, 0x0A, 0x44, 0xD2,
+    0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, 0x44, 0xD7, 0x44, 0xDE,
+    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: addiu r29, r29, -64
 // 0x00000004: .cfi_def_cfa_offset: 64
 // 0x00000004: sw r31, +60(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r24, +56(r29)
-// 0x0000000c: .cfi_offset: r24 at cfa-8
-// 0x0000000c: sw r15, +52(r29)
-// 0x00000010: .cfi_offset: r15 at cfa-12
-// 0x00000010: sw r14, +48(r29)
-// 0x00000014: .cfi_offset: r14 at cfa-16
-// 0x00000014: sw r13, +44(r29)
-// 0x00000018: .cfi_offset: r13 at cfa-20
-// 0x00000018: sw r12, +40(r29)
-// 0x0000001c: .cfi_offset: r12 at cfa-24
-// 0x0000001c: sw r11, +36(r29)
-// 0x00000020: .cfi_offset: r11 at cfa-28
-// 0x00000020: sw r10, +32(r29)
-// 0x00000024: .cfi_offset: r10 at cfa-32
-// 0x00000024: sw r9, +28(r29)
-// 0x00000028: .cfi_offset: r9 at cfa-36
-// 0x00000028: sw r8, +24(r29)
-// 0x0000002c: .cfi_offset: r8 at cfa-40
-// 0x0000002c: sw r4, +0(r29)
-// 0x00000030: sw r5, +68(r29)
-// 0x00000034: swc1 f12, +72(r29)
-// 0x00000038: sw r6, +76(r29)
-// 0x0000003c: sw r7, +80(r29)
-// 0x00000040: addiu r29, r29, -32
-// 0x00000044: .cfi_def_cfa_offset: 96
-// 0x00000044: addiu r29, r29, 32
-// 0x00000048: .cfi_def_cfa_offset: 64
-// 0x00000048: .cfi_remember_state
-// 0x00000048: lw r8, +24(r29)
-// 0x0000004c: .cfi_restore: r8
-// 0x0000004c: lw r9, +28(r29)
-// 0x00000050: .cfi_restore: r9
-// 0x00000050: lw r10, +32(r29)
-// 0x00000054: .cfi_restore: r10
-// 0x00000054: lw r11, +36(r29)
-// 0x00000058: .cfi_restore: r11
-// 0x00000058: lw r12, +40(r29)
-// 0x0000005c: .cfi_restore: r12
-// 0x0000005c: lw r13, +44(r29)
-// 0x00000060: .cfi_restore: r13
-// 0x00000060: lw r14, +48(r29)
-// 0x00000064: .cfi_restore: r14
-// 0x00000064: lw r15, +52(r29)
-// 0x00000068: .cfi_restore: r15
-// 0x00000068: lw r24, +56(r29)
-// 0x0000006c: .cfi_restore: r24
-// 0x0000006c: lw r31, +60(r29)
-// 0x00000070: .cfi_restore: r31
-// 0x00000070: addiu r29, r29, 64
-// 0x00000074: .cfi_def_cfa_offset: 0
-// 0x00000074: jr r31
-// 0x00000078: nop
-// 0x0000007c: .cfi_restore_state
-// 0x0000007c: .cfi_def_cfa_offset: 64
+// 0x00000008: sw r30, +56(r29)
+// 0x0000000c: .cfi_offset: r30 at cfa-8
+// 0x0000000c: sw r23, +52(r29)
+// 0x00000010: .cfi_offset: r23 at cfa-12
+// 0x00000010: sw r22, +48(r29)
+// 0x00000014: .cfi_offset: r22 at cfa-16
+// 0x00000014: sw r21, +44(r29)
+// 0x00000018: .cfi_offset: r21 at cfa-20
+// 0x00000018: sw r20, +40(r29)
+// 0x0000001c: .cfi_offset: r20 at cfa-24
+// 0x0000001c: sw r19, +36(r29)
+// 0x00000020: .cfi_offset: r19 at cfa-28
+// 0x00000020: sw r18, +32(r29)
+// 0x00000024: .cfi_offset: r18 at cfa-32
+// 0x00000024: sw r4, +0(r29)
+// 0x00000028: sw r5, +68(r29)
+// 0x0000002c: swc1 f12, +72(r29)
+// 0x00000030: sw r6, +76(r29)
+// 0x00000034: sw r7, +80(r29)
+// 0x00000038: addiu r29, r29, -32
+// 0x0000003c: .cfi_def_cfa_offset: 96
+// 0x0000003c: addiu r29, r29, 32
+// 0x00000040: .cfi_def_cfa_offset: 64
+// 0x00000040: .cfi_remember_state
+// 0x00000040: lw r18, +32(r29)
+// 0x00000044: .cfi_restore: r18
+// 0x00000044: lw r19, +36(r29)
+// 0x00000048: .cfi_restore: r19
+// 0x00000048: lw r20, +40(r29)
+// 0x0000004c: .cfi_restore: r20
+// 0x0000004c: lw r21, +44(r29)
+// 0x00000050: .cfi_restore: r21
+// 0x00000050: lw r22, +48(r29)
+// 0x00000054: .cfi_restore: r22
+// 0x00000054: lw r23, +52(r29)
+// 0x00000058: .cfi_restore: r23
+// 0x00000058: lw r30, +56(r29)
+// 0x0000005c: .cfi_restore: r30
+// 0x0000005c: lw r31, +60(r29)
+// 0x00000060: .cfi_restore: r31
+// 0x00000060: addiu r29, r29, 64
+// 0x00000064: .cfi_def_cfa_offset: 0
+// 0x00000064: jr r31
+// 0x00000068: nop
+// 0x0000006c: .cfi_restore_state
+// 0x0000006c: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64[] = {
-    0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF,
-    0x48, 0x00, 0xBC, 0xFF, 0x40, 0x00, 0xB7, 0xFF, 0x38, 0x00, 0xB6, 0xFF,
-    0x30, 0x00, 0xB5, 0xFF, 0x28, 0x00, 0xB4, 0xFF, 0x20, 0x00, 0xB3, 0xFF,
-    0x18, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xFF, 0x68, 0x00, 0xA5, 0xAF,
-    0x6C, 0x00, 0xAE, 0xE7, 0x70, 0x00, 0xA7, 0xAF, 0x74, 0x00, 0xA8, 0xAF,
-    0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x18, 0x00, 0xB2, 0xDF,
-    0x20, 0x00, 0xB3, 0xDF, 0x28, 0x00, 0xB4, 0xDF, 0x30, 0x00, 0xB5, 0xDF,
-    0x38, 0x00, 0xB6, 0xDF, 0x40, 0x00, 0xB7, 0xDF, 0x48, 0x00, 0xBC, 0xDF,
-    0x50, 0x00, 0xBE, 0xDF, 0x58, 0x00, 0xBF, 0xDF, 0x60, 0x00, 0xBD, 0x67,
+    0x90, 0xFF, 0xBD, 0x67, 0x68, 0x00, 0xBF, 0xFF, 0x60, 0x00, 0xBE, 0xFF,
+    0x58, 0x00, 0xBC, 0xFF, 0x50, 0x00, 0xB7, 0xFF, 0x48, 0x00, 0xB6, 0xFF,
+    0x40, 0x00, 0xB5, 0xFF, 0x38, 0x00, 0xB4, 0xFF, 0x30, 0x00, 0xB3, 0xFF,
+    0x28, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xFF, 0x78, 0x00, 0xA5, 0xAF,
+    0x7C, 0x00, 0xAE, 0xE7, 0x80, 0x00, 0xA7, 0xAF, 0x84, 0x00, 0xA8, 0xAF,
+    0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x28, 0x00, 0xB2, 0xDF,
+    0x30, 0x00, 0xB3, 0xDF, 0x38, 0x00, 0xB4, 0xDF, 0x40, 0x00, 0xB5, 0xDF,
+    0x48, 0x00, 0xB6, 0xDF, 0x50, 0x00, 0xB7, 0xDF, 0x58, 0x00, 0xBC, 0xDF,
+    0x60, 0x00, 0xBE, 0xDF, 0x68, 0x00, 0xBF, 0xDF, 0x70, 0x00, 0xBD, 0x67,
     0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
 static constexpr uint8_t expected_cfi_kMips64[] = {
-    0x44, 0x0E, 0x60, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06,
+    0x44, 0x0E, 0x70, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06,
     0x44, 0x97, 0x08, 0x44, 0x96, 0x0A, 0x44, 0x95, 0x0C, 0x44, 0x94, 0x0E,
-    0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x80, 0x01, 0x44, 0x0E,
-    0x60, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6,
+    0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x90, 0x01, 0x44, 0x0E,
+    0x70, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6,
     0x44, 0xD7, 0x44, 0xDC, 0x44, 0xDE, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48,
-    0x0B, 0x0E, 0x60,
+    0x0B, 0x0E, 0x70,
 };
-// 0x00000000: daddiu r29, r29, -96
-// 0x00000004: .cfi_def_cfa_offset: 96
-// 0x00000004: sd r31, +88(r29)
+// 0x00000000: daddiu r29, r29, -112
+// 0x00000004: .cfi_def_cfa_offset: 112
+// 0x00000004: sd r31, +104(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r30, +80(r29)
+// 0x00000008: sd r30, +96(r29)
 // 0x0000000c: .cfi_offset: r30 at cfa-16
-// 0x0000000c: sd r28, +72(r29)
+// 0x0000000c: sd r28, +88(r29)
 // 0x00000010: .cfi_offset: r28 at cfa-24
-// 0x00000010: sd r23, +64(r29)
+// 0x00000010: sd r23, +80(r29)
 // 0x00000014: .cfi_offset: r23 at cfa-32
-// 0x00000014: sd r22, +56(r29)
+// 0x00000014: sd r22, +72(r29)
 // 0x00000018: .cfi_offset: r22 at cfa-40
-// 0x00000018: sd r21, +48(r29)
+// 0x00000018: sd r21, +64(r29)
 // 0x0000001c: .cfi_offset: r21 at cfa-48
-// 0x0000001c: sd r20, +40(r29)
+// 0x0000001c: sd r20, +56(r29)
 // 0x00000020: .cfi_offset: r20 at cfa-56
-// 0x00000020: sd r19, +32(r29)
+// 0x00000020: sd r19, +48(r29)
 // 0x00000024: .cfi_offset: r19 at cfa-64
-// 0x00000024: sd r18, +24(r29)
+// 0x00000024: sd r18, +40(r29)
 // 0x00000028: .cfi_offset: r18 at cfa-72
 // 0x00000028: sd r4, +0(r29)
-// 0x0000002c: sw r5, +104(r29)
-// 0x00000030: swc1 f14, +108(r29)
-// 0x00000034: sw r7, +112(r29)
-// 0x00000038: sw r8, +116(r29)
+// 0x0000002c: sw r5, +120(r29)
+// 0x00000030: swc1 f14, +124(r29)
+// 0x00000034: sw r7, +128(r29)
+// 0x00000038: sw r8, +132(r29)
 // 0x0000003c: daddiu r29, r29, -32
-// 0x00000040: .cfi_def_cfa_offset: 128
+// 0x00000040: .cfi_def_cfa_offset: 144
 // 0x00000040: daddiu r29, r29, 32
-// 0x00000044: .cfi_def_cfa_offset: 96
+// 0x00000044: .cfi_def_cfa_offset: 112
 // 0x00000044: .cfi_remember_state
-// 0x00000044: ld r18, +24(r29)
+// 0x00000044: ld r18, +40(r29)
 // 0x00000048: .cfi_restore: r18
-// 0x00000048: ld r19, +32(r29)
+// 0x00000048: ld r19, +48(r29)
 // 0x0000004c: .cfi_restore: r19
-// 0x0000004c: ld r20, +40(r29)
+// 0x0000004c: ld r20, +56(r29)
 // 0x00000050: .cfi_restore: r20
-// 0x00000050: ld r21, +48(r29)
+// 0x00000050: ld r21, +64(r29)
 // 0x00000054: .cfi_restore: r21
-// 0x00000054: ld r22, +56(r29)
+// 0x00000054: ld r22, +72(r29)
 // 0x00000058: .cfi_restore: r22
-// 0x00000058: ld r23, +64(r29)
+// 0x00000058: ld r23, +80(r29)
 // 0x0000005c: .cfi_restore: r23
-// 0x0000005c: ld r28, +72(r29)
+// 0x0000005c: ld r28, +88(r29)
 // 0x00000060: .cfi_restore: r28
-// 0x00000060: ld r30, +80(r29)
+// 0x00000060: ld r30, +96(r29)
 // 0x00000064: .cfi_restore: r30
-// 0x00000064: ld r31, +88(r29)
+// 0x00000064: ld r31, +104(r29)
 // 0x00000068: .cfi_restore: r31
-// 0x00000068: daddiu r29, r29, 96
+// 0x00000068: daddiu r29, r29, 112
 // 0x0000006c: .cfi_def_cfa_offset: 0
 // 0x0000006c: jr r31
 // 0x00000070: nop
 // 0x00000074: .cfi_restore_state
-// 0x00000074: .cfi_def_cfa_offset: 96
+// 0x00000074: .cfi_def_cfa_offset: 112
+
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 953dfcb..52a2382 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -67,6 +67,7 @@
   const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
   InstructionSet instruction_set = driver->GetInstructionSet();
+  const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
   const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
   // Calling conventions used to iterate over parameters to method
   std::unique_ptr<JniCallingConvention> main_jni_conv(
@@ -93,7 +94,7 @@
       JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
+  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set, instruction_set_features));
   jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetGenerateDebugInfo());
 
   // Offsets into data structures
@@ -486,7 +487,7 @@
                                                  frame_size,
                                                  main_jni_conv->CoreSpillMask(),
                                                  main_jni_conv->FpSpillMask(),
-                                                 nullptr,  // src_mapping_table.
+                                                 ArrayRef<const SrcMapElem>(),
                                                  ArrayRef<const uint8_t>(),  // mapping_table.
                                                  ArrayRef<const uint8_t>(),  // vmap_table.
                                                  ArrayRef<const uint8_t>(),  // native_gc_map.
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index be2397f..2d31a98 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -162,22 +162,19 @@
   }
   padding_ = padding;
 
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T0));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T1));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T2));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T3));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T4));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T5));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T6));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T7));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(T8));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S2));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S3));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S4));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S5));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S6));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S7));
+  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(FP));
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
   uint32_t result = 0;
-  result = 1 << T0 | 1 << T1 | 1 << T2 | 1 << T3 | 1 << T4 | 1 << T5 | 1 << T6 |
-           1 << T7 | 1 << T8 | 1 << RA;
+  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << FP | 1 << RA;
   return result;
 }
 
@@ -186,7 +183,7 @@
 }
 
 size_t MipsJniCallingConvention::FrameSize() {
-  // Method*, LR and callee save area size, local reference segment state
+  // ArtMethod*, RA and callee save area size, local reference segment state
   size_t frame_data_size = kMipsPointerSize +
       (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
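
For reference, the frame_data_size above works out as follows, assuming kMipsPointerSize and kFramePointerSize are both 4 bytes and counting the seven callee saves pushed in the constructor (S2-S7 and FP):

    frame_data_size = 4 /* ArtMethod* */ + (2 /* RA + local ref segment state */ + 7) * 4
                    = 40 bytes

before the HandleScope and return-value spill area are added.
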
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 3a11bcf..807d740 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -140,6 +140,7 @@
   // Compute spill mask to agree with callee saves initialized in the constructor
   uint32_t result = 0;
   result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA;
+  DCHECK_EQ(static_cast<size_t>(POPCOUNT(result)), callee_save_regs_.size() + 1);
   return result;
 }
 
@@ -148,9 +149,9 @@
 }
 
 size_t Mips64JniCallingConvention::FrameSize() {
-  // Mehtod* and callee save area size, local reference segment state
+  // ArtMethod*, RA and callee save area size, local reference segment state
   size_t frame_data_size = kFramePointerSize +
-      CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t);
+      (CalleeSaveRegisters().size() + 1) * kFramePointerSize + sizeof(uint32_t);
   // References plus 2 words for HandleScope header
   size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
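
The new DCHECK above ties CoreSpillMask() to the callee-save list: the popcount of the mask must equal the number of saved core registers plus one for RA. A simplified stand-in showing the same invariant (BuildCoreSpillMask and its parameters are illustrative names, not the ART API):

#include <bitset>
#include <cassert>
#include <cstdint>
#include <vector>

uint32_t BuildCoreSpillMask(const std::vector<int>& callee_save_regs, int ra_reg) {
  uint32_t mask = 1u << ra_reg;          // RA is always saved.
  for (int reg : callee_save_regs) {
    mask |= 1u << reg;                   // One bit per callee-save core register.
  }
  // Mirror of the DCHECK above: bits set == callee saves + RA (assumes no duplicates).
  assert(std::bitset<32>(mask).count() == callee_save_regs.size() + 1);
  return mask;
}
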
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 9c7eab1..b6b11ca 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -38,8 +38,7 @@
   return ManagedRegister::NoRegister();  // No free regs, so assembler uses push/pop
 }
 
-static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni) {
-  UNUSED(jni);
+static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni ATTRIBUTE_UNUSED) {
   if (shorty[0] == 'F' || shorty[0] == 'D') {
     return X86_64ManagedRegister::FromXmmRegister(XMM0);
   } else if (shorty[0] == 'J') {
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index ceace82..13754fd 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -18,6 +18,7 @@
 
 #include "compiled_method.h"
 #include "oat.h"
+#include "oat_quick_method_header.h"
 #include "output_stream.h"
 
 namespace art {
@@ -35,7 +36,8 @@
   // of code. To avoid any alignment discrepancies for the final chunk, we always align the
   // offset after reserving or writing any chunk.
   uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
-  bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u),
+  bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
+                                                MethodReference(nullptr, 0u),
                                                 aligned_offset);
   if (needs_thunk) {
     thunk_locations_.push_back(aligned_offset);
@@ -84,8 +86,7 @@
                                                       const CompiledMethod* compiled_method,
                                                       MethodReference method_ref,
                                                       uint32_t max_extra_space) {
-  DCHECK(compiled_method->GetQuickCode() != nullptr);
-  uint32_t quick_code_size = compiled_method->GetQuickCode()->size();
+  uint32_t quick_code_size = compiled_method->GetQuickCode().size();
   uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
   uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
   // Adjust for extra space required by the subclass.
@@ -94,7 +95,8 @@
   // We need the MethodReference for that.
   if (!unprocessed_patches_.empty() &&
       next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
-    bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref,
+    bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset,
+                                                  method_ref,
                                                   next_aligned_offset);
     if (needs_thunk) {
       // A single thunk will cover all pending patches.
@@ -156,7 +158,10 @@
         // If still unresolved, check if we have a thunk within range.
         if (thunk_locations_.empty() ||
             patch_offset - thunk_locations_.back() > max_negative_displacement_) {
-          return next_aligned_offset - patch_offset > max_positive_displacement_;
+          // No thunk in range; we need a thunk if the next aligned offset
+          // is out of range, or if we're at the end of all code.
+          return (next_aligned_offset - patch_offset > max_positive_displacement_) ||
+              (quick_code_offset == next_aligned_offset);  // End of code.
         }
       } else {
         uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_);
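
The new condition above reserves a thunk not only when the next aligned offset would put the pending patch out of forward range, but also when this is the last chunk of code and nothing later could host the thunk. Reduced to a predicate with hypothetical parameter names:

#include <cstdint>

bool NeedsThunk(uint32_t patch_offset,
                uint32_t next_aligned_offset,
                uint32_t quick_code_offset,
                uint32_t max_positive_displacement) {
  return (next_aligned_offset - patch_offset > max_positive_displacement) ||
         (quick_code_offset == next_aligned_offset);  // End of code.
}
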
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index a3e889f..5f4f760 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -110,8 +110,9 @@
       (static_cast<uint32_t>(addr[3]) << 8);
 }
 
-template <typename Alloc>
-uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) {
+template <typename Vector>
+uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
 }
 
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 2d474c2..006d6fb 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -37,8 +37,8 @@
   void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
   static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
 
-  template <typename Alloc>
-  static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset);
+  template <typename Vector>
+  static uint32_t GetInsn32(Vector* code, uint32_t offset);
 
   // PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
   static constexpr int32_t kPcDisplacement = 4;
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 13f67e6..a259cda 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -16,6 +16,7 @@
 
 #include "linker/relative_patcher_test.h"
 #include "linker/arm/relative_patcher_thumb2.h"
+#include "oat_quick_method_header.h"
 
 namespace art {
 namespace linker {
@@ -232,6 +233,36 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) {
+  constexpr uint32_t missing_method_index = 1024u;
+  auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0);
+  constexpr uint32_t bl_offset_in_method3 = 3u * 2u;  // After NOPs.
+  ArrayRef<const uint8_t> method3_code(method3_raw_code);
+  ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
+  LinkerPatch method3_patches[] = {
+      LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
+  };
+
+  constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */;
+  bool thunk_in_gap = Create2MethodsWithGap(kNopCode,
+                                            ArrayRef<const LinkerPatch>(),
+                                            method3_code,
+                                            ArrayRef<const LinkerPatch>(method3_patches),
+                                            just_over_max_negative_disp - bl_offset_in_method3);
+  ASSERT_FALSE(thunk_in_gap);  // There should be a thunk, but it should be after method2.
+  ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+  // Check linked code.
+  uint32_t method3_offset = GetMethodOffset(3u);
+  uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2);
+  uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */);
+  ASSERT_EQ(diff & 1u, 0u);
+  ASSERT_LT(diff >> 1, 1u << 8);  // Simple encoding, (diff >> 1) fits into 8 bits.
+  auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu));
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code)));
+  EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
 TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) {
   auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0);
   constexpr uint32_t bl_offset_in_method1 = 3u * 2u;  // After NOPs.
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 6b9c530..57018af 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -22,6 +22,7 @@
 #include "driver/compiler_driver.h"
 #include "utils/arm64/assembler_arm64.h"
 #include "oat.h"
+#include "oat_quick_method_header.h"
 #include "output_stream.h"
 
 namespace art {
@@ -73,7 +74,7 @@
   // Now that we have the actual offset where the code will be placed, locate the ADRP insns
   // that actually require the thunk.
   uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
-  ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode());
+  ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
   uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
   DCHECK(compiled_method != nullptr);
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b3af4c6..0bfef5e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -16,6 +16,7 @@
 
 #include "linker/relative_patcher_test.h"
 #include "linker/arm64/relative_patcher_arm64.h"
+#include "oat_quick_method_header.h"
 
 namespace art {
 namespace linker {
@@ -236,7 +237,7 @@
     CHECK(!compiled_method_refs_.empty());
     CHECK_EQ(compiled_method_refs_[0].dex_method_index, 1u);
     CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size());
-    uint32_t method1_size = compiled_methods_[0]->GetQuickCode()->size();
+    uint32_t method1_size = compiled_methods_[0]->GetQuickCode().size();
     uint32_t thunk_offset = CompiledCode::AlignCode(method1_offset + method1_size, kArm64);
     uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u);
     ASSERT_EQ(b_diff & 3u, 0u);
@@ -385,6 +386,39 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) {
+  constexpr uint32_t missing_method_index = 1024u;
+  auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0);
+  constexpr uint32_t bl_offset_in_last_method = 1u * 4u;  // After NOPs.
+  ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
+  ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
+  LinkerPatch last_method_patches[] = {
+      LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index),
+  };
+
+  constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4;
+  uint32_t last_method_idx = Create2MethodsWithGap(
+      kNopCode, ArrayRef<const LinkerPatch>(), last_method_code,
+      ArrayRef<const LinkerPatch>(last_method_patches),
+      just_over_max_negative_disp - bl_offset_in_last_method);
+  uint32_t method1_offset = GetMethodOffset(1u);
+  uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+  ASSERT_EQ(method1_offset,
+            last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp);
+  ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+  // Check linked code.
+  uint32_t thunk_offset =
+      CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
+  uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
+  ASSERT_EQ(diff & 3u, 0u);
+  ASSERT_LT(diff, 128 * MB);
+  auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
+                                ArrayRef<const uint8_t>(expected_code)));
+  EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
 TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) {
   auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0);
   constexpr uint32_t bl_offset_in_method1 = 1u * 4u;  // After NOPs.
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 31d1bce..92cf8ca 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -30,6 +30,7 @@
 #include "linker/relative_patcher.h"
 #include "method_reference.h"
 #include "oat.h"
+#include "oat_quick_method_header.h"
 #include "utils/array_ref.h"
 #include "vector_output_stream.h"
 
@@ -73,8 +74,8 @@
     compiled_method_refs_.push_back(method_ref);
     compiled_methods_.emplace_back(new CompiledMethod(
         &driver_, instruction_set_, code,
-        0u, 0u, 0u, nullptr, ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
-        ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
+        0u, 0u, 0u, ArrayRef<const SrcMapElem>(), ArrayRef<const uint8_t>(),
+        ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
         patches));
   }
 
@@ -92,7 +93,7 @@
 
       offset += sizeof(OatQuickMethodHeader);
       uint32_t quick_code_offset = offset + compiled_method->CodeDelta();
-      const auto& code = *compiled_method->GetQuickCode();
+      const auto code = compiled_method->GetQuickCode();
       offset += code.size();
 
       method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset);
@@ -124,7 +125,7 @@
 
       out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader));
       offset += sizeof(OatQuickMethodHeader);
-      ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode());
+      ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
       if (!compiled_method->GetPatches().empty()) {
         patched_code_.assign(code.begin(), code.end());
         code = ArrayRef<const uint8_t>(patched_code_);
@@ -163,7 +164,7 @@
       ++idx;
     }
     CHECK_NE(idx, compiled_method_refs_.size());
-    CHECK_EQ(compiled_methods_[idx]->GetQuickCode()->size(), expected_code.size());
+    CHECK_EQ(compiled_methods_[idx]->GetQuickCode().size(), expected_code.size());
 
     auto result = method_offset_map_.FindMethodOffset(method_ref);
     CHECK(result.first);  // Must have been linked.
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 2d9d91a..ea3cb66 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -63,9 +63,9 @@
       EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask());
       uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(quick_oat_code), 2);
       quick_oat_code = reinterpret_cast<const void*>(oat_code_aligned);
-      const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      EXPECT_TRUE(quick_code != nullptr);
-      size_t code_size = quick_code->size() * sizeof(quick_code[0]);
+      ArrayRef<const uint8_t> quick_code = compiled_method->GetQuickCode();
+      EXPECT_FALSE(quick_code.empty());
+      size_t code_size = quick_code.size() * sizeof(quick_code[0]);
       EXPECT_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size))
           << PrettyMethod(method) << " " << code_size;
       CHECK_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size));
@@ -98,6 +98,7 @@
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
+    compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings2);
   }
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 4ddd457..c7b8884 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -31,18 +31,22 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "gc/space/image_space.h"
 #include "gc/space/space.h"
+#include "handle_scope-inl.h"
 #include "image_writer.h"
 #include "linker/relative_patcher.h"
 #include "mirror/array.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
+#include "oat_quick_method_header.h"
 #include "os.h"
 #include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
-#include "handle_scope-inl.h"
+#include "type_lookup_table.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -104,6 +108,9 @@
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
+    size_oat_lookup_table_alignment_(0),
+    size_oat_lookup_table_offset_(0),
+    size_oat_lookup_table_(0),
     method_offset_map_() {
   CHECK(key_value_store != nullptr);
 
@@ -126,6 +133,10 @@
     offset = InitDexFiles(offset);
   }
   {
+    TimingLogger::ScopedTiming split("InitLookupTables", timings);
+    offset = InitLookupTables(offset);
+  }
+  {
     TimingLogger::ScopedTiming split("InitOatClasses", timings);
     offset = InitOatClasses(offset);
   }
@@ -143,6 +154,18 @@
   }
   size_ = offset;
 
+  if (!HasImage()) {
+    // Allocate space for app dex cache arrays in the .bss section.
+    size_t bss_start = RoundUp(size_, kPageSize);
+    size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
+    bss_size_ = 0u;
+    for (const DexFile* dex_file : dex_files) {
+      dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
+      DexCacheArraysLayout layout(pointer_size, dex_file);
+      bss_size_ += layout.Size();
+    }
+  }
+
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   CHECK_EQ(compiler->IsImage(), image_writer_ != nullptr);
   CHECK_EQ(compiler->IsImage(),
@@ -157,7 +180,7 @@
 }
 
 struct OatWriter::GcMapDataAccess {
-  static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
+  static ArrayRef<const uint8_t> GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
     return compiled_method->GetGcMap();
   }
 
@@ -179,7 +202,7 @@
 };
 
 struct OatWriter::MappingTableDataAccess {
-  static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
+  static ArrayRef<const uint8_t> GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
     return compiled_method->GetMappingTable();
   }
 
@@ -201,7 +224,7 @@
 };
 
 struct OatWriter::VmapTableDataAccess {
-  static const SwapVector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
+  static ArrayRef<const uint8_t> GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
     return compiled_method->GetVmapTable();
   }
 
@@ -307,7 +330,8 @@
     return true;
   }
 
-  bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED, const ClassDataItemIterator& it) {
+  bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED,
+                   const ClassDataItemIterator& it) {
     // Fill in the compiled_methods_ array for methods that have a
     // CompiledMethod. We track the number of non-null entries in
     // num_non_null_compiled_methods_ since we only want to allocate
@@ -373,8 +397,8 @@
       // Derived from CompiledMethod.
       uint32_t quick_code_offset = 0;
 
-      const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      ArrayRef<const uint8_t> quick_code = compiled_method->GetQuickCode();
+      uint32_t code_size = quick_code.size() * sizeof(uint8_t);
       uint32_t thumb_offset = compiled_method->CodeDelta();
 
       // Deduplicate code arrays if we are not producing debuggable code.
@@ -413,7 +437,7 @@
       uint32_t vmap_table_offset = method_header->vmap_table_offset_;
       // If we don't have quick code, then we must have a vmap, as that is how the dex2dex
       // compiler records its transformations.
-      DCHECK(quick_code != nullptr || vmap_table_offset != 0);
+      DCHECK(!quick_code.empty() || vmap_table_offset != 0);
       uint32_t gc_map_offset = method_header->gc_map_offset_;
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
@@ -481,12 +505,12 @@
         } else {
           status = mirror::Class::kStatusNotReady;
         }
-        const SwapVector<uint8_t>* gc_map = compiled_method->GetGcMap();
-        if (gc_map != nullptr) {
-          size_t gc_map_size = gc_map->size() * sizeof(gc_map[0]);
+        ArrayRef<const uint8_t> gc_map = compiled_method->GetGcMap();
+        if (!gc_map.empty()) {
+          size_t gc_map_size = gc_map.size() * sizeof(gc_map[0]);
           bool is_native = it.MemberIsNative();
           CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified)
-              << gc_map << " " << gc_map_size << " " << (is_native ? "true" : "false") << " "
+              << gc_map_size << " " << (is_native ? "true" : "false") << " "
               << (status < mirror::Class::kStatusVerified) << " " << status << " "
               << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         }
@@ -504,30 +528,22 @@
  private:
   struct CodeOffsetsKeyComparator {
     bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const {
-      if (lhs->GetQuickCode() != rhs->GetQuickCode()) {
-        return lhs->GetQuickCode() < rhs->GetQuickCode();
+      // Code is deduplicated by CompilerDriver, compare only data pointers.
+      if (lhs->GetQuickCode().data() != rhs->GetQuickCode().data()) {
+        return lhs->GetQuickCode().data() < rhs->GetQuickCode().data();
       }
       // If the code is the same, all other fields are likely to be the same as well.
-      if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) {
-        return lhs->GetMappingTable() < rhs->GetMappingTable();
+      if (UNLIKELY(lhs->GetMappingTable().data() != rhs->GetMappingTable().data())) {
+        return lhs->GetMappingTable().data() < rhs->GetMappingTable().data();
       }
-      if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) {
-        return lhs->GetVmapTable() < rhs->GetVmapTable();
+      if (UNLIKELY(lhs->GetVmapTable().data() != rhs->GetVmapTable().data())) {
+        return lhs->GetVmapTable().data() < rhs->GetVmapTable().data();
       }
-      if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) {
-        return lhs->GetGcMap() < rhs->GetGcMap();
+      if (UNLIKELY(lhs->GetGcMap().data() != rhs->GetGcMap().data())) {
+        return lhs->GetGcMap().data() < rhs->GetGcMap().data();
       }
-      const auto& lhs_patches = lhs->GetPatches();
-      const auto& rhs_patches = rhs->GetPatches();
-      if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) {
-        return lhs_patches.size() < rhs_patches.size();
-      }
-      auto rit = rhs_patches.begin();
-      for (const LinkerPatch& lpatch : lhs_patches) {
-        if (UNLIKELY(!(lpatch == *rit))) {
-          return lpatch < *rit;
-        }
-        ++rit;
+      if (UNLIKELY(lhs->GetPatches().data() != rhs->GetPatches().data())) {
+        return lhs->GetPatches().data() < rhs->GetPatches().data();
       }
       return false;
     }
@@ -568,17 +584,17 @@
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
       DCHECK_EQ(DataAccess::GetOffset(oat_class, method_offsets_index_), 0u);
 
-      const SwapVector<uint8_t>* map = DataAccess::GetData(compiled_method);
-      uint32_t map_size = map == nullptr ? 0 : map->size() * sizeof((*map)[0]);
+      ArrayRef<const uint8_t> map = DataAccess::GetData(compiled_method);
+      uint32_t map_size = map.size() * sizeof(map[0]);
       if (map_size != 0u) {
-        auto lb = dedupe_map_.lower_bound(map);
-        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(map, lb->first)) {
+        auto lb = dedupe_map_.lower_bound(map.data());
+        if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(map.data(), lb->first)) {
           DataAccess::SetOffset(oat_class, method_offsets_index_, lb->second);
         } else {
           DataAccess::SetOffset(oat_class, method_offsets_index_, offset_);
-          dedupe_map_.PutBefore(lb, map, offset_);
+          dedupe_map_.PutBefore(lb, map.data(), offset_);
           offset_ += map_size;
-          writer_->oat_header_->UpdateChecksum(&(*map)[0], map_size);
+          writer_->oat_header_->UpdateChecksum(&map[0], map_size);
         }
       }
       ++method_offsets_index_;
@@ -590,7 +606,7 @@
  private:
   // Deduplication is already done on a pointer basis by the compiler driver,
   // so we can simply compare the pointers to find out if things are duplicated.
-  SafeMap<const SwapVector<uint8_t>*, uint32_t> dedupe_map_;
+  SafeMap<const uint8_t*, uint32_t> dedupe_map_;
 };
 
 class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
@@ -632,7 +648,7 @@
       UNREACHABLE();
     }
 
-    if (compiled_method != nullptr && compiled_method->GetQuickCode()->size() != 0) {
+    if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0) {
       method->SetEntryPointFromQuickCompiledCodePtrSize(
           reinterpret_cast<void*>(offsets.code_offset_), pointer_size_);
     }
@@ -655,10 +671,10 @@
       no_thread_suspension_(soa_.Self(), "OatWriter patching"),
       class_linker_(Runtime::Current()->GetClassLinker()),
       dex_cache_(nullptr) {
-    if (writer_->image_writer_ != nullptr) {
+    patched_code_.reserve(16 * KB);
+    if (writer_->HasImage()) {
       // If we're creating the image, the address space must be ready so that we can apply patches.
       CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
-      patched_code_.reserve(16 * KB);
     }
   }
 
@@ -698,10 +714,8 @@
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
-      const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
-      // Need a wrapper if we create a copy for patching.
-      ArrayRef<const uint8_t> wrapped(*quick_code);
-      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      ArrayRef<const uint8_t> quick_code = compiled_method->GetQuickCode();
+      uint32_t code_size = quick_code.size() * sizeof(uint8_t);
 
       // Deduplicate code arrays.
       const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_];
@@ -738,8 +752,8 @@
         DCHECK_OFFSET_();
 
         if (!compiled_method->GetPatches().empty()) {
-          patched_code_.assign(quick_code->begin(), quick_code->end());
-          wrapped = ArrayRef<const uint8_t>(patched_code_);
+          patched_code_.assign(quick_code.begin(), quick_code.end());
+          quick_code = ArrayRef<const uint8_t>(patched_code_);
           for (const LinkerPatch& patch : compiled_method->GetPatches()) {
             if (patch.Type() == kLinkerPatchCallRelative) {
               // NOTE: Relative calls across oat files are not supported.
@@ -766,8 +780,8 @@
           }
         }
 
-        writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size);
-        if (!out->WriteFully(wrapped.data(), code_size)) {
+        writer_->oat_header_->UpdateChecksum(quick_code.data(), code_size);
+        if (!out->WriteFully(quick_code.data(), code_size)) {
           ReportWriteFailure("method code", it);
           return false;
         }
@@ -841,24 +855,28 @@
   }
 
   uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->image_writer_ != nullptr) {
+    if (writer_->HasImage()) {
       auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>(
               patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
       const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_;
       return element - oat_data;
     } else {
-      LOG(FATAL) << "Unimplemented.";
-      UNREACHABLE();
+      size_t start = writer_->dex_cache_arrays_offsets_.Get(patch.TargetDexCacheDexFile());
+      return start + patch.TargetDexCacheElementOffset();
     }
   }
 
   void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    // NOTE: Direct method pointers across oat files don't use linker patches. However, direct
-    // type pointers across oat files do. (TODO: Investigate why.)
-    if (writer_->image_writer_ != nullptr) {
+    if (writer_->HasImage()) {
       object = writer_->image_writer_->GetImageAddress(object);
+    } else {
+      // NOTE: We're using linker patches for app->boot references when the image can
+      // be relocated and therefore we need to emit .oat_patches. We're not using this
+      // for app->app references, so check that the object is in the image space.
+      DCHECK(Runtime::Current()->GetHeap()->FindSpaceFromObject(object, false)->IsImageSpace());
     }
+    // Note: We only patch references targeting Objects in the image, which is in the low 4gb.
     uint32_t address = PointerToLowMemUInt32(object);
     DCHECK_LE(offset + 4, code->size());
     uint8_t* data = &(*code)[offset];
@@ -870,12 +888,17 @@
 
   void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    // NOTE: Direct method pointers across oat files don't use linker patches. However, direct
-    // type pointers across oat files do. (TODO: Investigate why.)
-    if (writer_->image_writer_ != nullptr) {
+    if (writer_->HasImage()) {
       method = writer_->image_writer_->GetImageMethodAddress(method);
+    } else if (kIsDebugBuild) {
+      // NOTE: We're using linker patches for app->boot references when the image can
+      // be relocated and therefore we need to emit .oat_patches. We're not using this
+      // for app->app references, so check that the method is an image method.
+      gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+      size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
+      CHECK(image_space->GetImageHeader().GetMethodsSection().Contains(method_offset));
     }
-    // Note: We only patch ArtMethods to low 4gb since thats where the image is.
+    // Note: We only patch references targeting ArtMethods in the image, which is in the low 4gb.
     uint32_t address = PointerToLowMemUInt32(method);
     DCHECK_LE(offset + 4, code->size());
     uint8_t* data = &(*code)[offset];
@@ -887,9 +910,11 @@
 
   void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    uint32_t address = writer_->image_writer_ == nullptr ? target_offset :
-        PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
-                              writer_->oat_data_offset_ + target_offset);
+    uint32_t address = target_offset;
+    if (writer_->HasImage()) {
+      address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
+                                      writer_->oat_data_offset_ + target_offset);
+    }
     DCHECK_LE(offset + 4, code->size());
     uint8_t* data = &(*code)[offset];
     data[0] = address & 0xffu;
@@ -921,14 +946,14 @@
       ++method_offsets_index_;
 
       // Write deduplicated map.
-      const SwapVector<uint8_t>* map = DataAccess::GetData(compiled_method);
-      size_t map_size = map == nullptr ? 0 : map->size() * sizeof((*map)[0]);
+      ArrayRef<const uint8_t> map = DataAccess::GetData(compiled_method);
+      size_t map_size = map.size() * sizeof(map[0]);
       DCHECK((map_size == 0u && map_offset == 0u) ||
             (map_size != 0u && map_offset != 0u && map_offset <= offset_))
           << map_size << " " << map_offset << " " << offset_ << " "
           << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " for " << DataAccess::Name();
       if (map_size != 0u && map_offset == offset_) {
-        if (UNLIKELY(!out->WriteFully(&(*map)[0], map_size))) {
+        if (UNLIKELY(!out->WriteFully(&map[0], map_size))) {
           ReportWriteFailure(it);
           return false;
         }
@@ -1027,11 +1052,29 @@
     oat_dex_files_[i]->dex_file_offset_ = offset;
 
     const DexFile* dex_file = (*dex_files_)[i];
+
+    // Initialize type lookup table
+    oat_dex_files_[i]->lookup_table_ = dex_file->GetTypeLookupTable();
+
     offset += dex_file->GetHeader().file_size_;
   }
   return offset;
 }
 
+size_t OatWriter::InitLookupTables(size_t offset) {
+  for (OatDexFile* oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file->lookup_table_ != nullptr) {
+      uint32_t aligned_offset = RoundUp(offset, 4);
+      oat_dex_file->lookup_table_offset_ = aligned_offset;
+      size_oat_lookup_table_alignment_ += aligned_offset - offset;
+      offset = aligned_offset + oat_dex_file->lookup_table_->RawDataLength();
+    } else {
+      oat_dex_file->lookup_table_offset_ = 0;
+    }
+  }
+  return offset;
+}
+
 size_t OatWriter::InitOatClasses(size_t offset) {
   // calculate the offsets within OatDexFiles to OatClasses
   InitOatClassesMethodVisitor visitor(this, offset);
@@ -1091,8 +1134,6 @@
       field.reset(compiler_driver_->Create ## fn_name()); \
       offset += field->size();
 
-    DO_TRAMPOLINE(interpreter_to_interpreter_bridge_, InterpreterToInterpreterBridge);
-    DO_TRAMPOLINE(interpreter_to_compiled_code_bridge_, InterpreterToCompiledCodeBridge);
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_, QuickGenericJniTrampoline);
     DO_TRAMPOLINE(quick_imt_conflict_trampoline_, QuickImtConflictTrampoline);
@@ -1242,6 +1283,9 @@
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
     DO_STAT(size_oat_class_method_offsets_);
+    DO_STAT(size_oat_lookup_table_alignment_);
+    DO_STAT(size_oat_lookup_table_offset_);
+    DO_STAT(size_oat_lookup_table_);
     #undef DO_STAT
 
     VLOG(compiler) << "size_total=" << PrettySize(size_total) << " (" << size_total << "B)"; \
@@ -1295,6 +1339,9 @@
     }
     size_dex_file_ += dex_file->GetHeader().file_size_;
   }
+  if (!WriteLookupTables(out, file_offset)) {
+    return false;
+  }
   for (size_t i = 0; i != oat_classes_.size(); ++i) {
     if (!oat_classes_[i]->Write(this, out, file_offset)) {
       PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
@@ -1304,6 +1351,35 @@
   return true;
 }
 
+bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
+  for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
+    const uint32_t lookup_table_offset = oat_dex_files_[i]->lookup_table_offset_;
+    const TypeLookupTable* table = oat_dex_files_[i]->lookup_table_;
+    DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
+    if (lookup_table_offset == 0) {
+      continue;
+    }
+    const uint32_t expected_offset = file_offset + lookup_table_offset;
+    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
+    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+      const DexFile* dex_file = (*dex_files_)[i];
+      PLOG(ERROR) << "Failed to seek to lookup table section. Actual: " << actual_offset
+                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
+      return false;
+    }
+    if (table != nullptr) {
+      if (!out->WriteFully(table->RawData(), table->RawDataLength())) {
+        const DexFile* dex_file = (*dex_files_)[i];
+        PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
+                    << " to " << out->GetLocation();
+        return false;
+      }
+      size_oat_lookup_table_ += table->RawDataLength();
+    }
+  }
+  return true;
+}
+
 size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset) {
   #define VISIT(VisitorType)                                              \
     do {                                                                  \
@@ -1350,8 +1426,6 @@
         DCHECK_OFFSET(); \
       } while (false)
 
-    DO_TRAMPOLINE(interpreter_to_interpreter_bridge_);
-    DO_TRAMPOLINE(interpreter_to_compiled_code_bridge_);
     DO_TRAMPOLINE(jni_dlsym_lookup_);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_);
     DO_TRAMPOLINE(quick_imt_conflict_trampoline_);
@@ -1413,6 +1487,7 @@
   dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data());
   dex_file_location_checksum_ = dex_file.GetLocationChecksum();
   dex_file_offset_ = 0;
+  lookup_table_offset_ = 0;
   methods_offsets_.resize(dex_file.NumClassDefs());
 }
 
@@ -1421,6 +1496,7 @@
           + dex_file_location_size_
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
+          + sizeof(lookup_table_offset_)
           + (sizeof(methods_offsets_[0]) * methods_offsets_.size());
 }
 
@@ -1429,6 +1505,10 @@
   oat_header->UpdateChecksum(dex_file_location_data_, dex_file_location_size_);
   oat_header->UpdateChecksum(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_));
   oat_header->UpdateChecksum(&dex_file_offset_, sizeof(dex_file_offset_));
+  oat_header->UpdateChecksum(&lookup_table_offset_, sizeof(lookup_table_offset_));
+  if (lookup_table_ != nullptr) {
+    oat_header->UpdateChecksum(lookup_table_->RawData(), lookup_table_->RawDataLength());
+  }
   oat_header->UpdateChecksum(&methods_offsets_[0],
                             sizeof(methods_offsets_[0]) * methods_offsets_.size());
 }
@@ -1457,6 +1537,11 @@
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
+  if (!out->WriteFully(&lookup_table_offset_, sizeof(lookup_table_offset_))) {
+    PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_lookup_table_offset_ += sizeof(lookup_table_offset_);
   if (!out->WriteFully(&methods_offsets_[0],
                       sizeof(methods_offsets_[0]) * methods_offsets_.size())) {
     PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 760fb7c..f2fe048 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -24,8 +24,8 @@
 #include "linker/relative_patcher.h"  // For linker::RelativePatcherTargetProvider.
 #include "mem_map.h"
 #include "method_reference.h"
-#include "oat.h"
 #include "mirror/class.h"
+#include "oat.h"
 #include "safe_map.h"
 
 namespace art {
@@ -36,6 +36,7 @@
 class ImageWriter;
 class OutputStream;
 class TimingLogger;
+class TypeLookupTable;
 
 // OatHeader         variable length with count of D OatDexFiles
 //
@@ -49,6 +50,11 @@
 // ...
 // Dex[D]
 //
+// TypeLookupTable[0] one descriptor-to-class-def-index hash table for each OatDexFile.
+// TypeLookupTable[1]
+// ...
+// TypeLookupTable[D]
+//
 // OatClass[0]       one variable sized OatClass for each of C DexFile::ClassDefs
 // OatClass[1]       contains OatClass entries with class status, offsets to code, etc.
 // ...
@@ -90,6 +96,13 @@
             TimingLogger* timings,
             SafeMap<std::string, std::string>* key_value_store);
 
+  // Returns whether the oat file has an associated image.
+  bool HasImage() const {
+    // Since the image is being created at the same time as the oat file,
+    // check if there's an image writer.
+    return image_writer_ != nullptr;
+  }
+
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
   }
@@ -161,6 +174,7 @@
 
   size_t InitOatHeader();
   size_t InitOatDexFiles(size_t offset);
+  size_t InitLookupTables(size_t offset);
   size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
@@ -170,6 +184,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool WriteTables(OutputStream* out, const size_t file_offset);
+  bool WriteLookupTables(OutputStream* out, const size_t file_offset);
   size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
@@ -192,6 +207,8 @@
     const uint8_t* dex_file_location_data_;
     uint32_t dex_file_location_checksum_;
     uint32_t dex_file_offset_;
+    uint32_t lookup_table_offset_;
+    TypeLookupTable* lookup_table_;  // Owned by the dex file.
     std::vector<uint32_t> methods_offsets_;
 
    private:
@@ -272,6 +289,10 @@
   // The size of the required .bss section holding the DexCache data.
   size_t bss_size_;
 
+  // Offsets of the dex cache arrays for each app dex file. For the
+  // boot image, this information is provided by the ImageWriter.
+  SafeMap<const DexFile*, size_t> dex_cache_arrays_offsets_;  // DexFiles not owned.
+
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
@@ -285,8 +306,6 @@
   OatHeader* oat_header_;
   std::vector<OatDexFile*> oat_dex_files_;
   std::vector<OatClass*> oat_classes_;
-  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_interpreter_bridge_;
-  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_compiled_code_bridge_;
   std::unique_ptr<const std::vector<uint8_t>> jni_dlsym_lookup_;
   std::unique_ptr<const std::vector<uint8_t>> quick_generic_jni_trampoline_;
   std::unique_ptr<const std::vector<uint8_t>> quick_imt_conflict_trampoline_;
@@ -324,6 +343,9 @@
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
   uint32_t size_oat_class_method_offsets_;
+  uint32_t size_oat_lookup_table_alignment_;
+  uint32_t size_oat_lookup_table_offset_;
+  uint32_t size_oat_lookup_table_;
 
   std::unique_ptr<linker::RelativePatcher> relative_patcher_;
 
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index b0e83b0..f985745 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -42,8 +42,8 @@
 // successor and the successor can only be reached from them.
 static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
   if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
-  HBasicBlock* succ1 = block1->GetSuccessor(0);
-  HBasicBlock* succ2 = block2->GetSuccessor(0);
+  HBasicBlock* succ1 = block1->GetSuccessors()[0];
+  HBasicBlock* succ2 = block2->GetSuccessors()[0];
   return succ1 == succ2 && succ1->GetPredecessors().size() == 2u;
 }
 
@@ -69,19 +69,17 @@
   if (cond->IsCondition()) {
     HInstruction* lhs = cond->InputAt(0);
     HInstruction* rhs = cond->InputAt(1);
-    if (cond->IsEqual()) {
-      return new (allocator) HNotEqual(lhs, rhs);
-    } else if (cond->IsNotEqual()) {
-      return new (allocator) HEqual(lhs, rhs);
-    } else if (cond->IsLessThan()) {
-      return new (allocator) HGreaterThanOrEqual(lhs, rhs);
-    } else if (cond->IsLessThanOrEqual()) {
-      return new (allocator) HGreaterThan(lhs, rhs);
-    } else if (cond->IsGreaterThan()) {
-      return new (allocator) HLessThanOrEqual(lhs, rhs);
-    } else {
-      DCHECK(cond->IsGreaterThanOrEqual());
-      return new (allocator) HLessThan(lhs, rhs);
+    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
+      case kCondEQ: return new (allocator) HEqual(lhs, rhs);
+      case kCondNE: return new (allocator) HNotEqual(lhs, rhs);
+      case kCondLT: return new (allocator) HLessThan(lhs, rhs);
+      case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs);
+      case kCondGT: return new (allocator) HGreaterThan(lhs, rhs);
+      case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs);
+      case kCondB:  return new (allocator) HBelow(lhs, rhs);
+      case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs);
+      case kCondA:  return new (allocator) HAbove(lhs, rhs);
+      case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs);
     }
   } else if (cond->IsIntConstant()) {
     HIntConstant* int_const = cond->AsIntConstant();
@@ -91,11 +89,10 @@
       DCHECK(int_const->IsOne());
       return graph->GetIntConstant(0);
     }
-  } else {
-    // General case when 'cond' is another instruction of type boolean,
-    // as verified by SSAChecker.
-    return new (allocator) HBooleanNot(cond);
   }
+  // General case when 'cond' is another instruction of type boolean,
+  // as verified by SSAChecker.
+  return new (allocator) HBooleanNot(cond);
 }
 
 void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
@@ -108,7 +105,7 @@
   if (!BlocksDoMergeTogether(true_block, false_block)) {
     return;
   }
-  HBasicBlock* merge_block = true_block->GetSuccessor(0);
+  HBasicBlock* merge_block = true_block->GetSuccessors()[0];
   if (!merge_block->HasSinglePhi()) {
     return;
   }
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 2c7c127..bcc3240 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -797,8 +797,8 @@
     HBasicBlock* new_pre_header = header->GetDominator();
     DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
     HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessor(0);  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessor(1);  // False successor.
+    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
 
     dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
     deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
@@ -845,14 +845,14 @@
     DCHECK(header->IsLoopHeader());
     HBasicBlock* pre_header = header->GetDominator();
     if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessor(0) == pre_header);
+      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
     } else {
       DCHECK(deopt_block == pre_header);
     }
     HGraph* graph = header->GetGraph();
     HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
     if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessor(1));
+      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
     }
 
     HIntConstant* const_instr = graph->GetIntConstant(constant);
@@ -926,7 +926,7 @@
     DCHECK(header->IsLoopHeader());
     HBasicBlock* pre_header = header->GetDominator();
     if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessor(0) == pre_header);
+      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
     } else {
       DCHECK(deopt_block == pre_header);
     }
@@ -1146,7 +1146,6 @@
       return nullptr;
     }
     uint32_t block_id = basic_block->GetBlockId();
-    DCHECK_LT(block_id, maps_.size());
     return &maps_[block_id];
   }
 
@@ -1496,10 +1495,10 @@
     // Start with input 1. Input 0 is from the incoming block.
     HInstruction* input1 = phi->InputAt(1);
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
-        *phi->GetBlock()->GetPredecessor(1)));
+        *phi->GetBlock()->GetPredecessors()[1]));
     for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
       DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
-          *phi->GetBlock()->GetPredecessor(i)));
+          *phi->GetBlock()->GetPredecessors()[i]));
       if (input1 != phi->InputAt(i)) {
         return false;
       }
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index ce6dc75..c9afdf2 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -71,9 +71,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -168,9 +168,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -232,9 +232,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -295,7 +295,8 @@
   HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_5 = graph_->GetIntConstant(5);
@@ -363,7 +364,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -477,7 +479,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -689,7 +692,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -791,7 +795,8 @@
   HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_0 = graph_->GetIntConstant(0);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 5acc5fd..ed193c7 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -159,9 +159,13 @@
   int locals_index = locals_.size() - number_of_parameters;
   int parameter_index = 0;
 
+  const DexFile::MethodId& referrer_method_id =
+      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
   if (!dex_compilation_unit_->IsStatic()) {
     // Add the implicit 'this' argument, not expressed in the signature.
-    HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++,
+    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
+                                                              referrer_method_id.class_idx_,
+                                                              parameter_index++,
                                                               Primitive::kPrimNot,
                                                               true);
     entry_block_->AddInstruction(parameter);
@@ -170,11 +174,16 @@
     number_of_parameters--;
   }
 
-  uint32_t pos = 1;
-  for (int i = 0; i < number_of_parameters; i++) {
-    HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++,
-                                                              Primitive::GetType(shorty[pos++]),
-                                                              false);
+  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
+  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
+  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
+    HParameterValue* parameter = new (arena_) HParameterValue(
+        *dex_file_,
+        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
+        parameter_index++,
+        Primitive::GetType(shorty[shorty_pos]),
+        false);
+    ++shorty_pos;
     entry_block_->AddInstruction(parameter);
     HLocal* local = GetLocalAt(locals_index++);
     // Store the parameter value in the local that the dex code will use
@@ -375,7 +384,7 @@
   // We do not split each edge separately, but rather create one boundary block
   // that all predecessors are relinked to. This preserves loop headers (b/23895756).
   for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlock(entry.first);
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
     for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
       if (GetTryItem(predecessor, try_block_info) != entry.second) {
         // Found a predecessor not covered by the same TryItem. Insert entering
@@ -392,10 +401,10 @@
   // Do a second pass over the try blocks and insert exit TryBoundaries where
   // the successor is not in the same TryItem.
   for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlock(entry.first);
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
     // NOTE: Do not use iterators because SplitEdge would invalidate them.
     for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
-      HBasicBlock* successor = try_block->GetSuccessor(i);
+      HBasicBlock* successor = try_block->GetSuccessors()[i];
 
       // If the successor is a try block, all of its predecessors must be
       // covered by the same TryItem. Otherwise the previous pass would have
@@ -581,7 +590,6 @@
 
 HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const {
   DCHECK_GE(dex_pc, 0);
-  DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.size());
   return branch_targets_[dex_pc];
 }
 
@@ -766,11 +774,12 @@
                                                        &string_init_offset);
   // Replace calls to String.<init> with StringFactory.
   if (is_string_init) {
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = ComputeDispatchInfo(is_string_init,
-                                                                            string_init_offset,
-                                                                            target_method,
-                                                                            direct_method,
-                                                                            direct_code);
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        dchecked_integral_cast<uint64_t>(string_init_offset),
+        0U
+    };
     HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
         arena_,
         number_of_arguments - 1,
@@ -833,11 +842,12 @@
       clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
     }
 
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = ComputeDispatchInfo(is_string_init,
-                                                                            string_init_offset,
-                                                                            target_method,
-                                                                            direct_method,
-                                                                            direct_code);
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0U
+    };
     invoke = new (arena_) HInvokeStaticOrDirect(arena_,
                                                 number_of_arguments,
                                                 return_type,
@@ -950,77 +960,6 @@
   return clinit_check;
 }
 
-HInvokeStaticOrDirect::DispatchInfo HGraphBuilder::ComputeDispatchInfo(
-    bool is_string_init,
-    int32_t string_init_offset,
-    MethodReference target_method,
-    uintptr_t direct_method,
-    uintptr_t direct_code) {
-  HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
-  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
-  uint64_t method_load_data = 0u;
-  uint64_t direct_code_ptr = 0u;
-
-  if (is_string_init) {
-    // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
-    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kStringInit;
-    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
-    method_load_data = string_init_offset;
-  } else if (target_method.dex_file == outer_compilation_unit_->GetDexFile() &&
-      target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex()) {
-    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
-    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
-  } else {
-    if (direct_method != 0u) {  // Should we use a direct pointer to the method?
-      if (direct_method != static_cast<uintptr_t>(-1)) {  // Is the method pointer known now?
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
-        method_load_data = direct_method;
-      } else {  // The direct pointer will be known at link time.
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup;
-      }
-    } else {  // Use dex cache.
-      DCHECK(target_method.dex_file == dex_compilation_unit_->GetDexFile());
-      DexCacheArraysLayout layout =
-          compiler_driver_->GetDexCacheArraysLayout(target_method.dex_file);
-      if (layout.Valid()) {  // Can we use PC-relative access to the dex cache arrays?
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
-        method_load_data = layout.MethodOffset(target_method.dex_method_index);
-      } else {  // We must go through the ArtMethod's pointer to resolved methods.
-        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
-      }
-    }
-    if (direct_code != 0u) {  // Should we use a direct pointer to the code?
-      if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
-        direct_code_ptr = direct_code;
-      } else if (compiler_driver_->IsImage() ||
-          target_method.dex_file == dex_compilation_unit_->GetDexFile()) {
-        // Use PC-relative calls for invokes within a multi-dex oat file.
-        // TODO: Recognize when the target dex file is within the current oat file for
-        // app compilation. At the moment we recognize only the boot image as multi-dex.
-        // NOTE: This will require changing the ARM backend which currently falls
-        // through from kCallPCRelative to kDirectCodeFixup for different dex files.
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
-      } else {  // The direct pointer will be known at link time.
-        // NOTE: This is used for app->boot calls when compiling an app against
-        // a relocatable but not yet relocated image.
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup;
-      }
-    } else {  // We must use the code pointer from the ArtMethod.
-      code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
-    }
-  }
-
-  if (graph_->IsDebuggable()) {
-    // For debuggable apps always use the code pointer from ArtMethod
-    // so that we don't circumvent instrumentation stubs if installed.
-    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
-  }
-
-  return HInvokeStaticOrDirect::DispatchInfo {
-    method_load_kind, code_ptr_location, method_load_data, direct_code_ptr };
-}
-
 bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke,
                                          uint32_t number_of_vreg_arguments,
                                          uint32_t* args,
@@ -1176,10 +1115,9 @@
         verified_method->GetStringInitPcRegMap();
     auto map_it = string_init_map.find(dex_pc);
     if (map_it != string_init_map.end()) {
-      std::set<uint32_t> reg_set = map_it->second;
-      for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+      for (uint32_t reg : map_it->second) {
         HInstruction* load_local = LoadLocal(original_dex_register, Primitive::kPrimNot, dex_pc);
-        UpdateLocal(*set_it, load_local, dex_pc);
+        UpdateLocal(reg, load_local, dex_pc);
       }
     }
   } else {
@@ -1234,12 +1172,14 @@
                                                            field_index,
                                                            dex_pc);
     } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
       field_set = new (arena_) HInstanceFieldSet(null_check,
                                                  value,
                                                  field_type,
                                                  resolved_field->GetOffset(),
                                                  resolved_field->IsVolatile(),
                                                  field_index,
+                                                 class_def_index,
                                                  *dex_file_,
                                                  dex_compilation_unit_->GetDexCache(),
                                                  dex_pc);
@@ -1254,11 +1194,13 @@
                                                            field_index,
                                                            dex_pc);
     } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
       field_get = new (arena_) HInstanceFieldGet(null_check,
                                                  field_type,
                                                  resolved_field->GetOffset(),
                                                  resolved_field->IsVolatile(),
                                                  field_index,
+                                                 class_def_index,
                                                  *dex_file_,
                                                  dex_compilation_unit_->GetDexCache(),
                                                  dex_pc);
@@ -1303,7 +1245,13 @@
       soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
-  return outer_class.Get() == cls.Get();
+  // GetOutermostCompilingClass returns null when the class is unresolved
+  // (e.g. if it derives from an unresolved class). This is bogus, given that
+  // we are compiling it.
+  // When this happens we cannot establish a direct relation between the current
+  // class and the outer class, so we return false.
+  // (Note that this is only used for optimizing invokes and field accesses)
+  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
 }
 
 void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
@@ -1394,6 +1342,8 @@
     cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
+
+  uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
   if (is_put) {
     // We need to keep the class alive before loading the value.
     Temporaries temps(graph_);
@@ -1406,6 +1356,7 @@
                                                                 resolved_field->GetOffset(),
                                                                 resolved_field->IsVolatile(),
                                                                 field_index,
+                                                                class_def_index,
                                                                 *dex_file_,
                                                                 dex_cache_,
                                                                 dex_pc));
@@ -1415,6 +1366,7 @@
                                                                 resolved_field->GetOffset(),
                                                                 resolved_field->IsVolatile(),
                                                                 field_index,
+                                                                class_def_index,
                                                                 *dex_file_,
                                                                 dex_cache_,
                                                                 dex_pc));
@@ -2877,7 +2829,6 @@
 }  // NOLINT(readability/fn_size)
 
 HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const {
-  DCHECK_LT(register_index, locals_.size());
   return locals_[register_index];
 }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 6910d51..9eaa4b6 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -276,12 +276,6 @@
                                      uint32_t dex_pc,
                                      HInvoke* invoke);
 
-  HInvokeStaticOrDirect::DispatchInfo ComputeDispatchInfo(bool is_string_init,
-                                                          int32_t string_init_offset,
-                                                          MethodReference target_method,
-                                                          uintptr_t direct_method,
-                                                          uintptr_t direct_code);
-
   bool SetupInvokeArguments(HInvoke* invoke,
                             uint32_t number_of_vreg_arguments,
                             uint32_t* args,
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 1da2a07..a1bb5e0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -32,6 +32,10 @@
 #include "code_generator_x86_64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_mips64
 #include "code_generator_mips64.h"
 #endif
@@ -41,6 +45,7 @@
 #include "driver/dex_compilation_unit.h"
 #include "gc_map_builder.h"
 #include "graph_visualizer.h"
+#include "intrinsics.h"
 #include "leb128.h"
 #include "mapping_table.h"
 #include "mirror/array-inl.h"
@@ -155,7 +160,6 @@
 }
 
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
-  DCHECK_LT(current_block_index_, block_order_->size());
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
 }
@@ -172,7 +176,7 @@
 
 HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
   while (block->IsSingleJump()) {
-    block = block->GetSuccessor(0);
+    block = block->GetSuccessors()[0];
   }
   return block;
 }
@@ -375,13 +379,17 @@
 
   if (invoke->IsInvokeStaticOrDirect()) {
     HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
-    if (call->IsStringInit()) {
-      locations->AddTemp(visitor->GetMethodLocation());
-    } else if (call->IsRecursive()) {
-      locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation());
-    } else {
-      locations->AddTemp(visitor->GetMethodLocation());
-      locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister());
+    switch (call->GetMethodLoadKind()) {
+      case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+        locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation());
+        break;
+      case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+        locations->AddTemp(visitor->GetMethodLocation());
+        locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister());
+        break;
+      default:
+        locations->AddTemp(visitor->GetMethodLocation());
+        break;
     }
   } else {
     locations->AddTemp(visitor->GetMethodLocation());
@@ -742,11 +750,12 @@
     }
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
-    case kMips:
-      UNUSED(compiler_options);
-      UNUSED(graph);
-      UNUSED(isa_features);
-      return nullptr;
+    case kMips: {
+      return new mips::CodeGeneratorMIPS(graph,
+                                         *isa_features.AsMipsInstructionSetFeatures(),
+                                         compiler_options,
+                                         stats);
+    }
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64: {
@@ -894,7 +903,7 @@
 }
 
 void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaAllocatorAdapter<uint8_t>> vmap_encoder(data);
+  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
   // We currently don't use callee-saved registers.
   size_t size = 0 + 1 /* marker */ + 0;
   vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
@@ -1382,4 +1391,57 @@
   }
 }
 
+void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
+  // Check to see if we have known failures that will cause us to have to bail out
+  // to the runtime, and just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We will have to fail anyway.
+    return;
+  }
+
+  // The length must be >= 0.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (optimizations.GetDestinationIsSource()) {
+    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
+      // We only support backward copying if source and destination are the same.
+      return;
+    }
+  }
+
+  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
+    // We currently don't intrinsify primitive copying.
+    return;
+  }
+
+  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 0a36989..47b6f30 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -172,6 +172,7 @@
                                OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGenerator() {}
 
+  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
   HGraph* GetGraph() const { return graph_; }
 
   HBasicBlock* GetNextBlockToEmit() const;
@@ -421,6 +422,8 @@
                                              Location runtime_type_index_location,
                                              Location runtime_return_location);
 
+  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
+
   void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
 
@@ -429,6 +432,12 @@
                              uint32_t dex_pc,
                              SlowPathCode* slow_path) = 0;
 
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) = 0;
+
   // Generate a call to a static or direct method.
   virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
   // Generate a call to a virtual method.
@@ -531,6 +540,8 @@
 
   template <typename LabelType>
   LabelType* CommonInitializeLabels() {
+    // We use raw array allocations instead of ArenaVector<> because Labels are
+    // non-constructible and non-movable and as such cannot be held in a vector.
     size_t size = GetGraph()->GetBlocks().size();
     LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                       kArenaAllocCodeGenerator);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 08d8d88..3dc3b7f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -56,6 +56,8 @@
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
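+// PackedSwitch instructions with at least kPackedSwitchJumpTableThreshold entries
+// are lowered to a jump table (Thumb2 only); smaller switches use a chain of
+// compare-and-branch instructions instead.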
+
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
 
@@ -409,7 +411,7 @@
 #undef __
 #define __ down_cast<ArmAssembler*>(GetAssembler())->
 
-inline Condition ARMSignedOrFPCondition(IfCondition cond) {
+inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return EQ;
     case kCondNE: return NE;
@@ -417,19 +419,30 @@
     case kCondLE: return LE;
     case kCondGT: return GT;
     case kCondGE: return GE;
+    case kCondB:  return LO;
+    case kCondBE: return LS;
+    case kCondA:  return HI;
+    case kCondAE: return HS;
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 }
 
+// Maps signed condition to unsigned condition.
 inline Condition ARMUnsignedCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return EQ;
     case kCondNE: return NE;
+    // Signed to unsigned.
     case kCondLT: return LO;
     case kCondLE: return LS;
     case kCondGT: return HI;
     case kCondGE: return HS;
+    // Unsigned conditions remain unchanged.
+    case kCondB:  return LO;
+    case kCondBE: return LS;
+    case kCondA:  return HI;
+    case kCondAE: return HS;
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
@@ -502,17 +515,6 @@
     uint32_t new_position = __ GetAdjustedPosition(old_position);
     stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
   }
-  // Adjust native pc offsets of block labels.
-  for (HBasicBlock* block : *block_order_) {
-    // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid
-    // FirstNonEmptyBlock() which could lead to adjusting a label more than once.
-    DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size());
-    Label* block_label = &block_labels_[block->GetBlockId()];
-    DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump());
-    if (block_label->IsBound()) {
-      __ AdjustLabelPosition(block_label);
-    }
-  }
   // Adjust pc offsets for the disassembly information.
   if (disasm_info_ != nullptr) {
     GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
@@ -527,10 +529,6 @@
       it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
     }
   }
-  // Adjust pc offsets for relative call patches.
-  for (MethodPatchInfo<Label>& info : relative_call_patches_) {
-    __ AdjustLabelPosition(&info.label);
-  }
 
   CodeGenerator::Finalize(allocator);
 }
@@ -721,7 +719,8 @@
 }
 
 void CodeGeneratorARM::Bind(HBasicBlock* block) {
-  __ Bind(GetLabelOf(block));
+  Label* label = GetLabelOf(block);
+  __ BindTrackedLabel(label);
 }
 
 Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
@@ -1130,8 +1129,7 @@
   exit->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM::VisitExit(HExit* exit) {
-  UNUSED(exit);
+void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
 void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) {
@@ -1149,12 +1147,13 @@
                                                   Label* true_label,
                                                   Label* false_label) {
   __ vmstat();  // transfer FP status register to ARM APSR.
+  // TODO: merge into a single branch (except "equal or unordered" and "not equal")
   if (cond->IsFPConditionTrueIfNaN()) {
     __ b(true_label, VS);  // VS for unordered.
   } else if (cond->IsFPConditionFalseIfNaN()) {
     __ b(false_label, VS);  // VS for unordered.
   }
-  __ b(true_label, ARMSignedOrFPCondition(cond->GetCondition()));
+  __ b(true_label, ARMCondition(cond->GetCondition()));
 }
 
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1169,10 +1168,11 @@
   Register left_low = left.AsRegisterPairLow<Register>();
   IfCondition true_high_cond = if_cond;
   IfCondition false_high_cond = cond->GetOppositeCondition();
-  Condition final_condition = ARMUnsignedCondition(if_cond);
+  Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
 
   // Set the conditions for the test, remembering that == needs to be
   // decided using the low words.
+  // TODO: consider avoiding jumps with temporary and CMP low+SBC high
   switch (if_cond) {
     case kCondEQ:
     case kCondNE:
@@ -1190,6 +1190,18 @@
     case kCondGE:
       true_high_cond = kCondGT;
       break;
+    case kCondB:
+      false_high_cond = kCondA;
+      break;
+    case kCondBE:
+      true_high_cond = kCondB;
+      break;
+    case kCondA:
+      false_high_cond = kCondB;
+      break;
+    case kCondAE:
+      true_high_cond = kCondA;
+      break;
   }
   if (right.IsConstant()) {
     int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
@@ -1198,12 +1210,12 @@
 
     GenerateCompareWithImmediate(left_high, val_high);
     if (if_cond == kCondNE) {
-      __ b(true_label, ARMSignedOrFPCondition(true_high_cond));
+      __ b(true_label, ARMCondition(true_high_cond));
     } else if (if_cond == kCondEQ) {
-      __ b(false_label, ARMSignedOrFPCondition(false_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
     } else {
-      __ b(true_label, ARMSignedOrFPCondition(true_high_cond));
-      __ b(false_label, ARMSignedOrFPCondition(false_high_cond));
+      __ b(true_label, ARMCondition(true_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
     }
     // Must be equal high, so compare the lows.
     GenerateCompareWithImmediate(left_low, val_low);
@@ -1213,17 +1225,18 @@
 
     __ cmp(left_high, ShifterOperand(right_high));
     if (if_cond == kCondNE) {
-      __ b(true_label, ARMSignedOrFPCondition(true_high_cond));
+      __ b(true_label, ARMCondition(true_high_cond));
     } else if (if_cond == kCondEQ) {
-      __ b(false_label, ARMSignedOrFPCondition(false_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
     } else {
-      __ b(true_label, ARMSignedOrFPCondition(true_high_cond));
-      __ b(false_label, ARMSignedOrFPCondition(false_high_cond));
+      __ b(true_label, ARMCondition(true_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
     }
     // Must be equal high, so compare the lows.
     __ cmp(left_low, ShifterOperand(right_low));
   }
   // The last comparison might be unsigned.
+  // TODO: optimize cases where this is always true/false
   __ b(true_label, final_condition);
 }
 
@@ -1315,7 +1328,7 @@
         DCHECK(right.IsConstant());
         GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
       }
-      __ b(true_target, ARMSignedOrFPCondition(cond->AsCondition()->GetCondition()));
+      __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition()));
     }
   }
   if (false_target != nullptr) {
@@ -1351,8 +1364,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   HInstruction* cond = deoptimize->InputAt(0);
-  DCHECK(cond->IsCondition());
-  if (cond->AsCondition()->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
@@ -1417,11 +1429,11 @@
         GenerateCompareWithImmediate(left.AsRegister<Register>(),
                                      CodeGenerator::GetInt32ValueOf(right.GetConstant()));
       }
-      __ it(ARMSignedOrFPCondition(cond->GetCondition()), kItElse);
+      __ it(ARMCondition(cond->GetCondition()), kItElse);
       __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
-             ARMSignedOrFPCondition(cond->GetCondition()));
+             ARMCondition(cond->GetCondition()));
       __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0),
-             ARMSignedOrFPCondition(cond->GetOppositeCondition()));
+             ARMCondition(cond->GetOppositeCondition()));
       return;
     }
     case Primitive::kPrimLong:
@@ -1500,6 +1512,38 @@
   VisitCondition(comp);
 }
 
+void LocationsBuilderARM::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
 void LocationsBuilderARM::VisitLocal(HLocal* local) {
   local->SetLocations(nullptr);
 }
@@ -1512,9 +1556,8 @@
   load->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load) {
+void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(load);
 }
 
 void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) {
@@ -1541,8 +1584,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store) {
-  UNUSED(store);
+void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
@@ -1551,9 +1593,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
+void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM::VisitNullConstant(HNullConstant* constant) {
@@ -1562,9 +1603,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant) {
+void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
@@ -1573,9 +1613,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) {
+void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM::VisitFloatConstant(HFloatConstant* constant) {
@@ -1584,9 +1623,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM::VisitFloatConstant(HFloatConstant* constant) {
+void InstructionCodeGeneratorARM::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM::VisitDoubleConstant(HDoubleConstant* constant) {
@@ -1595,9 +1633,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant) {
+void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
@@ -1612,8 +1649,7 @@
   ret->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) {
-  UNUSED(ret);
+void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
   codegen_->GenerateFrameExit();
 }
 
@@ -1623,8 +1659,7 @@
   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
 }
 
-void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
-  UNUSED(ret);
+void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) {
   codegen_->GenerateFrameExit();
 }
 
@@ -1645,6 +1680,7 @@
   DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+                                         codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -1684,6 +1720,7 @@
 
 void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+                                         codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -3268,8 +3305,7 @@
   locations->SetOut(Location::Any());
 }
 
-void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) {
-  UNUSED(instruction);
+void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
 
@@ -3512,6 +3548,47 @@
   }
 }
 
+Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant,
+                                                             Opcode opcode) {
+  DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+  if (constant->IsConstant() &&
+      CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+  return Location::RequiresRegister();
+}
+
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst,
+                                                       Opcode opcode) {
+  uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
+  if (Primitive::Is64BitType(input_cst->GetType())) {
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
+        CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+  } else {
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
+  }
+}
+
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+  ShifterOperand so;
+  ArmAssembler* assembler = codegen_->GetAssembler();
+  if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+    return true;
+  }
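+  // The value does not fit the original opcode; check whether the bitwise-negated
+  // value can be encoded with the complementary opcode (AND <-> BIC, ORR <-> ORN).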
+  Opcode neg_opcode = kNoOperand;
+  switch (opcode) {
+    case AND:
+      neg_opcode = BIC;
+      break;
+    case ORR:
+      neg_opcode = ORN;
+      break;
+    default:
+      return false;
+  }
+  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+}
+
 void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
                                                  const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
@@ -4189,13 +4266,11 @@
   temp->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM::VisitTemporary(HTemporary* temp) {
+void InstructionCodeGeneratorARM::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(temp);
 }
 
-void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) {
-  UNUSED(instruction);
+void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
 
@@ -4253,7 +4328,6 @@
 }
 
 void ParallelMoveResolverARM::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4386,7 +4460,6 @@
 }
 
 void ParallelMoveResolverARM::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4914,17 +4987,18 @@
       nullptr);
 }
 
-void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
-void LocationsBuilderARM::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
-void LocationsBuilderARM::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
+void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); }
+void LocationsBuilderARM::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction, ORR); }
+void LocationsBuilderARM::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction, EOR); }
 
-void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
+void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
          || instruction->GetResultType() == Primitive::kPrimLong);
+  // Note: GVN reorders commutative operations to have the constant on the right hand side.
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
@@ -4940,72 +5014,180 @@
   HandleBitwiseOperation(instruction);
 }
 
+void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) {
+  // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
+  if (value == 0xffffffffu) {
+    if (out != first) {
+      __ mov(out, ShifterOperand(first));
+    }
+    return;
+  }
+  if (value == 0u) {
+    __ mov(out, ShifterOperand(0));
+    return;
+  }
+  ShifterOperand so;
+  if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, AND, value, &so)) {
+    __ and_(out, first, so);
+  } else {
+    DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so));
+    __ bic(out, first, ShifterOperand(~value));
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateOrrConst(Register out, Register first, uint32_t value) {
+  // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
+  if (value == 0u) {
+    if (out != first) {
+      __ mov(out, ShifterOperand(first));
+    }
+    return;
+  }
+  if (value == 0xffffffffu) {
+    __ mvn(out, ShifterOperand(0));
+    return;
+  }
+  ShifterOperand so;
+  if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, ORR, value, &so)) {
+    __ orr(out, first, so);
+  } else {
+    DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, ORN, ~value, &so));
+    __ orn(out, first, ShifterOperand(~value));
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first, uint32_t value) {
+  // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
+  if (value == 0u) {
+    if (out != first) {
+      __ mov(out, ShifterOperand(first));
+    }
+    return;
+  }
+  __ eor(out, first, ShifterOperand(value));
+}
+
 void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (second.IsConstant()) {
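+    // For a constant right-hand side, handle each 32-bit word with an optimized
+    // sequence emitted by Generate{And,Orr,Eor}Const.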
+    uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+    uint32_t value_low = Low32Bits(value);
+    if (instruction->GetResultType() == Primitive::kPrimInt) {
+      Register first_reg = first.AsRegister<Register>();
+      Register out_reg = out.AsRegister<Register>();
+      if (instruction->IsAnd()) {
+        GenerateAndConst(out_reg, first_reg, value_low);
+      } else if (instruction->IsOr()) {
+        GenerateOrrConst(out_reg, first_reg, value_low);
+      } else {
+        DCHECK(instruction->IsXor());
+        GenerateEorConst(out_reg, first_reg, value_low);
+      }
+    } else {
+      DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+      uint32_t value_high = High32Bits(value);
+      Register first_low = first.AsRegisterPairLow<Register>();
+      Register first_high = first.AsRegisterPairHigh<Register>();
+      Register out_low = out.AsRegisterPairLow<Register>();
+      Register out_high = out.AsRegisterPairHigh<Register>();
+      if (instruction->IsAnd()) {
+        GenerateAndConst(out_low, first_low, value_low);
+        GenerateAndConst(out_high, first_high, value_high);
+      } else if (instruction->IsOr()) {
+        GenerateOrrConst(out_low, first_low, value_low);
+        GenerateOrrConst(out_high, first_high, value_high);
+      } else {
+        DCHECK(instruction->IsXor());
+        GenerateEorConst(out_low, first_low, value_low);
+        GenerateEorConst(out_high, first_high, value_high);
+      }
+    }
+    return;
+  }
 
   if (instruction->GetResultType() == Primitive::kPrimInt) {
-    Register first = locations->InAt(0).AsRegister<Register>();
-    Register second = locations->InAt(1).AsRegister<Register>();
-    Register out = locations->Out().AsRegister<Register>();
+    Register first_reg = first.AsRegister<Register>();
+    ShifterOperand second_reg(second.AsRegister<Register>());
+    Register out_reg = out.AsRegister<Register>();
     if (instruction->IsAnd()) {
-      __ and_(out, first, ShifterOperand(second));
+      __ and_(out_reg, first_reg, second_reg);
     } else if (instruction->IsOr()) {
-      __ orr(out, first, ShifterOperand(second));
+      __ orr(out_reg, first_reg, second_reg);
     } else {
       DCHECK(instruction->IsXor());
-      __ eor(out, first, ShifterOperand(second));
+      __ eor(out_reg, first_reg, second_reg);
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
-    Location first = locations->InAt(0);
-    Location second = locations->InAt(1);
-    Location out = locations->Out();
+    Register first_low = first.AsRegisterPairLow<Register>();
+    Register first_high = first.AsRegisterPairHigh<Register>();
+    ShifterOperand second_low(second.AsRegisterPairLow<Register>());
+    ShifterOperand second_high(second.AsRegisterPairHigh<Register>());
+    Register out_low = out.AsRegisterPairLow<Register>();
+    Register out_high = out.AsRegisterPairHigh<Register>();
     if (instruction->IsAnd()) {
-      __ and_(out.AsRegisterPairLow<Register>(),
-              first.AsRegisterPairLow<Register>(),
-              ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ and_(out.AsRegisterPairHigh<Register>(),
-              first.AsRegisterPairHigh<Register>(),
-              ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      __ and_(out_low, first_low, second_low);
+      __ and_(out_high, first_high, second_high);
     } else if (instruction->IsOr()) {
-      __ orr(out.AsRegisterPairLow<Register>(),
-             first.AsRegisterPairLow<Register>(),
-             ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ orr(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      __ orr(out_low, first_low, second_low);
+      __ orr(out_high, first_high, second_high);
     } else {
       DCHECK(instruction->IsXor());
-      __ eor(out.AsRegisterPairLow<Register>(),
-             first.AsRegisterPairLow<Register>(),
-             ShifterOperand(second.AsRegisterPairLow<Register>()));
-      __ eor(out.AsRegisterPairHigh<Register>(),
-             first.AsRegisterPairHigh<Register>(),
-             ShifterOperand(second.AsRegisterPairHigh<Register>()));
+      __ eor(out_low, first_low, second_low);
+      __ eor(out_high, first_high, second_high);
     }
   }
 }
 
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) {
+  if (desired_dispatch_info.method_load_kind ==
+      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
+    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
+    return HInvokeStaticOrDirect::DispatchInfo {
+      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+      0u,
+      0u
+    };
+  }
+  if (desired_dispatch_info.code_ptr_location ==
+      HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
+    const DexFile& outer_dex_file = GetGraph()->GetDexFile();
+    if (&outer_dex_file != target_method.dex_file) {
+      // Calls across dex files are more likely to exceed the available BL range,
+      // so use an absolute patch with fixup if available, and kCallArtMethod otherwise.
+      HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
+          (desired_dispatch_info.method_load_kind ==
+           HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup)
+          ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup
+          : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+      return HInvokeStaticOrDirect::DispatchInfo {
+        desired_dispatch_info.method_load_kind,
+        code_ptr_location,
+        desired_dispatch_info.method_load_data,
+        0u
+      };
+    }
+  }
+  return desired_dispatch_info;
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
-  bool direct_code_loaded = false;
   switch (invoke->GetCodePtrLocation()) {
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      if (IsSameDexFile(*invoke->GetTargetMethod().dex_file, GetGraph()->GetDexFile())) {
-        break;
-      }
-      // Calls across dex files are more likely to exceed the available BL range,
-      // so use absolute patch by falling through to kDirectCodeFixup.
-      FALLTHROUGH_INTENDED;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
       // LR = code address from literal pool with link-time patch.
       __ LoadLiteral(LR, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
-      direct_code_loaded = true;
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR = invoke->GetDirectCodePtr();
       __ LoadImmediate(LR, invoke->GetDirectCodePtr());
-      direct_code_loaded = true;
       break;
     default:
       break;
@@ -5028,8 +5210,10 @@
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-      FALLTHROUGH_INTENDED;
+      // TODO: Implement this type.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
       Register method_reg;
@@ -5058,20 +5242,14 @@
       __ bl(GetFrameEntryLabel());
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      if (!direct_code_loaded) {
-        relative_call_patches_.emplace_back(invoke->GetTargetMethod());
-        __ Bind(&relative_call_patches_.back().label);
-        Label label;
-        __ bl(&label);  // Arbitrarily branch to the instruction after BL, override at link time.
-        __ Bind(&label);
-        break;
-      }
-      // If we loaded the direct code above, fall through.
-      FALLTHROUGH_INTENDED;
+      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+      __ BindTrackedLabel(&relative_call_patches_.back().label);
+      // Arbitrarily branch to the BL itself, override at link time.
+      __ bl(&relative_call_patches_.back().label);
+      break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR prepared above for better instruction scheduling.
-      DCHECK(direct_code_loaded);
       // LR()
       __ blx(LR);
       break;
@@ -5161,15 +5339,13 @@
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
-void LocationsBuilderARM::VisitBoundType(HBoundType* instruction) {
+void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction) {
+void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
@@ -5190,25 +5366,64 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
+  if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+      codegen_->GetAssembler()->IsThumb()) {
+    locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
+    if (switch_instr->GetStartValue() != 0) {
+      locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
+    }
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    GenerateCompareWithImmediate(value_reg, lower_bound + i);
-    __ b(codegen_->GetLabelOf(successors.at(i)), EQ);
-  }
+  if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+    // Create a series of compare/jumps.
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    for (uint32_t i = 0; i < num_entries; i++) {
+      GenerateCompareWithImmediate(value_reg, lower_bound + i);
+      __ b(codegen_->GetLabelOf(successors[i]), EQ);
+    }
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ b(codegen_->GetLabelOf(default_block));
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ b(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    // Create a table lookup.
+    Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+
+    // Materialize a pointer to the switch table.
+    std::vector<Label*> labels(num_entries);
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    for (uint32_t i = 0; i < num_entries; i++) {
+      labels[i] = codegen_->GetLabelOf(successors[i]);
+    }
+    JumpTable* table = __ CreateJumpTable(std::move(labels), temp_reg);
+
+    // Remove the bias.
+    Register key_reg;
+    if (lower_bound != 0) {
+      key_reg = locations->GetTemp(1).AsRegister<Register>();
+      __ AddConstant(key_reg, value_reg, -lower_bound);
+    } else {
+      key_reg = value_reg;
+    }
+
+    // Check whether the value is in the table, jump to default block if not.
+    __ CmpConstant(key_reg, num_entries - 1);
+    __ b(codegen_->GetLabelOf(default_block), Condition::HI);
+
+    // Load the displacement from the table.
+    __ ldr(temp_reg, Address(temp_reg, key_reg, Shift::LSL, 2));
+
+    // Dispatch is a direct add to the PC (for Thumb2).
+    __ EmitJumpTableDispatch(table, temp_reg);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 16d1d38..cef1095 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -169,11 +169,15 @@
 
  private:
   void HandleInvoke(HInvoke* invoke);
-  void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
+  Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitorARM parameter_visitor_;
 
@@ -205,6 +209,9 @@
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+  void GenerateAndConst(Register out, Register first, uint32_t value);
+  void GenerateOrrConst(Register out, Register first, uint32_t value);
+  void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -355,6 +362,12 @@
 
   Label* GetFrameEntryLabel() { return &frame_entry_label_; }
 
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 415b37d..b0be446 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -77,6 +77,10 @@
     case kCondLE: return le;
     case kCondGT: return gt;
     case kCondGE: return ge;
+    case kCondB:  return lo;
+    case kCondBE: return ls;
+    case kCondA:  return hi;
+    case kCondAE: return hs;
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
@@ -658,7 +662,6 @@
 }
 
 void ParallelMoveResolverARM64::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
 }
@@ -1327,8 +1330,7 @@
 };
 
 #define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name)                               \
-  void InstructionCodeGeneratorARM64::Visit##name(H##name* instr) {                   \
-    UNUSED(instr);                                                                    \
+  void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) {  \
     __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name));                               \
   }                                                                                   \
   void LocationsBuilderARM64::Visit##name(H##name* instr) {                           \
@@ -1578,6 +1580,21 @@
   HandleBinaryOp(instruction);
 }
 
+void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
+    HArm64IntermediateAddress* instruction) {
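+  // The intermediate address is simply `array + offset`; array accesses using it as
+  // a base then only need to add the scaled index (see VisitArrayGet/VisitArraySet).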
+  __ Add(OutputRegister(instruction),
+         InputRegisterAt(instruction, 0),
+         Operand(InputOperandAt(instruction, 1)));
+}
+
 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1591,14 +1608,16 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
   Primitive::Type type = instruction->GetType();
   Register obj = InputRegisterAt(instruction, 0);
-  Location index = locations->InAt(1);
+  Location index = instruction->GetLocations()->InAt(1);
   size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
   MemOperand source = HeapOperand(obj);
+  CPURegister dest = OutputCPURegister(instruction);
+
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
+  // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
   if (index.IsConstant()) {
@@ -1606,15 +1625,26 @@
     source = HeapOperand(obj, offset);
   } else {
     Register temp = temps.AcquireSameSizeAs(obj);
-    __ Add(temp, obj, offset);
+    if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      // We do not need to compute the intermediate address from the array: the
+      // input instruction has done it already. See the comment in
+      // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+      if (kIsDebugBuild) {
+        HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+        DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
+      }
+      temp = obj;
+    } else {
+      __ Add(temp, obj, offset);
+    }
     source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
   }
 
-  codegen_->Load(type, OutputCPURegister(instruction), source);
+  codegen_->Load(type, dest, source);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
-  if (type == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+  if (instruction->GetType() == Primitive::kPrimNot) {
+    GetAssembler()->MaybeUnpoisonHeapReference(dest.W());
   }
 }
 
@@ -1668,7 +1698,18 @@
     } else {
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
-      __ Add(temp, array, offset);
+      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // We do not need to compute the intermediate address from the array: the
+        // input instruction has done it already. See the comment in
+        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        if (kIsDebugBuild) {
+          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
+        }
+        temp = array;
+      } else {
+        __ Add(temp, array, offset);
+      }
       destination = HeapOperand(temp,
                                 XRegisterFrom(index),
                                 LSL,
@@ -1678,6 +1719,7 @@
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
     DCHECK(needs_write_barrier);
+    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
     vixl::Label done;
     SlowPathCodeARM64* slow_path = nullptr;
     {
@@ -1938,7 +1980,11 @@
   M(LessThan)                                                                            \
   M(LessThanOrEqual)                                                                     \
   M(GreaterThan)                                                                         \
-  M(GreaterThanOrEqual)
+  M(GreaterThanOrEqual)                                                                  \
+  M(Below)                                                                               \
+  M(BelowOrEqual)                                                                        \
+  M(Above)                                                                               \
+  M(AboveOrEqual)
 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }         \
 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }
@@ -2176,8 +2222,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM64::VisitDoubleConstant(HDoubleConstant* constant) {
-  UNUSED(constant);
+void InstructionCodeGeneratorARM64::VisitDoubleConstant(
+    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
 }
 
@@ -2185,8 +2231,7 @@
   exit->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM64::VisitExit(HExit* exit) {
-  UNUSED(exit);
+void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
@@ -2195,8 +2240,7 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant) {
-  UNUSED(constant);
+void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
 }
 
@@ -2349,8 +2393,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   HInstruction* cond = deoptimize->InputAt(0);
-  DCHECK(cond->IsCondition());
-  if (cond->AsCondition()->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
@@ -2683,9 +2726,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant) {
+void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
@@ -2693,9 +2735,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant) {
+void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
@@ -2785,6 +2826,13 @@
   return false;
 }
 
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  // On arm64 we support all dispatch types.
+  return desired_dispatch_info;
+}
+
 void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   bool direct_code_loaded = false;
@@ -3086,9 +3134,8 @@
   load->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load) {
+void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(load);
 }
 
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
@@ -3125,9 +3172,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant) {
+void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -3394,8 +3440,7 @@
   locations->SetOut(Location::Any());
 }
 
-void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction) {
-  UNUSED(instruction);
+void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
 
@@ -3465,8 +3510,7 @@
   locations->SetInAt(0, ARM64ReturnLocation(return_type));
 }
 
-void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
-  UNUSED(instruction);
+void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
   codegen_->GenerateFrameExit();
 }
 
@@ -3474,8 +3518,7 @@
   instruction->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
-  UNUSED(instruction);
+void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
   codegen_->GenerateFrameExit();
 }
 
@@ -3519,8 +3562,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store) {
-  UNUSED(store);
+void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
@@ -3637,9 +3679,8 @@
   temp->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorARM64::VisitTemporary(HTemporary* temp) {
+void InstructionCodeGeneratorARM64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(temp);
 }
 
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
@@ -3738,15 +3779,13 @@
   HandleBinaryOp(instruction);
 }
 
-void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction) {
+void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction) {
+void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
@@ -3779,7 +3818,7 @@
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   for (int32_t i = 0; i < num_entries; i++) {
     int32_t case_value = lower_bound + i;
-    vixl::Label* succ = codegen_->GetLabelOf(successors.at(i));
+    vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
     if (case_value == 0) {
       __ Cbz(value_reg, succ);
     } else {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index a068b48..ab684ea 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -382,12 +382,18 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path);
 
-  ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; }
+  ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
     return false;
   }
 
+  // Check if the desired_dispatch_info is supported. If it is, return it;
+  // otherwise return a fall-back dispatch info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
new file mode 100644
index 0000000..29d08be
--- /dev/null
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -0,0 +1,4242 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips.h"
+
+#include "arch/mips/entrypoints_direct_mips.h"
+#include "arch/mips/instruction_set_features_mips.h"
+#include "art_method.h"
+#include "code_generator_utils.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_mips.h"
+#include "mirror/array-inl.h"
+#include "mirror/class-inl.h"
+#include "offsets.h"
+#include "thread.h"
+#include "utils/assembler.h"
+#include "utils/mips/assembler_mips.h"
+#include "utils/stack_checks.h"
+
+namespace art {
+namespace mips {
+
+static constexpr int kCurrentMethodStackOffset = 0;
+static constexpr Register kMethodRegisterArgument = A0;
+
+// We need extra temporary/scratch registers (in addition to AT) in some cases.
+static constexpr Register TMP = T8;
+static constexpr FRegister FTMP = F8;
+
+// ART Thread Register.
+static constexpr Register TR = S1;
+
+Location MipsReturnLocation(Primitive::Type return_type) {
+  switch (return_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      return Location::RegisterLocation(V0);
+
+    case Primitive::kPrimLong:
+      return Location::RegisterPairLocation(V0, V1);
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      return Location::FpuRegisterLocation(F0);
+
+    case Primitive::kPrimVoid:
+      return Location();
+  }
+  UNREACHABLE();
+}
+
+Location InvokeDexCallingConventionVisitorMIPS::GetReturnLocation(Primitive::Type type) const {
+  return MipsReturnLocation(type);
+}
+
+Location InvokeDexCallingConventionVisitorMIPS::GetMethodLocation() const {
+  return Location::RegisterLocation(kMethodRegisterArgument);
+}
+
+Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(Primitive::Type type) {
+  Location next_location;
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      uint32_t gp_index = gp_index_++;
+      if (gp_index < calling_convention.GetNumberOfRegisters()) {
+        next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index));
+      } else {
+        size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
+        next_location = Location::StackSlot(stack_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t gp_index = gp_index_;
+      gp_index_ += 2;
+      if (gp_index + 1 < calling_convention.GetNumberOfRegisters()) {
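+        // Longs go in an aligned (even, odd) register pair with the low word in the even
+        // register (as in the O32 ABI), so a pair cannot start in A1; fall through to A2_A3.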
+        if (calling_convention.GetRegisterAt(gp_index) == A1) {
+          gp_index_++;  // Skip A1, and use A2_A3 instead.
+          gp_index++;
+        }
+        Register low_even = calling_convention.GetRegisterAt(gp_index);
+        Register high_odd = calling_convention.GetRegisterAt(gp_index + 1);
+        DCHECK_EQ(low_even + 1, high_odd);
+        next_location = Location::RegisterPairLocation(low_even, high_odd);
+      } else {
+        size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
+        next_location = Location::DoubleStackSlot(stack_offset);
+      }
+      break;
+    }
+
+    // Note: both float and double arguments are passed in even FPU registers. On a 32-bit FPU,
+    // a double occupies the even/odd register pair, while a float uses only the even register.
+    // On a 64-bit FPU, both doubles and floats are stored in even registers only.
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      uint32_t float_index = float_index_++;
+      if (float_index < calling_convention.GetNumberOfFpuRegisters()) {
+        next_location = Location::FpuRegisterLocation(
+            calling_convention.GetFpuRegisterAt(float_index));
+      } else {
+        size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
+        next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
+                                                     : Location::StackSlot(stack_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected parameter type " << type;
+      break;
+  }
+
+  // Space on the stack is reserved for all arguments.
+  stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
+
+  return next_location;
+}
+
+Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) {
+  return MipsReturnLocation(type);
+}
+
+#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()->
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+
+class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimInt,
+                               locations->InAt(1),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimInt);
+    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this,
+                                IsDirectEntrypoint(kQuickThrowArrayBounds));
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
+  }
+
+  bool IsFatal() const OVERRIDE { return true; }
+
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS"; }
+
+ private:
+  HBoundsCheck* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS);
+};
+
+class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
+    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this,
+                                IsDirectEntrypoint(kQuickThrowDivZero));
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
+  }
+
+  bool IsFatal() const OVERRIDE { return true; }
+
+  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS"; }
+
+ private:
+  HDivZeroCheck* const instruction_;
+  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS);
+};
+
+class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  LoadClassSlowPathMIPS(HLoadClass* cls,
+                        HInstruction* at,
+                        uint32_t dex_pc,
+                        bool do_clinit)
+      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = at_->GetLocations();
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+
+    int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
+                                            : QUICK_ENTRY_POINT(pInitializeType);
+    bool direct = do_clinit_ ? IsDirectEntrypoint(kQuickInitializeStaticStorage)
+                             : IsDirectEntrypoint(kQuickInitializeType);
+
+    mips_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this, direct);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
+
+    // Move the class to the desired location.
+    Location out = locations->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      Primitive::Type type = at_->GetType();
+      mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+    }
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS"; }
+
+ private:
+  // The class this slow path will load.
+  HLoadClass* const cls_;
+
+  // The instruction where this slow path is happening.
+  // (Might be the load class or an initialization check).
+  HInstruction* const at_;
+
+  // The dex PC of `at_`.
+  const uint32_t dex_pc_;
+
+  // Whether to initialize the class.
+  const bool do_clinit_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS);
+};
+
+class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit LoadStringSlowPathMIPS(HLoadString* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
+    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this,
+                                IsDirectEntrypoint(kQuickResolveString));
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+    Primitive::Type type = instruction_->GetType();
+    mips_codegen->MoveLocation(locations->Out(),
+                               calling_convention.GetReturnLocation(type),
+                               type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; }
+
+ private:
+  HLoadString* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS);
+};
+
+class NullCheckSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit NullCheckSlowPathMIPS(HNullCheck* instr) : instruction_(instr) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
+    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this,
+                                IsDirectEntrypoint(kQuickThrowNullPointer));
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
+  }
+
+  bool IsFatal() const OVERRIDE { return true; }
+
+  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS"; }
+
+ private:
+  HNullCheck* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS);
+};
+
+class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor)
+      : instruction_(instruction), successor_(successor) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, instruction_->GetLocations());
+    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this,
+                                IsDirectEntrypoint(kQuickTestSuspend));
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, instruction_->GetLocations());
+    if (successor_ == nullptr) {
+      __ B(GetReturnLabel());
+    } else {
+      __ B(mips_codegen->GetLabelOf(successor_));
+    }
+  }
+
+  MipsLabel* GetReturnLabel() {
+    DCHECK(successor_ == nullptr);
+    return &return_label_;
+  }
+
+  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; }
+
+ private:
+  HSuspendCheck* const instruction_;
+  // If not null, the block to branch to after the suspend check.
+  HBasicBlock* const successor_;
+
+  // If `successor_` is null, the label to branch to after the suspend check.
+  MipsLabel return_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS);
+};
+
+class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
+    uint32_t dex_pc = instruction_->GetDexPc();
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    codegen->EmitParallelMoves(locations->InAt(1),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               object_class,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
+
+    if (instruction_->IsInstanceOf()) {
+      mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+                                  instruction_,
+                                  dex_pc,
+                                  this,
+                                  IsDirectEntrypoint(kQuickInstanceofNonTrivial));
+      Primitive::Type ret_type = instruction_->GetType();
+      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
+      mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
+                           uint32_t,
+                           const mirror::Class*,
+                           const mirror::Class*>();
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
+                                  instruction_,
+                                  dex_pc,
+                                  this,
+                                  IsDirectEntrypoint(kQuickCheckCast));
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+    }
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS);
+};
+
+class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit DeoptimizationSlowPathMIPS(HInstruction* instruction)
+    : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, instruction_->GetLocations());
+    DCHECK(instruction_->IsDeoptimize());
+    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+    uint32_t dex_pc = deoptimize->GetDexPc();
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                instruction_,
+                                dex_pc,
+                                this,
+                                IsDirectEntrypoint(kQuickDeoptimize));
+  }
+
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
+
+ private:
+  HInstruction* const instruction_;
+  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
+};
+
+CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
+                                     const MipsInstructionSetFeatures& isa_features,
+                                     const CompilerOptions& compiler_options,
+                                     OptimizingCompilerStats* stats)
+    : CodeGenerator(graph,
+                    kNumberOfCoreRegisters,
+                    kNumberOfFRegisters,
+                    kNumberOfRegisterPairs,
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+                                        arraysize(kCoreCalleeSaves)),
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+                                        arraysize(kFpuCalleeSaves)),
+                    compiler_options,
+                    stats),
+      block_labels_(nullptr),
+      location_builder_(graph, this),
+      instruction_visitor_(graph, this),
+      move_resolver_(graph->GetArena(), this),
+      assembler_(&isa_features),
+      isa_features_(isa_features) {
+  // Save RA (containing the return address) to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(RA));
+}
+
+#undef __
+#define __ down_cast<MipsAssembler*>(GetAssembler())->
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
+
+void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
+  // Ensure that we fix up branches.
+  __ FinalizeCode();
+
+  // Adjust native pc offsets in stack maps.
+  for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
+    uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset;
+    uint32_t new_position = __ GetAdjustedPosition(old_position);
+    DCHECK_GE(new_position, old_position);
+    stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
+  }
+
+  // Adjust pc offsets for the disassembly information.
+  if (disasm_info_ != nullptr) {
+    GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
+    frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start);
+    frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end);
+    for (auto& it : *disasm_info_->GetInstructionIntervals()) {
+      it.second.start = __ GetAdjustedPosition(it.second.start);
+      it.second.end = __ GetAdjustedPosition(it.second.end);
+    }
+    for (auto& it : *disasm_info_->GetSlowPathIntervals()) {
+      it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start);
+      it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
+    }
+  }
+
+  CodeGenerator::Finalize(allocator);
+}
+
+MipsAssembler* ParallelMoveResolverMIPS::GetAssembler() const {
+  return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverMIPS::EmitMove(size_t index) {
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
+  codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
+}
+
+void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
+  Primitive::Type type = move->GetType();
+  Location loc1 = move->GetDestination();
+  Location loc2 = move->GetSource();
+
+  DCHECK(!loc1.IsConstant());
+  DCHECK(!loc2.IsConstant());
+
+  if (loc1.Equals(loc2)) {
+    return;
+  }
+
+  if (loc1.IsRegister() && loc2.IsRegister()) {
+    // Swap 2 GPRs.
+    Register r1 = loc1.AsRegister<Register>();
+    Register r2 = loc2.AsRegister<Register>();
+    __ Move(TMP, r2);
+    __ Move(r2, r1);
+    __ Move(r1, TMP);
+  } else if (loc1.IsFpuRegister() && loc2.IsFpuRegister()) {
+    FRegister f1 = loc1.AsFpuRegister<FRegister>();
+    FRegister f2 = loc2.AsFpuRegister<FRegister>();
+    if (type == Primitive::kPrimFloat) {
+      __ MovS(FTMP, f2);
+      __ MovS(f2, f1);
+      __ MovS(f1, FTMP);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ MovD(FTMP, f2);
+      __ MovD(f2, f1);
+      __ MovD(f1, FTMP);
+    }
+  } else if ((loc1.IsRegister() && loc2.IsFpuRegister()) ||
+             (loc1.IsFpuRegister() && loc2.IsRegister())) {
+    // Swap FPR and GPR.
+    DCHECK_EQ(type, Primitive::kPrimFloat);  // Can only swap a float.
+    FRegister f1 = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
+                                        : loc2.AsFpuRegister<FRegister>();
+    Register r2 = loc1.IsRegister() ? loc1.AsRegister<Register>()
+                                    : loc2.AsRegister<Register>();
+    __ Move(TMP, r2);
+    __ Mfc1(r2, f1);
+    __ Mtc1(TMP, f1);
+  } else if (loc1.IsRegisterPair() && loc2.IsRegisterPair()) {
+    // Swap 2 GPR register pairs.
+    Register r1 = loc1.AsRegisterPairLow<Register>();
+    Register r2 = loc2.AsRegisterPairLow<Register>();
+    __ Move(TMP, r2);
+    __ Move(r2, r1);
+    __ Move(r1, TMP);
+    r1 = loc1.AsRegisterPairHigh<Register>();
+    r2 = loc2.AsRegisterPairHigh<Register>();
+    __ Move(TMP, r2);
+    __ Move(r2, r1);
+    __ Move(r1, TMP);
+  } else if ((loc1.IsRegisterPair() && loc2.IsFpuRegister()) ||
+             (loc1.IsFpuRegister() && loc2.IsRegisterPair())) {
+    // Swap FPR and GPR register pair.
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    FRegister f1 = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
+                                        : loc2.AsFpuRegister<FRegister>();
+    Register r2_l = loc1.IsRegisterPair() ? loc1.AsRegisterPairLow<Register>()
+                                          : loc2.AsRegisterPairLow<Register>();
+    Register r2_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>()
+                                          : loc2.AsRegisterPairHigh<Register>();
+    // Use two temporary registers because we can't first swap the low 32 bits of an FPR and
+    // then swap the high 32 bits of the same FPR: mtc1 makes the high 32 bits of an FPR
+    // unpredictable, so the following mfhc1 would read an unpredictable value.
+    __ Mfc1(TMP, f1);
+    __ Mfhc1(AT, f1);
+    __ Mtc1(r2_l, f1);
+    __ Mthc1(r2_h, f1);
+    __ Move(r2_l, TMP);
+    __ Move(r2_h, AT);
+  } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
+    Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false);
+  } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) {
+    Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
+  } else {
+    LOG(FATAL) << "Swap between " << loc1 << " and " << loc2 << " is unsupported";
+  }
+}
+
+void ParallelMoveResolverMIPS::RestoreScratch(int reg) {
+  __ Pop(static_cast<Register>(reg));
+}
+
+void ParallelMoveResolverMIPS::SpillScratch(int reg) {
+  __ Push(static_cast<Register>(reg));
+}
+
+void ParallelMoveResolverMIPS::Exchange(int index1, int index2, bool double_slot) {
+  // Allocate a scratch register other than TMP, if available.
+  // Else, spill V0 (arbitrary choice) and use it as a scratch register (it will be
+  // automatically unspilled when the scratch scope object is destroyed).
+  ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters());
+  // If V0 spills onto the stack, SP-relative offsets need to be adjusted.
+  int stack_offset = ensure_scratch.IsSpilled() ? kMipsWordSize : 0;
+  for (int i = 0; i <= (double_slot ? 1 : 0); i++, stack_offset += kMipsWordSize) {
+    __ LoadFromOffset(kLoadWord,
+                      Register(ensure_scratch.GetRegister()),
+                      SP,
+                      index1 + stack_offset);
+    __ LoadFromOffset(kLoadWord,
+                      TMP,
+                      SP,
+                      index2 + stack_offset);
+    __ StoreToOffset(kStoreWord,
+                     Register(ensure_scratch.GetRegister()),
+                     SP,
+                     index2 + stack_offset);
+    __ StoreToOffset(kStoreWord, TMP, SP, index1 + stack_offset);
+  }
+}
+
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::MipsCore(static_cast<int>(reg));
+}
+
+// TODO: mapping of floating-point registers to DWARF.
+
+void CodeGeneratorMIPS::GenerateFrameEntry() {
+  __ Bind(&frame_entry_label_);
+
+  bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kMips) || !IsLeafMethod();
+
+  if (do_overflow_check) {
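+    // Probe the stack by loading (into ZERO) from the lowest address the method may touch;
+    // if that address lies in the protected region the access faults, and the PC info
+    // recorded below lets the runtime turn the fault into a StackOverflowError.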
+    __ LoadFromOffset(kLoadWord,
+                      ZERO,
+                      SP,
+                      -static_cast<int32_t>(GetStackOverflowReservedBytes(kMips)));
+    RecordPcInfo(nullptr, 0);
+  }
+
+  if (HasEmptyFrame()) {
+    return;
+  }
+
+  // Make sure the frame size isn't unreasonably large.
+  if (GetFrameSize() > GetStackOverflowReservedBytes(kMips)) {
+    LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips) << " bytes";
+  }
+
+  // Spill callee-saved registers.
+  // Note that their cumulative size is small and they can be indexed using
+  // 16-bit offsets.
+
+  // TODO: increment/decrement SP in one step instead of two or remove this comment.
+
+  uint32_t ofs = FrameEntrySpillSize();
+  bool unaligned_float = ofs & 0x7;
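+  // Sdc1/Ldc1 need an 8-byte-aligned address, so when the FPU spill offsets are not
+  // 8-byte aligned the double registers are saved/restored as two 32-bit words instead.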
+  bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
+  __ IncreaseFrameSize(ofs);
+
+  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+    Register reg = kCoreCalleeSaves[i];
+    if (allocated_registers_.ContainsCoreRegister(reg)) {
+      ofs -= kMipsWordSize;
+      __ Sw(reg, SP, ofs);
+      __ cfi().RelOffset(DWARFReg(reg), ofs);
+    }
+  }
+
+  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+    FRegister reg = kFpuCalleeSaves[i];
+    if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+      ofs -= kMipsDoublewordSize;
+      // TODO: Change the frame to avoid unaligned accesses for fpu registers.
+      if (unaligned_float) {
+        if (fpu_32bit) {
+          __ Swc1(reg, SP, ofs);
+          __ Swc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
+        } else {
+          __ Mfhc1(TMP, reg);
+          __ Swc1(reg, SP, ofs);
+          __ Sw(TMP, SP, ofs + 4);
+        }
+      } else {
+        __ Sdc1(reg, SP, ofs);
+      }
+      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
+    }
+  }
+
+  // Allocate the rest of the frame and store the current method pointer
+  // at its end.
+
+  __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
+
+  static_assert(IsInt<16>(kCurrentMethodStackOffset),
+                "kCurrentMethodStackOffset must fit into int16_t");
+  __ Sw(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+}
+
+void CodeGeneratorMIPS::GenerateFrameExit() {
+  __ cfi().RememberState();
+
+  if (!HasEmptyFrame()) {
+    // Deallocate the rest of the frame.
+
+    __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
+
+    // Restore callee-saved registers.
+    // Note that their cumulative size is small and they can be indexed using
+    // 16-bit offsets.
+
+    // TODO: increment/decrement SP in one step instead of two or remove this comment.
+
+    uint32_t ofs = 0;
+    bool unaligned_float = FrameEntrySpillSize() & 0x7;
+    bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
+
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      FRegister reg = kFpuCalleeSaves[i];
+      if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+        if (unaligned_float) {
+          if (fpu_32bit) {
+            __ Lwc1(reg, SP, ofs);
+            __ Lwc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
+          } else {
+            __ Lwc1(reg, SP, ofs);
+            __ Lw(TMP, SP, ofs + 4);
+            __ Mthc1(TMP, reg);
+          }
+        } else {
+          __ Ldc1(reg, SP, ofs);
+        }
+        ofs += kMipsDoublewordSize;
+        // TODO: __ cfi().Restore(DWARFReg(reg));
+      }
+    }
+
+    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+      Register reg = kCoreCalleeSaves[i];
+      if (allocated_registers_.ContainsCoreRegister(reg)) {
+        __ Lw(reg, SP, ofs);
+        ofs += kMipsWordSize;
+        __ cfi().Restore(DWARFReg(reg));
+      }
+    }
+
+    DCHECK_EQ(ofs, FrameEntrySpillSize());
+    __ DecreaseFrameSize(ofs);
+  }
+
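+  // Jr has a branch delay slot; the Nop below fills it.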
+  __ Jr(RA);
+  __ Nop();
+
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(GetFrameSize());
+}
+
+void CodeGeneratorMIPS::Bind(HBasicBlock* block) {
+  __ Bind(GetLabelOf(block));
+}
+
+void CodeGeneratorMIPS::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
+  if (src.Equals(dst)) {
+    return;
+  }
+
+  if (src.IsConstant()) {
+    MoveConstant(dst, src.GetConstant());
+  } else {
+    if (Primitive::Is64BitType(dst_type)) {
+      Move64(dst, src);
+    } else {
+      Move32(dst, src);
+    }
+  }
+}
+
+void CodeGeneratorMIPS::Move32(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ Move(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    } else if (source.IsFpuRegister()) {
+      __ Mfc1(destination.AsRegister<Register>(), source.AsFpuRegister<FRegister>());
+    } else {
+      DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination;
+      __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(), SP, source.GetStackIndex());
+    }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ Mtc1(source.AsRegister<Register>(), destination.AsFpuRegister<FRegister>());
+    } else if (source.IsFpuRegister()) {
+      __ MovS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+    } else {
+      DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination;
+      __ LoadSFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
+    }
+  } else {
+    DCHECK(destination.IsStackSlot()) << destination;
+    if (source.IsRegister()) {
+      __ StoreToOffset(kStoreWord, source.AsRegister<Register>(), SP, destination.GetStackIndex());
+    } else if (source.IsFpuRegister()) {
+      __ StoreSToOffset(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex());
+    } else {
+      DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination;
+      __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex());
+      __ StoreToOffset(kStoreWord, TMP, SP, destination.GetStackIndex());
+    }
+  }
+}
+
+void CodeGeneratorMIPS::Move64(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+
+  if (destination.IsRegisterPair()) {
+    if (source.IsRegisterPair()) {
+      __ Move(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+      __ Move(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
+    } else if (source.IsFpuRegister()) {
+      Register dst_high = destination.AsRegisterPairHigh<Register>();
+      Register dst_low = destination.AsRegisterPairLow<Register>();
+      FRegister src = source.AsFpuRegister<FRegister>();
+      __ Mfc1(dst_low, src);
+      __ Mfhc1(dst_high, src);
+    } else {
+      DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
+      int32_t off = source.GetStackIndex();
+      Register r = destination.AsRegisterPairLow<Register>();
+      __ LoadFromOffset(kLoadDoubleword, r, SP, off);
+    }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegisterPair()) {
+      FRegister dst = destination.AsFpuRegister<FRegister>();
+      Register src_high = source.AsRegisterPairHigh<Register>();
+      Register src_low = source.AsRegisterPairLow<Register>();
+      __ Mtc1(src_low, dst);
+      __ Mthc1(src_high, dst);
+    } else if (source.IsFpuRegister()) {
+      __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+    } else {
+      DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
+      __ LoadDFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
+    }
+  } else {
+    DCHECK(destination.IsDoubleStackSlot()) << destination;
+    int32_t off = destination.GetStackIndex();
+    if (source.IsRegisterPair()) {
+      __ StoreToOffset(kStoreDoubleword, source.AsRegisterPairLow<Register>(), SP, off);
+    } else if (source.IsFpuRegister()) {
+      __ StoreDToOffset(source.AsFpuRegister<FRegister>(), SP, off);
+    } else {
+      DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
+      __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex());
+      __ StoreToOffset(kStoreWord, TMP, SP, off);
+      __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex() + 4);
+      __ StoreToOffset(kStoreWord, TMP, SP, off + 4);
+    }
+  }
+}
+
+void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) {
+  if (c->IsIntConstant() || c->IsNullConstant()) {
+    // Move 32 bit constant.
+    int32_t value = GetInt32ValueOf(c);
+    if (destination.IsRegister()) {
+      Register dst = destination.AsRegister<Register>();
+      __ LoadConst32(dst, value);
+    } else {
+      DCHECK(destination.IsStackSlot())
+          << "Cannot move " << c->DebugName() << " to " << destination;
+      __ StoreConst32ToOffset(value, SP, destination.GetStackIndex(), TMP);
+    }
+  } else if (c->IsLongConstant()) {
+    // Move 64 bit constant.
+    int64_t value = GetInt64ValueOf(c);
+    if (destination.IsRegisterPair()) {
+      Register r_h = destination.AsRegisterPairHigh<Register>();
+      Register r_l = destination.AsRegisterPairLow<Register>();
+      __ LoadConst64(r_h, r_l, value);
+    } else {
+      DCHECK(destination.IsDoubleStackSlot())
+          << "Cannot move " << c->DebugName() << " to " << destination;
+      __ StoreConst64ToOffset(value, SP, destination.GetStackIndex(), TMP);
+    }
+  } else if (c->IsFloatConstant()) {
+    // Move 32 bit float constant.
+    int32_t value = GetInt32ValueOf(c);
+    if (destination.IsFpuRegister()) {
+      __ LoadSConst32(destination.AsFpuRegister<FRegister>(), value, TMP);
+    } else {
+      DCHECK(destination.IsStackSlot())
+          << "Cannot move " << c->DebugName() << " to " << destination;
+      __ StoreConst32ToOffset(value, SP, destination.GetStackIndex(), TMP);
+    }
+  } else {
+    // Move 64 bit double constant.
+    DCHECK(c->IsDoubleConstant()) << c->DebugName();
+    int64_t value = GetInt64ValueOf(c);
+    if (destination.IsFpuRegister()) {
+      FRegister fd = destination.AsFpuRegister<FRegister>();
+      __ LoadDConst64(fd, value, TMP);
+    } else {
+      DCHECK(destination.IsDoubleStackSlot())
+          << "Cannot move " << c->DebugName() << " to " << destination;
+      __ StoreConst64ToOffset(value, SP, destination.GetStackIndex(), TMP);
+    }
+  }
+}
+
+void CodeGeneratorMIPS::MoveConstant(Location destination, int32_t value) {
+  DCHECK(destination.IsRegister());
+  Register dst = destination.AsRegister<Register>();
+  __ LoadConst32(dst, value);
+}
+
+void CodeGeneratorMIPS::Move(HInstruction* instruction,
+                             Location location,
+                             HInstruction* move_for) {
+  LocationSummary* locations = instruction->GetLocations();
+  Primitive::Type type = instruction->GetType();
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  if (instruction->IsCurrentMethod()) {
+    Move32(location, Location::StackSlot(kCurrentMethodStackOffset));
+  } else if (locations != nullptr && locations->Out().Equals(location)) {
+    return;
+  } else if (instruction->IsIntConstant()
+             || instruction->IsLongConstant()
+             || instruction->IsNullConstant()) {
+    MoveConstant(location, instruction->AsConstant());
+  } else if (instruction->IsTemporary()) {
+    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
+    if (temp_location.IsStackSlot()) {
+      Move32(location, temp_location);
+    } else {
+      DCHECK(temp_location.IsDoubleStackSlot());
+      Move64(location, temp_location);
+    }
+  } else if (instruction->IsLoadLocal()) {
+    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
+    if (Primitive::Is64BitType(type)) {
+      Move64(location, Location::DoubleStackSlot(stack_slot));
+    } else {
+      Move32(location, Location::StackSlot(stack_slot));
+    }
+  } else {
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
+    if (Primitive::Is64BitType(type)) {
+      Move64(location, locations->Out());
+    } else {
+      Move32(location, locations->Out());
+    }
+  }
+}
+
+void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* locations) {
+  if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else if (location.IsRegisterPair()) {
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
+}
+
+Location CodeGeneratorMIPS::GetStackLocation(HLoadLocal* load) const {
+  Primitive::Type type = load->GetType();
+
+  switch (type) {
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type " << type;
+  }
+
+  LOG(FATAL) << "Unreachable";
+  return Location::NoLocation();
+}
+
+void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
+  MipsLabel done;
+  Register card = AT;
+  Register temp = TMP;
+  __ Beqz(value, &done);
+  __ LoadFromOffset(kLoadWord,
+                    card,
+                    TR,
+                    Thread::CardTableOffset<kMipsWordSize>().Int32Value());
+  __ Srl(temp, object, gc::accounting::CardTable::kCardShift);
+  __ Addu(temp, card, temp);
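+  // `card` holds the biased card table base whose low byte equals the dirty card value,
+  // so storing that byte marks the card without loading a separate constant.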
+  __ Sb(card, temp, 0);
+  __ Bind(&done);
+}
+
+void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+  // Don't allocate the dalvik style register pair passing.
+  blocked_register_pairs_[A1_A2] = true;
+
+  // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
+  blocked_core_registers_[ZERO] = true;
+  blocked_core_registers_[K0] = true;
+  blocked_core_registers_[K1] = true;
+  blocked_core_registers_[GP] = true;
+  blocked_core_registers_[SP] = true;
+  blocked_core_registers_[RA] = true;
+
+  // AT and TMP(T8) are used as temporary/scratch registers
+  // (similar to how AT is used by MIPS assemblers).
+  blocked_core_registers_[AT] = true;
+  blocked_core_registers_[TMP] = true;
+  blocked_fpu_registers_[FTMP] = true;
+
+  // Reserve suspend and thread registers.
+  blocked_core_registers_[S0] = true;
+  blocked_core_registers_[TR] = true;
+
+  // Reserve T9 for function calls.
+  blocked_core_registers_[T9] = true;
+
+  // Reserve odd-numbered FPU registers.
+  for (size_t i = 1; i < kNumberOfFRegisters; i += 2) {
+    blocked_fpu_registers_[i] = true;
+  }
+
+  if (is_baseline) {
+    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
+    }
+
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
+  }
+
+  UpdateBlockedPairRegisters();
+}
+
+void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const {
+  for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+    MipsManagedRegister current =
+        MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+    if (blocked_core_registers_[current.AsRegisterPairLow()]
+        || blocked_core_registers_[current.AsRegisterPairHigh()]) {
+      blocked_register_pairs_[i] = true;
+    }
+  }
+}
+
+Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
+  switch (type) {
+    case Primitive::kPrimLong: {
+      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
+      MipsManagedRegister pair =
+          MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
+      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
+      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
+
+      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
+      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
+      UpdateBlockedPairRegisters();
+      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
+    }
+
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
+      // Block all register pairs that contain `reg`.
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        MipsManagedRegister current =
+            MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
+          blocked_register_pairs_[i] = true;
+        }
+      }
+      return Location::RegisterLocation(reg);
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
+      return Location::FpuRegisterLocation(reg);
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  UNREACHABLE();
+}
+
+size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
+  return kMipsWordSize;
+}
+
+size_t CodeGeneratorMIPS::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ LoadFromOffset(kLoadWord, Register(reg_id), SP, stack_index);
+  return kMipsWordSize;
+}
+
+size_t CodeGeneratorMIPS::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ StoreDToOffset(FRegister(reg_id), SP, stack_index);
+  return kMipsDoublewordSize;
+}
+
+size_t CodeGeneratorMIPS::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ LoadDFromOffset(FRegister(reg_id), SP, stack_index);
+  return kMipsDoublewordSize;
+}
+
+void CodeGeneratorMIPS::DumpCoreRegister(std::ostream& stream, int reg) const {
+  stream << MipsManagedRegister::FromCoreRegister(Register(reg));
+}
+
+void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+  stream << MipsManagedRegister::FromFRegister(FRegister(reg));
+}
+
+void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
+                                      HInstruction* instruction,
+                                      uint32_t dex_pc,
+                                      SlowPathCode* slow_path) {
+  InvokeRuntime(GetThreadOffset<kMipsWordSize>(entrypoint).Int32Value(),
+                instruction,
+                dex_pc,
+                slow_path,
+                IsDirectEntrypoint(entrypoint));
+}
+
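+// Space for the four O32 argument home slots ($a0-$a3) that is reserved around a call to
+// a direct entrypoint (see InvokeRuntime below).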
+constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16;
+
+void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset,
+                                      HInstruction* instruction,
+                                      uint32_t dex_pc,
+                                      SlowPathCode* slow_path,
+                                      bool is_direct_entrypoint) {
+  if (is_direct_entrypoint) {
+    // Reserve argument space on the stack (for $a0-$a3) when calling
+    // entrypoints that directly reference native implementations.
+    // The called function may use this space to store the $a0-$a3 registers.
+    __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
+  }
+  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+  __ Jalr(T9);
+  __ Nop();
+  if (is_direct_entrypoint) {
+    __ DecreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
+  }
+  RecordPcInfo(instruction, dex_pc, slow_path);
+}
+
+void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
+                                                                    Register class_reg) {
+  __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
+  __ LoadConst32(AT, mirror::Class::kStatusInitialized);
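+  // Class status values are ordered, so any status below kStatusInitialized means the
+  // class is not yet initialized and the slow path must run.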
+  __ Blt(TMP, AT, slow_path->GetEntryLabel());
+  // Even if the initialized flag is set, we need to ensure consistent memory ordering.
+  __ Sync(0);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
+  __ Sync(0);  // Only stype 0 is supported.
+}
+
+void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                        HBasicBlock* successor) {
+  SuspendCheckSlowPathMIPS* slow_path =
+    new (GetGraph()->GetArena()) SuspendCheckSlowPathMIPS(instruction, successor);
+  codegen_->AddSlowPath(slow_path);
+
+  __ LoadFromOffset(kLoadUnsignedHalfword,
+                    TMP,
+                    TR,
+                    Thread::ThreadFlagsOffset<kMipsWordSize>().Int32Value());
+  if (successor == nullptr) {
+    __ Bnez(TMP, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ Beqz(TMP, codegen_->GetLabelOf(successor));
+    __ B(slow_path->GetEntryLabel());
+    // slow_path will return to GetLabelOf(successor).
+  }
+}
+
+InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph,
+                                                           CodeGeneratorMIPS* codegen)
+      : HGraphVisitor(graph),
+        assembler_(codegen->GetAssembler()),
+        codegen_(codegen) {}
+
+void LocationsBuilderMIPS::HandleBinaryOp(HBinaryOperation* instruction) {
+  DCHECK_EQ(instruction->InputCount(), 2U);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Primitive::Type type = instruction->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      HInstruction* right = instruction->InputAt(1);
+      bool can_use_imm = false;
+      if (right->IsConstant()) {
+        int32_t imm = CodeGenerator::GetInt32ValueOf(right->AsConstant());
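+        // Andi/Ori/Xori take a zero-extended 16-bit immediate and Addiu sign-extends its
+        // immediate; a subtraction is emitted as Addiu with the negated immediate.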
+        if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
+          can_use_imm = IsUint<16>(imm);
+        } else if (instruction->IsAdd()) {
+          can_use_imm = IsInt<16>(imm);
+        } else {
+          DCHECK(instruction->IsSub());
+          can_use_imm = IsInt<16>(-imm);
+        }
+      }
+      if (can_use_imm)
+        locations->SetInAt(1, Location::ConstantLocation(right->AsConstant()));
+      else
+        locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      // TODO: can 2nd param be const?
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      if (instruction->IsAdd() || instruction->IsSub()) {
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      } else {
+        DCHECK(instruction->IsAnd() || instruction->IsOr() || instruction->IsXor());
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(instruction->IsAdd() || instruction->IsSub());
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register dst = locations->Out().AsRegister<Register>();
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      Location rhs_location = locations->InAt(1);
+
+      Register rhs_reg = ZERO;
+      int32_t rhs_imm = 0;
+      bool use_imm = rhs_location.IsConstant();
+      if (use_imm) {
+        rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+      } else {
+        rhs_reg = rhs_location.AsRegister<Register>();
+      }
+
+      if (instruction->IsAnd()) {
+        if (use_imm)
+          __ Andi(dst, lhs, rhs_imm);
+        else
+          __ And(dst, lhs, rhs_reg);
+      } else if (instruction->IsOr()) {
+        if (use_imm)
+          __ Ori(dst, lhs, rhs_imm);
+        else
+          __ Or(dst, lhs, rhs_reg);
+      } else if (instruction->IsXor()) {
+        if (use_imm)
+          __ Xori(dst, lhs, rhs_imm);
+        else
+          __ Xor(dst, lhs, rhs_reg);
+      } else if (instruction->IsAdd()) {
+        if (use_imm)
+          __ Addiu(dst, lhs, rhs_imm);
+        else
+          __ Addu(dst, lhs, rhs_reg);
+      } else {
+        DCHECK(instruction->IsSub());
+        if (use_imm)
+          __ Addiu(dst, lhs, -rhs_imm);
+        else
+          __ Subu(dst, lhs, rhs_reg);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      // TODO: can 2nd param be const?
+      Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+      Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+      Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+      Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>();
+
+      if (instruction->IsAnd()) {
+        __ And(dst_low, lhs_low, rhs_low);
+        __ And(dst_high, lhs_high, rhs_high);
+      } else if (instruction->IsOr()) {
+        __ Or(dst_low, lhs_low, rhs_low);
+        __ Or(dst_high, lhs_high, rhs_high);
+      } else if (instruction->IsXor()) {
+        __ Xor(dst_low, lhs_low, rhs_low);
+        __ Xor(dst_high, lhs_high, rhs_high);
+      } else if (instruction->IsAdd()) {
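+        // 64-bit add with carry: after the low-word add, (dst_low < lhs_low) unsigned
+        // indicates a carry into the high word.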
+        __ Addu(dst_low, lhs_low, rhs_low);
+        __ Sltu(TMP, dst_low, lhs_low);
+        __ Addu(dst_high, lhs_high, rhs_high);
+        __ Addu(dst_high, dst_high, TMP);
+      } else {
+        DCHECK(instruction->IsSub());
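+        // 64-bit subtract with borrow: (lhs_low < dst_low) unsigned indicates a borrow
+        // from the high word.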
+        __ Subu(dst_low, lhs_low, rhs_low);
+        __ Sltu(TMP, lhs_low, dst_low);
+        __ Subu(dst_high, lhs_high, rhs_high);
+        __ Subu(dst_high, dst_high, TMP);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      if (instruction->IsAdd()) {
+        if (type == Primitive::kPrimFloat) {
+          __ AddS(dst, lhs, rhs);
+        } else {
+          __ AddD(dst, lhs, rhs);
+        }
+      } else {
+        DCHECK(instruction->IsSub());
+        if (type == Primitive::kPrimFloat) {
+          __ SubS(dst, lhs, rhs);
+        } else {
+          __ SubD(dst, lhs, rhs);
+        }
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected binary operation type " << type;
+  }
+}
+
+void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  Primitive::Type type = instr->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected shift type " << type;
+  }
+}
+
+static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte;
+
+void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  LocationSummary* locations = instr->GetLocations();
+  Primitive::Type type = instr->GetType();
+
+  Location rhs_location = locations->InAt(1);
+  bool use_imm = rhs_location.IsConstant();
+  Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>();
+  int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
+  uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
+  uint32_t shift_value = rhs_imm & shift_mask;
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register dst = locations->Out().AsRegister<Register>();
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      if (use_imm) {
+        if (instr->IsShl()) {
+          __ Sll(dst, lhs, shift_value);
+        } else if (instr->IsShr()) {
+          __ Sra(dst, lhs, shift_value);
+        } else {
+          __ Srl(dst, lhs, shift_value);
+        }
+      } else {
+        if (instr->IsShl()) {
+          __ Sllv(dst, lhs, rhs_reg);
+        } else if (instr->IsShr()) {
+          __ Srav(dst, lhs, rhs_reg);
+        } else {
+          __ Srlv(dst, lhs, rhs_reg);
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+      Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+      Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+      if (use_imm) {
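+        // The shift amount is a compile-time constant: a shift by 0 is a plain move,
+        // shifts below 32 move bits between the two words of the pair, and shifts of
+        // 32 or more operate on a single source word with the vacated word set to
+        // zero or to the sign extension.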
+        if (shift_value == 0) {
+          codegen_->Move64(locations->Out(), locations->InAt(0));
+        } else if (shift_value < kMipsBitsPerWord) {
+          if (instr->IsShl()) {
+            __ Sll(dst_low, lhs_low, shift_value);
+            __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
+            __ Sll(dst_high, lhs_high, shift_value);
+            __ Or(dst_high, dst_high, TMP);
+          } else if (instr->IsShr()) {
+            __ Sra(dst_high, lhs_high, shift_value);
+            __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+            __ Srl(dst_low, lhs_low, shift_value);
+            __ Or(dst_low, dst_low, TMP);
+          } else {
+            __ Srl(dst_high, lhs_high, shift_value);
+            __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+            __ Srl(dst_low, lhs_low, shift_value);
+            __ Or(dst_low, dst_low, TMP);
+          }
+        } else {
+          shift_value -= kMipsBitsPerWord;
+          if (instr->IsShl()) {
+            __ Sll(dst_high, lhs_low, shift_value);
+            __ Move(dst_low, ZERO);
+          } else if (instr->IsShr()) {
+            __ Sra(dst_low, lhs_high, shift_value);
+            __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1);
+          } else {
+            __ Srl(dst_low, lhs_high, shift_value);
+            __ Move(dst_high, ZERO);
+          }
+        }
+      } else {
+        MipsLabel done;
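+        // The shift amount is only known at run time. First compute the result as if
+        // the amount (mod 32) were the full shift, pulling the bits that cross the
+        // word boundary from the other half via the complemented amount in AT. Then,
+        // if bit 5 of the amount is set (i.e. the shift is >= 32), move that partial
+        // result into the other word and clear or sign-extend the vacated word.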
+        if (instr->IsShl()) {
+          __ Sllv(dst_low, lhs_low, rhs_reg);
+          __ Nor(AT, ZERO, rhs_reg);
+          __ Srl(TMP, lhs_low, 1);
+          __ Srlv(TMP, TMP, AT);
+          __ Sllv(dst_high, lhs_high, rhs_reg);
+          __ Or(dst_high, dst_high, TMP);
+          __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+          __ Beqz(TMP, &done);
+          __ Move(dst_high, dst_low);
+          __ Move(dst_low, ZERO);
+        } else if (instr->IsShr()) {
+          __ Srav(dst_high, lhs_high, rhs_reg);
+          __ Nor(AT, ZERO, rhs_reg);
+          __ Sll(TMP, lhs_high, 1);
+          __ Sllv(TMP, TMP, AT);
+          __ Srlv(dst_low, lhs_low, rhs_reg);
+          __ Or(dst_low, dst_low, TMP);
+          __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+          __ Beqz(TMP, &done);
+          __ Move(dst_low, dst_high);
+          __ Sra(dst_high, dst_high, 31);
+        } else {
+          __ Srlv(dst_high, lhs_high, rhs_reg);
+          __ Nor(AT, ZERO, rhs_reg);
+          __ Sll(TMP, lhs_high, 1);
+          __ Sllv(TMP, TMP, AT);
+          __ Srlv(dst_low, lhs_low, rhs_reg);
+          __ Or(dst_low, dst_low, TMP);
+          __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+          __ Beqz(TMP, &done);
+          __ Move(dst_low, dst_high);
+          __ Move(dst_high, ZERO);
+        }
+        __ Bind(&done);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected shift operation type " << type;
+  }
+}
+
+void LocationsBuilderMIPS::VisitAdd(HAdd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitAdd(HAdd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS::VisitAnd(HAnd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitAnd(HAnd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location index = locations->InAt(1);
+  Primitive::Type type = instruction->GetType();
+
+  switch (type) {
+    case Primitive::kPrimBoolean: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      Register out = locations->Out().AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+      } else {
+        __ Addu(TMP, obj, index.AsRegister<Register>());
+        __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
+      Register out = locations->Out().AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+      } else {
+        __ Addu(TMP, obj, index.AsRegister<Register>());
+        __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
+      Register out = locations->Out().AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
+        __ Addu(TMP, obj, TMP);
+        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      Register out = locations->Out().AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
+        __ Addu(TMP, obj, TMP);
+        __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Register out = locations->Out().AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ LoadFromOffset(kLoadWord, out, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+        __ Addu(TMP, obj, TMP);
+        __ LoadFromOffset(kLoadWord, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
+      Register out = locations->Out().AsRegisterPairLow<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
+        __ Addu(TMP, obj, TMP);
+        __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      FRegister out = locations->Out().AsFpuRegister<FRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ LoadSFromOffset(out, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+        __ Addu(TMP, obj, TMP);
+        __ LoadSFromOffset(out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      FRegister out = locations->Out().AsFpuRegister<FRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ LoadDFromOffset(out, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
+        __ Addu(TMP, obj, TMP);
+        __ LoadDFromOffset(out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+      UNREACHABLE();
+  }
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+}
+
+void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+  __ LoadFromOffset(kLoadWord, out, obj, offset);
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+}
+
+void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
+  Primitive::Type value_type = instruction->GetComponentType();
+  bool is_object = value_type == Primitive::kPrimNot;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
+  if (is_object) {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+      locations->SetInAt(2, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(2, Location::RequiresRegister());
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location index = locations->InAt(1);
+  Primitive::Type value_type = instruction->GetComponentType();
+  bool needs_runtime_call = locations->WillCall();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+
+  switch (value_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      Register value = locations->InAt(2).AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        __ StoreToOffset(kStoreByte, value, obj, offset);
+      } else {
+        __ Addu(TMP, obj, index.AsRegister<Register>());
+        __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      Register value = locations->InAt(2).AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ StoreToOffset(kStoreHalfword, value, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
+        __ Addu(TMP, obj, TMP);
+        __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      if (!needs_runtime_call) {
+        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+        Register value = locations->InAt(2).AsRegister<Register>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ StoreToOffset(kStoreWord, value, obj, offset);
+        } else {
+          DCHECK(index.IsRegister()) << index;
+          __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+          __ Addu(TMP, obj, TMP);
+          __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+        }
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (needs_write_barrier) {
+          DCHECK_EQ(value_type, Primitive::kPrimNot);
+          codegen_->MarkGCCard(obj, value);
+        }
+      } else {
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                                instruction,
+                                instruction->GetDexPc(),
+                                nullptr,
+                                IsDirectEntrypoint(kQuickAputObject));
+        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
+      Register value = locations->InAt(2).AsRegisterPairLow<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
+        __ Addu(TMP, obj, TMP);
+        __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      FRegister value = locations->InAt(2).AsFpuRegister<FRegister>();
+      DCHECK(locations->InAt(2).IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ StoreSToOffset(value, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+        __ Addu(TMP, obj, TMP);
+        __ StoreSToOffset(value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      FRegister value = locations->InAt(2).AsFpuRegister<FRegister>();
+      DCHECK(locations->InAt(2).IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ StoreDToOffset(value, obj, offset);
+      } else {
+        __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
+        __ Addu(TMP, obj, TMP);
+        __ StoreDToOffset(value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+      UNREACHABLE();
+  }
+
+  // Ints and objects already record the implicit null check (or call into the
+  // runtime) in the switch above.
+  if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+}
+
+void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  BoundsCheckSlowPathMIPS* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  Register index = locations->InAt(0).AsRegister<Register>();
+  Register length = locations->InAt(1).AsRegister<Register>();
+
+  // The length is limited to the maximum positive signed 32-bit integer, so an
+  // unsigned comparison of index against length catches both index < 0 and
+  // length <= index at once.
+  __ Bgeu(index, length, slow_path->GetEntryLabel());
+}
+
+void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathMIPS uses this register too.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register cls = locations->InAt(1).AsRegister<Register>();
+  Register obj_cls = locations->GetTemp(0).AsRegister<Register>();
+
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  // TODO: avoid this check if we know obj is not null.
+  __ Beqz(obj, slow_path->GetExitLabel());
+  // Compare the class of `obj` with `cls`.
+  __ LoadFromOffset(kLoadWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
+  __ Bne(obj_cls, cls, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (check->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) {
+  // We assume the class is not null.
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+      check->GetLoadClass(),
+      check,
+      check->GetDexPc(),
+      true);
+  codegen_->AddSlowPath(slow_path);
+  GenerateClassInitializationCheck(slow_path,
+                                   check->GetLocations()->InAt(0).AsRegister<Register>());
+}
+
+void LocationsBuilderMIPS::VisitCompare(HCompare* compare) {
+  Primitive::Type in_type = compare->InputAt(0)->GetType();
+
+  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+
+  switch (in_type) {
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      // Output overlaps because it is written before doing the low comparison.
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << in_type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Primitive::Type in_type = instruction->InputAt(0)->GetType();
+
+  //  0 if: left == right
+  //  1 if: left  > right
+  // -1 if: left  < right
+  switch (in_type) {
+    case Primitive::kPrimLong: {
+      MipsLabel done;
+      Register res = locations->Out().AsRegister<Register>();
+      Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register lhs_low  = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+      Register rhs_low  = locations->InAt(1).AsRegisterPairLow<Register>();
+      // TODO: more efficient (direct) comparison with a constant.
+      __ Slt(TMP, lhs_high, rhs_high);
+      __ Slt(AT, rhs_high, lhs_high);  // Inverted: is actually gt.
+      __ Subu(res, AT, TMP);           // Result -1:1:0 for [ <, >, == ].
+      __ Bnez(res, &done);             // If we compared ==, check if lower bits are also equal.
+      __ Sltu(TMP, lhs_low, rhs_low);
+      __ Sltu(AT, rhs_low, lhs_low);   // Inverted: is actually gt.
+      __ Subu(res, AT, TMP);           // Result -1:1:0 for [ <, >, == ].
+      __ Bind(&done);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      int32_t entry_point_offset;
+      bool direct;
+      if (in_type == Primitive::kPrimFloat) {
+        if (instruction->IsGtBias()) {
+          entry_point_offset = QUICK_ENTRY_POINT(pCmpgFloat);
+          direct = IsDirectEntrypoint(kQuickCmpgFloat);
+        } else {
+          entry_point_offset = QUICK_ENTRY_POINT(pCmplFloat);
+          direct = IsDirectEntrypoint(kQuickCmplFloat);
+        }
+      } else {
+        if (instruction->IsGtBias()) {
+          entry_point_offset = QUICK_ENTRY_POINT(pCmpgDouble);
+          direct = IsDirectEntrypoint(kQuickCmpgDouble);
+        } else {
+          entry_point_offset = QUICK_ENTRY_POINT(pCmplDouble);
+          direct = IsDirectEntrypoint(kQuickCmplDouble);
+        }
+      }
+      codegen_->InvokeRuntime(entry_point_offset,
+                              instruction,
+                              instruction->GetDexPc(),
+                              nullptr,
+                              direct);
+      if (in_type == Primitive::kPrimFloat) {
+        if (instruction->IsGtBias()) {
+          CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
+        } else {
+          CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
+        }
+      } else {
+        if (instruction->IsGtBias()) {
+          CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
+        } else {
+          CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
+        }
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << in_type;
+  }
+}
+
+void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (instruction->NeedsMaterialization()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) {
+  if (!instruction->NeedsMaterialization()) {
+    return;
+  }
+  // TODO: generalize to long
+  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Register dst = locations->Out().AsRegister<Register>();
+
+  Register lhs = locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = locations->InAt(1);
+
+  Register rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
+
+  IfCondition if_cond = instruction->GetCondition();
+
+  switch (if_cond) {
+    case kCondEQ:
+    case kCondNE:
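+      // lhs ^ rhs is zero iff the operands are equal: Sltiu(dst, dst, 1) then
+      // materializes (dst == 0) for EQ, while Sltu(dst, ZERO, dst) materializes
+      // (dst != 0) for NE.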
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (if_cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (if_cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm + 1);
+        if (if_cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (if_cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+
+    case kCondB:
+    case kCondAE:
+      // Use sltiu instruction if rhs_imm is in range [0, 32767] or in
+      // [max_unsigned - 32767 = 0xffff8000, max_unsigned = 0xffffffff].
+      if (use_imm &&
+          (IsUint<15>(rhs_imm) ||
+              IsUint<15>(rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
+        if (IsUint<15>(rhs_imm)) {
+          __ Sltiu(dst, lhs, rhs_imm);
+        } else {
+          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
+          // and then used as unsigned integer (range [0xffff8000, 0xffffffff]).
+          __ Sltiu(dst, lhs, rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (if_cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      // Use sltiu instruction if rhs_imm is in range [0, 32766] or in
+      // [max_unsigned - 32767 - 1 = 0xffff7fff, max_unsigned - 1 = 0xfffffffe].
+      // lhs <= rhs is simulated via lhs < rhs + 1.
+      if (use_imm && (rhs_imm != -1) &&
+          (IsUint<15>(rhs_imm + 1) ||
+              IsUint<15>(rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
+        if (IsUint<15>(rhs_imm + 1)) {
+          // Simulate lhs <= rhs via lhs < rhs + 1.
+          __ Sltiu(dst, lhs, rhs_imm + 1);
+        } else {
+          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
+          // and then used as unsigned integer (range [0xffff8000, 0xffffffff] where rhs_imm
+          // is in range [0xffff7fff, 0xfffffffe] since lhs <= rhs is simulated via lhs < rhs + 1).
+          __ Sltiu(dst, lhs, rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
+        }
+        if (if_cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (if_cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
+void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
+  Primitive::Type type = div->GetResultType();
+  LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong)
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
+  switch (type) {
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      locations->SetOut(calling_convention.GetReturnLocation(type));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected div type " << type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register dst = locations->Out().AsRegister<Register>();
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      Register rhs = locations->InAt(1).AsRegister<Register>();
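+      // R6 provides a three-operand divide that writes the quotient directly; before
+      // R6 the quotient has to be read back from LO, which DivR2 takes care of.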
+      if (isR6) {
+        __ DivR6(dst, lhs, rhs);
+      } else {
+        __ DivR2(dst, lhs, rhs);
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv),
+                              instruction,
+                              instruction->GetDexPc(),
+                              nullptr,
+                              IsDirectEntrypoint(kQuickLdiv));
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      if (type == Primitive::kPrimFloat) {
+        __ DivS(dst, lhs, rhs);
+      } else {
+        __ DivD(dst, lhs, rhs);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected div type " << type;
+  }
+}
+
+void LocationsBuilderMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathMIPS(instruction);
+  codegen_->AddSlowPath(slow_path);
+  Location value = instruction->GetLocations()->InAt(0);
+  Primitive::Type type = instruction->GetType();
+
+  switch (type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt: {
+      if (value.IsConstant()) {
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ B(slow_path->GetEntryLabel());
+        } else {
+          // A division by a non-zero constant is valid. We don't need to perform
+          // any check, so simply fall through.
+        }
+      } else {
+        DCHECK(value.IsRegister()) << value;
+        __ Beqz(value.AsRegister<Register>(), slow_path->GetEntryLabel());
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      if (value.IsConstant()) {
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ B(slow_path->GetEntryLabel());
+        } else {
+          // A division by a non-zero constant is valid. We don't need to perform
+          // any check, so simply fall through.
+        }
+      } else {
+        DCHECK(value.IsRegisterPair()) << value;
+        __ Or(TMP, value.AsRegisterPairHigh<Register>(), value.AsRegisterPairLow<Register>());
+        __ Beqz(TMP, slow_path->GetEntryLabel());
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+  }
+}
+
+void LocationsBuilderMIPS::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS::VisitDoubleConstant(HDoubleConstant* cst ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS::VisitExit(HExit* exit) {
+  exit->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
+}
+
+void LocationsBuilderMIPS::VisitFloatConstant(HFloatConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS::VisitGoto(HGoto* got) {
+  got->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* successor) {
+  DCHECK(!successor->IsExitBlock());
+  HBasicBlock* block = got->GetBlock();
+  HInstruction* previous = got->GetPrevious();
+  HLoopInformation* info = block->GetLoopInformation();
+
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+    return;
+  }
+  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+  }
+  if (!codegen_->GoesToNextBlock(block, successor)) {
+    __ B(codegen_->GetLabelOf(successor));
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitGoto(HGoto* got) {
+  HandleGoto(got, got->GetSuccessor());
+}
+
+void LocationsBuilderMIPS::VisitTryBoundary(HTryBoundary* try_boundary) {
+  try_boundary->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitTryBoundary(HTryBoundary* try_boundary) {
+  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
+  if (!successor->IsExitBlock()) {
+    HandleGoto(try_boundary, successor);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction,
+                                                         MipsLabel* true_target,
+                                                         MipsLabel* false_target,
+                                                         MipsLabel* always_true_target) {
+  HInstruction* cond = instruction->InputAt(0);
+  HCondition* condition = cond->AsCondition();
+
+  if (cond->IsIntConstant()) {
+    int32_t cond_value = cond->AsIntConstant()->GetValue();
+    if (cond_value == 1) {
+      if (always_true_target != nullptr) {
+        __ B(always_true_target);
+      }
+      return;
+    } else {
+      DCHECK_EQ(cond_value, 0);
+    }
+  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+    // The condition instruction has been materialized, compare the output to 0.
+    Location cond_val = instruction->GetLocations()->InAt(0);
+    DCHECK(cond_val.IsRegister());
+    __ Bnez(cond_val.AsRegister<Register>(), true_target);
+  } else {
+    // The condition instruction has not been materialized, use its inputs as
+    // the comparison and its condition as the branch condition.
+    Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>();
+    Location rhs_location = condition->GetLocations()->InAt(1);
+    Register rhs_reg = ZERO;
+    int32_t rhs_imm = 0;
+    bool use_imm = rhs_location.IsConstant();
+    if (use_imm) {
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+    } else {
+      rhs_reg = rhs_location.AsRegister<Register>();
+    }
+
+    IfCondition if_cond = condition->GetCondition();
+    if (use_imm && rhs_imm == 0) {
+      switch (if_cond) {
+        case kCondEQ:
+          __ Beqz(lhs, true_target);
+          break;
+        case kCondNE:
+          __ Bnez(lhs, true_target);
+          break;
+        case kCondLT:
+          __ Bltz(lhs, true_target);
+          break;
+        case kCondGE:
+          __ Bgez(lhs, true_target);
+          break;
+        case kCondLE:
+          __ Blez(lhs, true_target);
+          break;
+        case kCondGT:
+          __ Bgtz(lhs, true_target);
+          break;
+        case kCondB:
+          break;  // Unsigned lhs < 0 is always false; never branch to the true target.
+        case kCondBE:
+          __ Beqz(lhs, true_target);  // Unsigned lhs <= 0 holds only when lhs == 0.
+          break;
+        case kCondA:
+          __ Bnez(lhs, true_target);  // Unsigned lhs > 0 holds whenever lhs != 0.
+          break;
+        case kCondAE:
+          __ B(true_target);  // Unsigned lhs >= 0 is always true.
+          break;
+      }
+    } else {
+      if (use_imm) {
+        // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
+        rhs_reg = TMP;
+        __ LoadConst32(rhs_reg, rhs_imm);
+      }
+      switch (if_cond) {
+        case kCondEQ:
+          __ Beq(lhs, rhs_reg, true_target);
+          break;
+        case kCondNE:
+          __ Bne(lhs, rhs_reg, true_target);
+          break;
+        case kCondLT:
+          __ Blt(lhs, rhs_reg, true_target);
+          break;
+        case kCondGE:
+          __ Bge(lhs, rhs_reg, true_target);
+          break;
+        case kCondLE:
+          __ Bge(rhs_reg, lhs, true_target);
+          break;
+        case kCondGT:
+          __ Blt(rhs_reg, lhs, true_target);
+          break;
+        case kCondB:
+          __ Bltu(lhs, rhs_reg, true_target);
+          break;
+        case kCondAE:
+          __ Bgeu(lhs, rhs_reg, true_target);
+          break;
+        case kCondBE:
+          __ Bgeu(rhs_reg, lhs, true_target);
+          break;
+        case kCondA:
+          __ Bltu(rhs_reg, lhs, true_target);
+          break;
+      }
+    }
+  }
+  if (false_target != nullptr) {
+    __ B(false_target);
+  }
+}
+
+void LocationsBuilderMIPS::VisitIf(HIf* if_instr) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  HInstruction* cond = if_instr->InputAt(0);
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
+  MipsLabel* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+  MipsLabel* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+  MipsLabel* always_true_target = true_target;
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+                                if_instr->IfTrueSuccessor())) {
+    always_true_target = nullptr;
+  }
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+                                if_instr->IfFalseSuccessor())) {
+    false_target = nullptr;
+  }
+  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+  HInstruction* cond = deoptimize->InputAt(0);
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
+      DeoptimizationSlowPathMIPS(deoptimize);
+  codegen_->AddSlowPath(slow_path);
+  MipsLabel* slow_path_entry = slow_path->GetEntryLabel();
+  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
+
+void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
+  bool generate_volatile = field_info.IsVolatile() && is_wide;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (generate_volatile) {
+    InvokeRuntimeCallingConvention calling_convention;
+    // Need A0 to hold base + offset.
+    locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    if (field_type == Primitive::kPrimLong) {
+      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong));
+    } else {
+      locations->SetOut(Location::RequiresFpuRegister());
+      // Need some temp core regs since FP results are returned in core registers.
+      Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong);
+      locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>()));
+      locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairHigh<Register>()));
+    }
+  } else {
+    if (Primitive::IsFloatingPointType(instruction->GetType())) {
+      locations->SetOut(Location::RequiresFpuRegister());
+    } else {
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
+                                                  const FieldInfo& field_info,
+                                                  uint32_t dex_pc) {
+  Primitive::Type type = field_info.GetFieldType();
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  LoadOperandType load_type = kLoadUnsignedByte;
+  bool is_volatile = field_info.IsVolatile();
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+      load_type = kLoadUnsignedByte;
+      break;
+    case Primitive::kPrimByte:
+      load_type = kLoadSignedByte;
+      break;
+    case Primitive::kPrimShort:
+      load_type = kLoadSignedHalfword;
+      break;
+    case Primitive::kPrimChar:
+      load_type = kLoadUnsignedHalfword;
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimNot:
+      load_type = kLoadWord;
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      load_type = kLoadDoubleword;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+
+  if (is_volatile && load_type == kLoadDoubleword) {
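+    // A 64-bit volatile load cannot be made atomic with a pair of 32-bit loads, so it
+    // is delegated to the quick A64 load entrypoint.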
+    InvokeRuntimeCallingConvention calling_convention;
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
+               obj, field_info.GetFieldOffset().Uint32Value());
+    // Do an implicit null check.
+    __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Load),
+                            instruction,
+                            dex_pc,
+                            nullptr,
+                            IsDirectEntrypoint(kQuickA64Load));
+    CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
+    if (type == Primitive::kPrimDouble) {
+      // Need to move to FP regs since FP results are returned in core registers.
+      __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
+              locations->Out().AsFpuRegister<FRegister>());
+      __ Mthc1(locations->GetTemp(2).AsRegister<Register>(),
+               locations->Out().AsFpuRegister<FRegister>());
+    }
+  } else {
+    if (!Primitive::IsFloatingPointType(type)) {
+      Register dst;
+      if (type == Primitive::kPrimLong) {
+        DCHECK(locations->Out().IsRegisterPair());
+        dst = locations->Out().AsRegisterPairLow<Register>();
+      } else {
+        DCHECK(locations->Out().IsRegister());
+        dst = locations->Out().AsRegister<Register>();
+      }
+      __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+    } else {
+      DCHECK(locations->Out().IsFpuRegister());
+      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+      if (type == Primitive::kPrimFloat) {
+        __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+      } else {
+        __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+      }
+    }
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+  }
+}
+
+void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
+  bool generate_volatile = field_info.IsVolatile() && is_wide;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (generate_volatile) {
+    InvokeRuntimeCallingConvention calling_convention;
+    // Need A0 to hold base + offset.
+    locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    if (field_type == Primitive::kPrimLong) {
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+    } else {
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      // Pass FP parameters in core registers.
+      locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+    }
+  } else {
+    if (Primitive::IsFloatingPointType(field_type)) {
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(1, Location::RequiresRegister());
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
+                                                  const FieldInfo& field_info,
+                                                  uint32_t dex_pc) {
+  Primitive::Type type = field_info.GetFieldType();
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  StoreOperandType store_type = kStoreByte;
+  bool is_volatile = field_info.IsVolatile();
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      store_type = kStoreByte;
+      break;
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      store_type = kStoreHalfword;
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimNot:
+      store_type = kStoreWord;
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      store_type = kStoreDoubleword;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+  }
+
+  if (is_volatile && store_type == kStoreDoubleword) {
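+    // Likewise, a 64-bit volatile store is delegated to the quick A64 store
+    // entrypoint so that it appears atomic to other threads.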
+    InvokeRuntimeCallingConvention calling_convention;
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
+               obj, field_info.GetFieldOffset().Uint32Value());
+    // Do an implicit null check.
+    __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+    if (type == Primitive::kPrimDouble) {
+      // Pass FP parameters in core registers.
+      __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
+              locations->InAt(1).AsFpuRegister<FRegister>());
+      __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(),
+               locations->InAt(1).AsFpuRegister<FRegister>());
+    }
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
+                            instruction,
+                            dex_pc,
+                            nullptr,
+                            IsDirectEntrypoint(kQuickA64Store));
+    CheckEntrypointTypes<kQuickA64Store, void, volatile int64_t*, int64_t>();
+  } else {
+    if (!Primitive::IsFloatingPointType(type)) {
+      Register src;
+      if (type == Primitive::kPrimLong) {
+        DCHECK(locations->InAt(1).IsRegisterPair());
+        src = locations->InAt(1).AsRegisterPairLow<Register>();
+      } else {
+        DCHECK(locations->InAt(1).IsRegister());
+        src = locations->InAt(1).AsRegister<Register>();
+      }
+      __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+    } else {
+      DCHECK(locations->InAt(1).IsFpuRegister());
+      FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
+      if (type == Primitive::kPrimFloat) {
+        __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+      } else {
+        __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+      }
+    }
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+
+  // TODO: memory barriers?
+  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+    DCHECK(locations->InAt(1).IsRegister());
+    Register src = locations->InAt(1).AsRegister<Register>();
+    codegen_->MarkGCCard(obj, src);
+  }
+
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+  }
+}
+
+void LocationsBuilderMIPS::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
+}
+
+void LocationsBuilderMIPS::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
+}
+
+void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary::CallKind call_kind =
+      instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The output overlaps the inputs.
+  // Note that TypeCheckSlowPathMIPS uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register cls = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  MipsLabel done;
+
+  // Return 0 if `obj` is null.
+  // TODO: Avoid this check if we know `obj` is not null.
+  __ Move(out, ZERO);
+  __ Beqz(obj, &done);
+
+  // Compare the class of `obj` with `cls`.
+  __ LoadFromOffset(kLoadWord, out, obj, mirror::Object::ClassOffset().Int32Value());
+  if (instruction->IsExactCheck()) {
+    // Classes must be equal for the instanceof to succeed.
+    __ Xor(out, out, cls);
+    __ Sltiu(out, out, 1);
+  } else {
+    // If the classes are not equal, we go into a slow path.
+    DCHECK(locations->OnlyCallsOnSlowPath());
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
+    codegen_->AddSlowPath(slow_path);
+    __ Bne(out, cls, slow_path->GetEntryLabel());
+    __ LoadConst32(out, 1);
+    __ Bind(slow_path->GetExitLabel());
+  }
+
+  __ Bind(&done);
+}
+
+void LocationsBuilderMIPS::VisitIntConstant(HIntConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS::VisitNullConstant(HNullConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS::HandleInvoke(HInvoke* invoke) {
+  InvokeDexCallingConventionVisitorMIPS calling_convention_visitor;
+  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+}
+
+void LocationsBuilderMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
+  HandleInvoke(invoke);
+  // The register T0 is required to hold the hidden argument for
+  // art_quick_imt_conflict_trampoline, so reserve it as a temporary.
+  invoke->GetLocations()->AddTemp(Location::RegisterLocation(T0));
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
+  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
+  Location receiver = invoke->GetLocations()->InAt(0);
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+
+  // Set the hidden argument.
+  __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
+                 invoke->GetDexMethodIndex());
+
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
+    __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
+  } else {
+    __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  }
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // temp = temp->GetImtEntryAt(method_offset);
+  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
+  // T9 = temp->GetEntryPoint();
+  __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
+  // T9();
+  __ Jalr(T9);
+  __ Nop();
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
+  IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
+  HandleInvoke(invoke);
+}
+
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
+  if (invoke->GetLocations()->Intrinsified()) {
+    IntrinsicCodeGeneratorMIPS intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
+    return true;
+  }
+  return false;
+}
+
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  switch (desired_dispatch_info.method_load_kind) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0u
+      };
+    default:
+      break;
+  }
+  switch (desired_dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement these types. For the moment, we fall back to kCallArtMethod.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        desired_dispatch_info.method_load_kind,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        desired_dispatch_info.method_load_data,
+        0u
+      };
+    default:
+      return desired_dispatch_info;
+  }
+}
+
+void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  // All registers are assumed to be correctly set up per the calling convention.
+
+  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
+  switch (invoke->GetMethodLoadKind()) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      // temp = thread->string_init_entrypoint
+      __ LoadFromOffset(kLoadWord,
+                        temp.AsRegister<Register>(),
+                        TR,
+                        invoke->GetStringInitOffset());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      // TODO: Implement these types.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Register reg = temp.AsRegister<Register>();
+      Register method_reg;
+      if (current_method.IsRegister()) {
+        method_reg = current_method.AsRegister<Register>();
+      } else {
+        // TODO: use the appropriate DCHECK() here if possible.
+        // DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = reg;
+        __ Lw(reg, SP, kCurrentMethodStackOffset);
+      }
+
+      // temp = temp->dex_cache_resolved_methods_;
+      __ LoadFromOffset(kLoadWord,
+                        reg,
+                        method_reg,
+                        ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value());
+      // temp = temp[index_in_cache]
+      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      __ LoadFromOffset(kLoadWord,
+                        reg,
+                        reg,
+                        CodeGenerator::GetCachePointerOffset(index_in_cache));
+      break;
+    }
+  }
+
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+      __ Jalr(&frame_entry_label_, T9);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // T9 = invoke->GetDirectCodePtr();
+      __ LoadConst32(T9, invoke->GetDirectCodePtr());
+      // T9()
+      __ Jalr(T9);
+      __ Nop();
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement these types.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+      // T9 = callee_method->entry_point_from_quick_compiled_code_;
+      __ LoadFromOffset(kLoadWord,
+                        T9,
+                        callee_method.AsRegister<Register>(),
+                        ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                            kMipsWordSize).Int32Value());
+      // T9()
+      __ Jalr(T9);
+      __ Nop();
+      break;
+  }
+  DCHECK(!IsLeafMethod());
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  codegen_->GenerateStaticOrDirectCall(invoke,
+                                       locations->HasTemps()
+                                           ? locations->GetTemp(0)
+                                           : Location::NoLocation());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  Location receiver = locations->InAt(0);
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+      invoke->GetVTableIndex(), kMipsPointerSize).SizeValue();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
+
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
+    __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
+  } else {
+    DCHECK(receiver.IsRegister());
+    __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  }
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // temp = temp->GetMethodAt(method_offset);
+  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
+  // T9 = temp->GetEntryPoint();
+  __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
+  // T9();
+  __ Jalr(T9);
+  __ Nop();
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
+  InvokeRuntimeCallingConvention calling_convention;
+  CodeGenerator::CreateLoadClassLocationSummary(
+      cls,
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+      Location::RegisterLocation(V0));
+}
+
+void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
+  LocationSummary* locations = cls->GetLocations();
+  if (cls->NeedsAccessCheck()) {
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
+                            cls,
+                            cls->GetDexPc(),
+                            nullptr,
+                            IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+    return;
+  }
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register current_method = locations->InAt(0).AsRegister<Register>();
+  if (cls->IsReferrersClass()) {
+    DCHECK(!cls->CanCallRuntime());
+    DCHECK(!cls->MustGenerateClinitCheck());
+    __ LoadFromOffset(kLoadWord, out, current_method,
+                      ArtMethod::DeclaringClassOffset().Int32Value());
+  } else {
+    DCHECK(cls->CanCallRuntime());
+    __ LoadFromOffset(kLoadWord, out, current_method,
+                      ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
+    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+        cls,
+        cls,
+        cls->GetDexPc(),
+        cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    __ Beqz(out, slow_path->GetEntryLabel());
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
+    }
+  }
+}
+
+static int32_t GetExceptionTlsOffset() {
+  return Thread::ExceptionOffset<kMipsWordSize>().Int32Value();
+}
+
+void LocationsBuilderMIPS::VisitLoadException(HLoadException* load) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitLoadException(HLoadException* load) {
+  Register out = load->GetLocations()->Out().AsRegister<Register>();
+  __ LoadFromOffset(kLoadWord, out, TR, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderMIPS::VisitClearException(HClearException* clear) {
+  new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
+}
+
+void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+  __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderMIPS::VisitLoadLocal(HLoadLocal* load) {
+  load->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
+  // Nothing to do, this is driven by the code generator.
+}
+
+void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = load->GetLocations();
+  Register out = locations->Out().AsRegister<Register>();
+  Register current_method = locations->InAt(0).AsRegister<Register>();
+  __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+  __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
+  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  __ Beqz(out, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
+  local->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitLocal(HLocal* local) {
+  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
+}
+
+void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
+  if (instruction->IsEnter()) {
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLockObject),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr,
+                            IsDirectEntrypoint(kQuickLockObject));
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pUnlockObject),
+                            instruction,
+                            instruction->GetDexPc(),
+                            nullptr,
+                            IsDirectEntrypoint(kQuickUnlockObject));
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
+}
+
+void LocationsBuilderMIPS::VisitMul(HMul* mul) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
+  switch (mul->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitMul(HMul* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register dst = locations->Out().AsRegister<Register>();
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      Register rhs = locations->InAt(1).AsRegister<Register>();
+
+      if (isR6) {
+        __ MulR6(dst, lhs, rhs);
+      } else {
+        __ MulR2(dst, lhs, rhs);
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+      Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+      Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+      Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>();
+
+      // Extra checks are needed because of the existence of the A1_A2 register
+      // pair. The algorithm below is wrong if dst_high aliases lhs_low or rhs_low
+      // (e.g. lhs=a0_a1, rhs=a2_a3 and dst=a1_a2).
+      DCHECK_NE(dst_high, lhs_low);
+      DCHECK_NE(dst_high, rhs_low);
+
+      // A_B * C_D
+      // dst_hi:  [ low(A*D) + low(B*C) + hi(B*D) ]
+      // dst_lo:  [ low(B*D) ]
+      // Note: R2 and R6 MUL produce the low 32 bits of the multiplication result.
+
+      if (isR6) {
+        __ MulR6(TMP, lhs_high, rhs_low);
+        __ MulR6(dst_high, lhs_low, rhs_high);
+        __ Addu(dst_high, dst_high, TMP);
+        __ MuhuR6(TMP, lhs_low, rhs_low);
+        __ Addu(dst_high, dst_high, TMP);
+        __ MulR6(dst_low, lhs_low, rhs_low);
+      } else {
+        __ MulR2(TMP, lhs_high, rhs_low);
+        __ MulR2(dst_high, lhs_low, rhs_high);
+        __ Addu(dst_high, dst_high, TMP);
+        __ MultuR2(lhs_low, rhs_low);
+        __ Mfhi(TMP);
+        __ Addu(dst_high, dst_high, TMP);
+        __ Mflo(dst_low);
+      }
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      if (type == Primitive::kPrimFloat) {
+        __ MulS(dst, lhs, rhs);
+      } else {
+        __ MulD(dst, lhs, rhs);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected mul type " << type;
+  }
+}
+
+void LocationsBuilderMIPS::VisitNeg(HNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitNeg(HNeg* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register dst = locations->Out().AsRegister<Register>();
+      Register src = locations->InAt(0).AsRegister<Register>();
+      __ Subu(dst, ZERO, src);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+      Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+      Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
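+      // 64-bit negation: negate the low word, compute the borrow (1 if the
+      // negated low word is non-zero), then subtract it from the negated
+      // high word.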
+      __ Subu(dst_low, ZERO, src_low);
+      __ Sltu(TMP, ZERO, dst_low);
+      __ Subu(dst_high, ZERO, src_high);
+      __ Subu(dst_high, dst_high, TMP);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+      FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+      if (type == Primitive::kPrimFloat) {
+        __ NegS(dst, src);
+      } else {
+        __ NegD(dst, src);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected neg type " << type;
+  }
+}
+
+void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+}
+
+void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
+  InvokeRuntimeCallingConvention calling_convention;
+  Register current_method_register = calling_convention.GetRegisterAt(2);
+  __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
+  // Move a uint16_t value to a register.
+  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  codegen_->InvokeRuntime(
+      GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+      instruction,
+      instruction->GetDexPc(),
+      nullptr,
+      IsDirectEntrypoint(kQuickAllocArrayWithAccessCheck));
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
+                       void*, uint32_t, int32_t, ArtMethod*>();
+}
+
+void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
+  InvokeRuntimeCallingConvention calling_convention;
+  Register current_method_register = calling_convention.GetRegisterAt(1);
+  __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
+  // Move a uint16_t value to a register.
+  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  codegen_->InvokeRuntime(
+      GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
+      instruction,
+      instruction->GetDexPc(),
+      nullptr,
+      IsDirectEntrypoint(kQuickAllocObjectWithAccessCheck));
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+}
+
+void LocationsBuilderMIPS::VisitNot(HNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS::VisitNot(HNot* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register dst = locations->Out().AsRegister<Register>();
+      Register src = locations->InAt(0).AsRegister<Register>();
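+      // NOR with ZERO computes the bitwise NOT of `src`.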
+      __ Nor(dst, src, ZERO);
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+      Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+      Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
+      __ Nor(dst_high, src_high, ZERO);
+      __ Nor(dst_low, src_low, ZERO);
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
+  }
+}
+
+void LocationsBuilderMIPS::VisitBooleanNot(HBooleanNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS::VisitBooleanNot(HBooleanNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
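+  // Booleans are 0 or 1, so XOR-ing the input with 1 flips it.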
+  __ Xori(locations->Out().AsRegister<Register>(),
+          locations->InAt(0).AsRegister<Register>(),
+          1);
+}
+
+void LocationsBuilderMIPS::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+    return;
+  }
+  Location obj = instruction->GetLocations()->InAt(0);
+
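+  // Load from `obj` into the ZERO register: the loaded value is discarded,
+  // but a null `obj` faults here and the runtime turns the fault into a
+  // NullPointerException.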
+  __ Lw(ZERO, obj.AsRegister<Register>(), 0);
+  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void InstructionCodeGeneratorMIPS::GenerateExplicitNullCheck(HNullCheck* instruction) {
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathMIPS(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  Location obj = instruction->GetLocations()->InAt(0);
+
+  __ Beqz(obj.AsRegister<Register>(), slow_path->GetEntryLabel());
+}
+
+void InstructionCodeGeneratorMIPS::VisitNullCheck(HNullCheck* instruction) {
+  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
+    GenerateImplicitNullCheck(instruction);
+  } else {
+    GenerateExplicitNullCheck(instruction);
+  }
+}
+
+void LocationsBuilderMIPS::VisitOr(HOr* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitOr(HOr* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorMIPS::VisitParallelMove(HParallelMove* instruction) {
+  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+void LocationsBuilderMIPS::VisitParameterValue(HParameterValue* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+  if (location.IsStackSlot()) {
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  }
+  locations->SetOut(location);
+}
+
+void InstructionCodeGeneratorMIPS::VisitParameterValue(HParameterValue* instruction
+                                                         ATTRIBUTE_UNUSED) {
+  // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderMIPS::VisitCurrentMethod(HCurrentMethod* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
+}
+
+void InstructionCodeGeneratorMIPS::VisitCurrentMethod(HCurrentMethod* instruction
+                                                        ATTRIBUTE_UNUSED) {
+  // Nothing to do, the method is already at its location.
+}
+
+void LocationsBuilderMIPS::VisitPhi(HPhi* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+}
+
+void InstructionCodeGeneratorMIPS::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+}
+
+void LocationsBuilderMIPS::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  LocationSummary::CallKind call_kind =
+      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      locations->SetOut(calling_convention.GetReturnLocation(type));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(calling_convention.GetReturnLocation(type));
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register dst = locations->Out().AsRegister<Register>();
+      Register lhs = locations->InAt(0).AsRegister<Register>();
+      Register rhs = locations->InAt(1).AsRegister<Register>();
+      if (isR6) {
+        __ ModR6(dst, lhs, rhs);
+      } else {
+        __ ModR2(dst, lhs, rhs);
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
+                              instruction,
+                              instruction->GetDexPc(),
+                              nullptr,
+                              IsDirectEntrypoint(kQuickLmod));
+      CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf),
+                              instruction, instruction->GetDexPc(),
+                              nullptr,
+                              IsDirectEntrypoint(kQuickFmodf));
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      break;
+    }
+    case Primitive::kPrimDouble: {
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod),
+                              instruction, instruction->GetDexPc(),
+                              nullptr,
+                              IsDirectEntrypoint(kQuickFmod));
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+  memory_barrier->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+}
+
+void LocationsBuilderMIPS::VisitReturn(HReturn* ret) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret);
+  Primitive::Type return_type = ret->InputAt(0)->GetType();
+  locations->SetInAt(0, MipsReturnLocation(return_type));
+}
+
+void InstructionCodeGeneratorMIPS::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) {
+  codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderMIPS::VisitReturnVoid(HReturnVoid* ret) {
+  ret->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+  codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderMIPS::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorMIPS::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderMIPS::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderMIPS::VisitStoreLocal(HStoreLocal* store) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
+  Primitive::Type field_type = store->InputAt(1)->GetType();
+  switch (field_type) {
+    case Primitive::kPrimNot:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented local type " << field_type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
+}
+
+void LocationsBuilderMIPS::VisitSub(HSub* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitSub(HSub* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
+}
+
+void LocationsBuilderMIPS::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
+}
+
+void LocationsBuilderMIPS::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(instruction,
+                                                 instruction->GetFieldType(),
+                                                 calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderMIPS::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(instruction,
+                                                 instruction->GetFieldType(),
+                                                 calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderMIPS::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(instruction,
+                                                 instruction->GetFieldType(),
+                                                 calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderMIPS::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(instruction,
+                                                 instruction->GetFieldType(),
+                                                 calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
+  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
+}
+
+void LocationsBuilderMIPS::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
+  // Nothing to do, this is driven by the code generator.
+}
+
+void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorMIPS::VisitThrow(HThrow* instruction) {
+  codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
+                          instruction,
+                          instruction->GetDexPc(),
+                          nullptr,
+                          IsDirectEntrypoint(kQuickDeliverException));
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
+  Primitive::Type input_type = conversion->GetInputType();
+  Primitive::Type result_type = conversion->GetResultType();
+  DCHECK_NE(input_type, result_type);
+
+  if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
+      (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
+    LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
+  }
+
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
+    call_kind = LocationSummary::kCall;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
+  if (call_kind == LocationSummary::kNoCall) {
+    if (Primitive::IsFloatingPointType(input_type)) {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(0, Location::RequiresRegister());
+    }
+
+    if (Primitive::IsFloatingPointType(result_type)) {
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+    } else {
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    }
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+
+    if (Primitive::IsFloatingPointType(input_type)) {
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+    } else {
+      DCHECK_EQ(input_type, Primitive::kPrimLong);
+      locations->SetInAt(0, Location::RegisterPairLocation(
+                 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+    }
+
+    locations->SetOut(calling_convention.GetReturnLocation(result_type));
+  }
+}
+
+void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversion) {
+  LocationSummary* locations = conversion->GetLocations();
+  Primitive::Type result_type = conversion->GetResultType();
+  Primitive::Type input_type = conversion->GetInputType();
+  bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+
+  DCHECK_NE(input_type, result_type);
+
+  if (result_type == Primitive::kPrimLong && Primitive::IsIntegralType(input_type)) {
+    Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+    Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+    Register src = locations->InAt(0).AsRegister<Register>();
+
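+    // Sign-extend the 32-bit value: copy it to the low word and replicate its
+    // sign bit into the high word with an arithmetic shift by 31.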
+    __ Move(dst_low, src);
+    __ Sra(dst_high, src, 31);
+  } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+    Register dst = locations->Out().AsRegister<Register>();
+    Register src = (input_type == Primitive::kPrimLong)
+        ? locations->InAt(0).AsRegisterPairLow<Register>()
+        : locations->InAt(0).AsRegister<Register>();
+
+    switch (result_type) {
+      case Primitive::kPrimChar:
+        __ Andi(dst, src, 0xFFFF);
+        break;
+      case Primitive::kPrimByte:
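+        // SEB is only available from MIPS32R2 (same for SEH below); otherwise
+        // emulate the sign extension with a shift-left/shift-right-arithmetic pair.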
+        if (has_sign_extension) {
+          __ Seb(dst, src);
+        } else {
+          __ Sll(dst, src, 24);
+          __ Sra(dst, dst, 24);
+        }
+        break;
+      case Primitive::kPrimShort:
+        if (has_sign_extension) {
+          __ Seh(dst, src);
+        } else {
+          __ Sll(dst, src, 16);
+          __ Sra(dst, dst, 16);
+        }
+        break;
+      case Primitive::kPrimInt:
+        __ Move(dst, src);
+        break;
+
+      default:
+        LOG(FATAL) << "Unexpected type conversion from " << input_type
+                   << " to " << result_type;
+    }
+  } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
+    if (input_type != Primitive::kPrimLong) {
+      Register src = locations->InAt(0).AsRegister<Register>();
+      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
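+      // Move the integer into an FPU register, then convert word to
+      // single/double precision.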
+      __ Mtc1(src, FTMP);
+      if (result_type == Primitive::kPrimFloat) {
+        __ Cvtsw(dst, FTMP);
+      } else {
+        __ Cvtdw(dst, FTMP);
+      }
+    } else {
+      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+                                                                    : QUICK_ENTRY_POINT(pL2d);
+      bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
+                                                           : IsDirectEntrypoint(kQuickL2d);
+      codegen_->InvokeRuntime(entry_offset,
+                              conversion,
+                              conversion->GetDexPc(),
+                              nullptr,
+                              direct);
+      if (result_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      } else {
+        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      }
+    }
+  } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
+    CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
+    int32_t entry_offset;
+    bool direct;
+    if (result_type != Primitive::kPrimLong) {
+      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
+                                                           : QUICK_ENTRY_POINT(pD2iz);
+      direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz)
+                                                     : IsDirectEntrypoint(kQuickD2iz);
+    } else {
+      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+                                                           : QUICK_ENTRY_POINT(pD2l);
+      direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
+                                                     : IsDirectEntrypoint(kQuickD2l);
+    }
+    codegen_->InvokeRuntime(entry_offset,
+                            conversion,
+                            conversion->GetDexPc(),
+                            nullptr,
+                            direct);
+    if (result_type != Primitive::kPrimLong) {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+      }
+    } else {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+      }
+    }
+  } else if (Primitive::IsFloatingPointType(result_type) &&
+             Primitive::IsFloatingPointType(input_type)) {
+    FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+    FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+    if (result_type == Primitive::kPrimFloat) {
+      __ Cvtsd(dst, src);
+    } else {
+      __ Cvtds(dst, src);
+    }
+  } else {
+    LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
+                << " to " << result_type;
+  }
+}
+
+void LocationsBuilderMIPS::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void LocationsBuilderMIPS::VisitXor(HXor* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitXor(HXor* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorMIPS::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+void LocationsBuilderMIPS::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
+  DCHECK(codegen_->IsBaseline());
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant()));
+}
+
+void InstructionCodeGeneratorMIPS::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) {
+  DCHECK(codegen_->IsBaseline());
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Create a set of compare/jumps.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  for (int32_t i = 0; i < num_entries; ++i) {
+    int32_t case_value = lower_bound + i;
+    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
+    if (case_value == 0) {
+      __ Beqz(value_reg, successor_label);
+    } else {
+      __ LoadConst32(TMP, case_value);
+      __ Beq(value_reg, TMP, successor_label);
+    }
+  }
+
+  // Insert the default branch for every other value.
+  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+    __ B(codegen_->GetLabelOf(default_block));
+  }
+}
+
+void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
+  // The trampoline uses the same calling convention as dex calling conventions,
+  // except instead of loading arg0/A0 with the target Method*, arg0/A0 will contain
+  // the method_idx.
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
+  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+}
+
+#undef __
+#undef QUICK_ENTRY_POINT
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
new file mode 100644
index 0000000..059131d
--- /dev/null
+++ b/compiler/optimizing/code_generator_mips.h
@@ -0,0 +1,368 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
+
+#include "code_generator.h"
+#include "dex/compiler_enums.h"
+#include "driver/compiler_options.h"
+#include "nodes.h"
+#include "parallel_move_resolver.h"
+#include "utils/mips/assembler_mips.h"
+
+namespace art {
+namespace mips {
+
+// InvokeDexCallingConvention registers
+
+static constexpr Register kParameterCoreRegisters[] =
+    { A1, A2, A3 };
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+static constexpr FRegister kParameterFpuRegisters[] =
+    { F12, F14 };
+static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
+
+
+// InvokeRuntimeCallingConvention registers
+
+static constexpr Register kRuntimeParameterCoreRegisters[] =
+    { A0, A1, A2, A3 };
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+    arraysize(kRuntimeParameterCoreRegisters);
+
+static constexpr FRegister kRuntimeParameterFpuRegisters[] =
+    { F12, F14 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterFpuRegisters);
+
+
+static constexpr Register kCoreCalleeSaves[] =
+    { S0, S1, S2, S3, S4, S5, S6, S7, FP, RA };
+static constexpr FRegister kFpuCalleeSaves[] =
+    { F20, F22, F24, F26, F28, F30 };
+
+
+class CodeGeneratorMIPS;
+
+class InvokeDexCallingConvention : public CallingConvention<Register, FRegister> {
+ public:
+  InvokeDexCallingConvention()
+      : CallingConvention(kParameterCoreRegisters,
+                          kParameterCoreRegistersLength,
+                          kParameterFpuRegisters,
+                          kParameterFpuRegistersLength,
+                          kMipsPointerSize) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitorMIPS : public InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitorMIPS() {}
+  virtual ~InvokeDexCallingConventionVisitorMIPS() {}
+
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE;
+  Location GetMethodLocation() const OVERRIDE;
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorMIPS);
+};
+
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, FRegister> {
+ public:
+  InvokeRuntimeCallingConvention()
+      : CallingConvention(kRuntimeParameterCoreRegisters,
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength,
+                          kMipsPointerSize) {}
+
+  Location GetReturnLocation(Primitive::Type return_type);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+class FieldAccessCallingConventionMIPS : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionMIPS() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return Location::RegisterLocation(A1);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return Location::RegisterLocation(A0);
+  }
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterPairLocation(V0, V1)
+        : Location::RegisterLocation(V0);
+  }
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterPairLocation(A2, A3)
+        : (is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1));
+  }
+  Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return Location::FpuRegisterLocation(F0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionMIPS);
+};
+
+class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap {
+ public:
+  ParallelMoveResolverMIPS(ArenaAllocator* allocator, CodeGeneratorMIPS* codegen)
+      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
+
+  void EmitMove(size_t index) OVERRIDE;
+  void EmitSwap(size_t index) OVERRIDE;
+  void SpillScratch(int reg) OVERRIDE;
+  void RestoreScratch(int reg) OVERRIDE;
+
+  void Exchange(int index1, int index2, bool double_slot);
+
+  MipsAssembler* GetAssembler() const;
+
+ private:
+  CodeGeneratorMIPS* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverMIPS);
+};
+
+class SlowPathCodeMIPS : public SlowPathCode {
+ public:
+  SlowPathCodeMIPS() : entry_label_(), exit_label_() {}
+
+  MipsLabel* GetEntryLabel() { return &entry_label_; }
+  MipsLabel* GetExitLabel() { return &exit_label_; }
+
+ private:
+  MipsLabel entry_label_;
+  MipsLabel exit_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS);
+};
+
+class LocationsBuilderMIPS : public HGraphVisitor {
+ public:
+  LocationsBuilderMIPS(HGraph* graph, CodeGeneratorMIPS* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super)     \
+  void Visit##name(H##name* instr) OVERRIDE;
+
+  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void VisitInstruction(HInstruction* instruction) OVERRIDE {
+    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
+               << " (id " << instruction->GetId() << ")";
+  }
+
+ private:
+  void HandleInvoke(HInvoke* invoke);
+  void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
+  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  InvokeDexCallingConventionVisitorMIPS parameter_visitor_;
+
+  CodeGeneratorMIPS* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS);
+};
+
+class InstructionCodeGeneratorMIPS : public HGraphVisitor {
+ public:
+  InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name, super)     \
+  void Visit##name(H##name* instr) OVERRIDE;
+
+  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void VisitInstruction(HInstruction* instruction) OVERRIDE {
+    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
+               << " (id " << instruction->GetId() << ")";
+  }
+
+  MipsAssembler* GetAssembler() const { return assembler_; }
+
+ private:
+  void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+  void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
+  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+  void GenerateTestAndBranch(HInstruction* instruction,
+                             MipsLabel* true_target,
+                             MipsLabel* false_target,
+                             MipsLabel* always_true_target);
+  void HandleGoto(HInstruction* got, HBasicBlock* successor);
+
+  MipsAssembler* const assembler_;
+  CodeGeneratorMIPS* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorMIPS);
+};
+
+class CodeGeneratorMIPS : public CodeGenerator {
+ public:
+  CodeGeneratorMIPS(HGraph* graph,
+                    const MipsInstructionSetFeatures& isa_features,
+                    const CompilerOptions& compiler_options,
+                    OptimizingCompilerStats* stats = nullptr);
+  virtual ~CodeGeneratorMIPS() {}
+
+  void GenerateFrameEntry() OVERRIDE;
+  void GenerateFrameExit() OVERRIDE;
+
+  void Bind(HBasicBlock* block) OVERRIDE;
+
+  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  void Move32(Location destination, Location source);
+  void Move64(Location destination, Location source);
+  void MoveConstant(Location location, HConstant* c);
+
+  size_t GetWordSize() const OVERRIDE { return kMipsWordSize; }
+
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMipsDoublewordSize; }
+
+  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+    return assembler_.GetLabelLocation(GetLabelOf(block));
+  }
+
+  HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
+  HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
+  MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; }
+  const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; }
+
+  void MarkGCCard(Register object, Register value);
+
+  // Register allocation.
+
+  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
+  // AllocateFreeRegister() is only used when allocating registers locally
+  // during CompileBaseline().
+  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+
+  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+
+  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
+  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
+  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+
+  void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
+  void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+
+  // Blocks all register pairs made out of blocked core registers.
+  void UpdateBlockedPairRegisters() const;
+
+  InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; }
+
+  const MipsInstructionSetFeatures& GetInstructionSetFeatures() const {
+    return isa_features_;
+  }
+
+  MipsLabel* GetLabelOf(HBasicBlock* block) const {
+    return CommonGetLabelOf<MipsLabel>(block_labels_, block);
+  }
+
+  void Initialize() OVERRIDE {
+    block_labels_ = CommonInitializeLabels<MipsLabel>();
+  }
+
+  void Finalize(CodeAllocator* allocator) OVERRIDE;
+
+  // Code generation helpers.
+
+  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
+
+  void MoveConstant(Location destination, int32_t value);
+
+  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+
+  // Generate code to invoke a runtime entry point.
+  void InvokeRuntime(QuickEntrypointEnum entrypoint,
+                     HInstruction* instruction,
+                     uint32_t dex_pc,
+                     SlowPathCode* slow_path) OVERRIDE;
+
+  void InvokeRuntime(int32_t offset,
+                     HInstruction* instruction,
+                     uint32_t dex_pc,
+                     SlowPathCode* slow_path,
+                     bool is_direct_entrypoint);
+
+  ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
+
+  bool NeedsTwoRegisters(Primitive::Type type) const {
+    return type == Primitive::kPrimLong;
+  }
+
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
+  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
+                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
+  }
+
+  void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
+                              Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
+  }
+
+ private:
+  // Labels for each block that will be compiled.
+  MipsLabel* block_labels_;
+  MipsLabel frame_entry_label_;
+  LocationsBuilderMIPS location_builder_;
+  InstructionCodeGeneratorMIPS instruction_visitor_;
+  ParallelMoveResolverMIPS move_resolver_;
+  MipsAssembler assembler_;
+  const MipsInstructionSetFeatures& isa_features_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
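One detail worth calling out in this header: NeedsTwoRegisters() returns true for kPrimLong because 32-bit MIPS keeps a 64-bit value in a low/high register pair (see the V0/V1 and A2/A3 pair locations above). A standalone sketch of that split, purely for illustration and not ART code:

#include <cstdint>
#include <utility>

// Illustrative only: how a 64-bit long maps onto a low/high register pair on a
// 32-bit target, which is why NeedsTwoRegisters() reports true for kPrimLong.
std::pair<uint32_t, uint32_t> SplitLong(int64_t value) {
  uint32_t low = static_cast<uint32_t>(value);                                // e.g. V0 / A2
  uint32_t high = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);  // e.g. V1 / A3
  return {low, high};
}

int64_t CombineLong(uint32_t low, uint32_t high) {
  return (static_cast<int64_t>(high) << 32) | low;
}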
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 756336d..55efd5f 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -342,8 +342,7 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
+    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -450,13 +449,11 @@
 }
 
 void ParallelMoveResolverMIPS64::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
 }
 
 void ParallelMoveResolverMIPS64::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
 }
@@ -1780,6 +1777,9 @@
     return;
   }
 
+  // TODO: generalize to long
+  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
+
   LocationSummary* locations = instruction->GetLocations();
 
   GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
@@ -1857,6 +1857,48 @@
         }
       }
       break;
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) {
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (if_cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Sltiu(dst, lhs, rhs_imm + 1);
+        if (if_cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (if_cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
   }
 }
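The new kCondB/AE/BE/A cases above derive all four unsigned comparisons from the single sltu primitive, negating with xori where MIPS64 has no direct instruction. A minimal standalone model of that mapping (not ART code):

#include <cstdint>

// Illustrative only: all four unsigned conditions built from a single
// "set if less than, unsigned" primitive, mirroring the sltu/xori pattern above.
static uint32_t Sltu(uint32_t lhs, uint32_t rhs) { return lhs < rhs ? 1u : 0u; }

uint32_t CondB(uint32_t lhs, uint32_t rhs)  { return Sltu(lhs, rhs); }       // lhs <  rhs
uint32_t CondAE(uint32_t lhs, uint32_t rhs) { return Sltu(lhs, rhs) ^ 1u; }  // !(lhs < rhs)
uint32_t CondA(uint32_t lhs, uint32_t rhs)  { return Sltu(rhs, lhs); }       // rhs <  lhs
uint32_t CondBE(uint32_t lhs, uint32_t rhs) { return Sltu(rhs, lhs) ^ 1u; }  // !(rhs < lhs)

The immediate path in the code above follows the same idea with Sltiu, using rhs_imm + 1 to turn <= into < while the constant stays within the ranges checked there.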
 
@@ -2074,6 +2116,17 @@
         case kCondGT:
           __ Bgtzc(lhs, true_target);
           break;
+        case kCondB:
+          break;  // always false
+        case kCondBE:
+          __ Beqzc(lhs, true_target);  // unsigned <= 0 holds only when zero
+          break;
+        case kCondA:
+          __ Bnezc(lhs, true_target);  // unsigned > 0 holds when non-zero
+          break;
+        case kCondAE:
+          __ B(true_target);  // always true
+          break;
       }
     } else {
       if (use_imm) {
@@ -2088,12 +2141,16 @@
           case kCondEQ:
           case kCondGE:
           case kCondLE:
+          case kCondBE:
+          case kCondAE:
             // if lhs == rhs for a positive condition, then it is a branch
             __ B(true_target);
             break;
           case kCondNE:
           case kCondLT:
           case kCondGT:
+          case kCondB:
+          case kCondA:
             // if lhs == rhs for a negative condition, then it is a NOP
             break;
         }
@@ -2117,6 +2174,18 @@
           case kCondGT:
             __ Bltc(rhs_reg, lhs, true_target);
             break;
+          case kCondB:
+            __ Bltuc(lhs, rhs_reg, true_target);
+            break;
+          case kCondAE:
+            __ Bgeuc(lhs, rhs_reg, true_target);
+            break;
+          case kCondBE:
+            __ Bgeuc(rhs_reg, lhs, true_target);
+            break;
+          case kCondA:
+            __ Bltuc(rhs_reg, lhs, true_target);
+            break;
         }
       }
     }
@@ -2153,8 +2222,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   HInstruction* cond = deoptimize->InputAt(0);
-  DCHECK(cond->IsCondition());
-  if (cond->AsCondition()->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
@@ -2460,6 +2528,37 @@
   return false;
 }
 
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  switch (desired_dispatch_info.method_load_kind) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0u
+      };
+    default:
+      break;
+  }
+  switch (desired_dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement these types. For the moment, we fall back to kCallArtMethod.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        desired_dispatch_info.method_load_kind,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        desired_dispatch_info.method_load_data,
+        0u
+      };
+    default:
+      return desired_dispatch_info;
+  }
+}
+
 void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // All registers are assumed to be correctly set up per the calling convention.
 
@@ -2479,13 +2578,11 @@
       __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-      // TODO: Implement this type. (Needs literal support.) At the moment, the
-      // CompilerDriver will not direct the backend to use this type for MIPS.
-      LOG(FATAL) << "Unsupported!";
-      UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-      FALLTHROUGH_INTENDED;
+      // TODO: Implement these types.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
       GpuRegister reg = temp.AsRegister<GpuRegister>();
@@ -2525,12 +2622,12 @@
       // LR()
       __ Jalr(T9);
       break;
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement kCallPCRelative. For the moment, we fall back to kMethodCode.
-      FALLTHROUGH_INTENDED;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
-      // TODO: Implement kDirectCodeFixup. For the moment, we fall back to kMethodCode.
-      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement these types.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // T9 = callee_method->entry_point_from_quick_compiled_code_;
       __ LoadFromOffset(kLoadDoubleword,
@@ -3464,6 +3561,38 @@
   VisitCondition(comp);
 }
 
+void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
 void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) {
   DCHECK(codegen_->IsBaseline());
   LocationSummary* locations =
@@ -3494,7 +3623,7 @@
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   for (int32_t i = 0; i < num_entries; i++) {
     int32_t case_value = lower_bound + i;
-    Label* succ = codegen_->GetLabelOf(successors.at(i));
+    Label* succ = codegen_->GetLabelOf(successors[i]);
     if (case_value == 0) {
       __ Beqzc(value_reg, succ);
     } else {
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 5e8f9e7..9bbd027 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -117,7 +117,7 @@
     return Location::RegisterLocation(A0);
   }
   Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
-    return Location::RegisterLocation(A0);
+    return Location::RegisterLocation(V0);
   }
   Location GetSetValueLocation(
       Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE {
@@ -217,9 +217,6 @@
   Mips64Assembler* GetAssembler() const { return assembler_; }
 
  private:
-  // Generate code for the given suspend check. If not null, `successor`
-  // is the block to branch to if the suspend check is not needed, and after
-  // the suspend call.
   void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
@@ -329,6 +326,12 @@
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; }
 
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
                            Location temp ATTRIBUTE_UNUSED) OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 5ef7de0..0df7e3b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -428,7 +428,7 @@
 #undef __
 #define __ down_cast<X86Assembler*>(GetAssembler())->
 
-inline Condition X86SignedCondition(IfCondition cond) {
+inline Condition X86Condition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return kEqual;
     case kCondNE: return kNotEqual;
@@ -436,19 +436,30 @@
     case kCondLE: return kLessEqual;
     case kCondGT: return kGreater;
     case kCondGE: return kGreaterEqual;
+    case kCondB:  return kBelow;
+    case kCondBE: return kBelowEqual;
+    case kCondA:  return kAbove;
+    case kCondAE: return kAboveEqual;
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 }
 
+// Maps signed condition to unsigned condition and FP condition to x86 name.
 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return kEqual;
     case kCondNE: return kNotEqual;
+    // Signed to unsigned, and FP to x86 name.
     case kCondLT: return kBelow;
     case kCondLE: return kBelowEqual;
     case kCondGT: return kAbove;
     case kCondGE: return kAboveEqual;
+    // Unsigned conditions remain unchanged.
+    case kCondB:  return kBelow;
+    case kCondBE: return kBelowEqual;
+    case kCondA:  return kAbove;
+    case kCondAE: return kAboveEqual;
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
@@ -521,7 +532,8 @@
       move_resolver_(graph->GetArena(), this),
       isa_features_(isa_features),
       method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -1040,8 +1052,7 @@
   exit->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86::VisitExit(HExit* exit) {
-  UNUSED(exit);
+void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
@@ -1067,7 +1078,7 @@
   Register left_low = left.AsRegisterPairLow<Register>();
   IfCondition true_high_cond = if_cond;
   IfCondition false_high_cond = cond->GetOppositeCondition();
-  Condition final_condition = X86UnsignedOrFPCondition(if_cond);
+  Condition final_condition = X86UnsignedOrFPCondition(if_cond);  // unsigned on lower part
 
   // Set the conditions for the test, remembering that == needs to be
   // decided using the low words.
@@ -1088,6 +1099,18 @@
     case kCondGE:
       true_high_cond = kCondGT;
       break;
+    case kCondB:
+      false_high_cond = kCondA;
+      break;
+    case kCondBE:
+      true_high_cond = kCondB;
+      break;
+    case kCondA:
+      false_high_cond = kCondB;
+      break;
+    case kCondAE:
+      true_high_cond = kCondA;
+      break;
   }
 
   if (right.IsConstant()) {
@@ -1101,12 +1124,12 @@
       __ cmpl(left_high, Immediate(val_high));
     }
     if (if_cond == kCondNE) {
-      __ j(X86SignedCondition(true_high_cond), true_label);
+      __ j(X86Condition(true_high_cond), true_label);
     } else if (if_cond == kCondEQ) {
-      __ j(X86SignedCondition(false_high_cond), false_label);
+      __ j(X86Condition(false_high_cond), false_label);
     } else {
-      __ j(X86SignedCondition(true_high_cond), true_label);
-      __ j(X86SignedCondition(false_high_cond), false_label);
+      __ j(X86Condition(true_high_cond), true_label);
+      __ j(X86Condition(false_high_cond), false_label);
     }
     // Must be equal high, so compare the lows.
     if (val_low == 0) {
@@ -1120,12 +1143,12 @@
 
     __ cmpl(left_high, right_high);
     if (if_cond == kCondNE) {
-      __ j(X86SignedCondition(true_high_cond), true_label);
+      __ j(X86Condition(true_high_cond), true_label);
     } else if (if_cond == kCondEQ) {
-      __ j(X86SignedCondition(false_high_cond), false_label);
+      __ j(X86Condition(false_high_cond), false_label);
     } else {
-      __ j(X86SignedCondition(true_high_cond), true_label);
-      __ j(X86SignedCondition(false_high_cond), false_label);
+      __ j(X86Condition(true_high_cond), true_label);
+      __ j(X86Condition(false_high_cond), false_label);
     }
     // Must be equal high, so compare the lows.
     __ cmpl(left_low, right_low);
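For readers less familiar with the 32-bit lowering in this hunk: GenerateLongComparesAndJumps decides a 64-bit comparison on the high words first and only falls back to an unsigned compare of the low words when the highs are equal. A standalone sketch of the same decision, with illustrative names (not ART code):

#include <cstdint>

// Illustrative only: a 64-bit signed "less than" decided from 32-bit halves.
// Equality on the high words defers the decision to an unsigned compare of
// the low words, which is why the final condition above is always unsigned.
bool LessThan64(int32_t lhs_high, uint32_t lhs_low, int32_t rhs_high, uint32_t rhs_low) {
  if (lhs_high != rhs_high) {
    return lhs_high < rhs_high;  // Signed compare decides on the high words.
  }
  return lhs_low < rhs_low;      // Highs equal: unsigned compare on the low words.
}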
@@ -1214,7 +1237,7 @@
         }
         __ j(kNotEqual, true_target);
       } else {
-        __ j(X86SignedCondition(cond->AsCondition()->GetCondition()), true_target);
+        __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
       }
     } else {
       // Condition has not been materialized, use its inputs as the
@@ -1247,7 +1270,7 @@
       } else {
         __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
       }
-      __ j(X86SignedCondition(cond->AsCondition()->GetCondition()), true_target);
+      __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
     }
   }
   if (false_target != nullptr) {
@@ -1283,8 +1306,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   HInstruction* cond = deoptimize->InputAt(0);
-  DCHECK(cond->IsCondition());
-  if (cond->AsCondition()->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     locations->SetInAt(0, Location::Any());
   }
 }
@@ -1309,9 +1331,8 @@
   local->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load) {
+void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(load);
 }
 
 void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) {
@@ -1338,8 +1359,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store) {
-  UNUSED(store);
+void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
 void LocationsBuilderX86::VisitCondition(HCondition* cond) {
@@ -1405,7 +1425,7 @@
       } else {
         __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
       }
-      __ setb(X86SignedCondition(cond->GetCondition()), reg);
+      __ setb(X86Condition(cond->GetCondition()), reg);
       return;
     }
     case Primitive::kPrimLong:
@@ -1483,15 +1503,46 @@
   VisitCondition(comp);
 }
 
+void LocationsBuilderX86::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) {
+void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
@@ -1500,9 +1551,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant) {
+void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
@@ -1511,9 +1561,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) {
+void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
@@ -1522,9 +1571,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant) {
+void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
@@ -1533,9 +1581,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant) {
+void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
@@ -1550,8 +1597,7 @@
   ret->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) {
-  UNUSED(ret);
+void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
   codegen_->GenerateFrameExit();
 }
 
@@ -3685,8 +3731,7 @@
   locations->SetOut(Location::Any());
 }
 
-void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) {
-  UNUSED(instruction);
+void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
 
@@ -3712,6 +3757,34 @@
   }
 }
 
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  if (desired_dispatch_info.method_load_kind ==
+      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
+    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
+    return HInvokeStaticOrDirect::DispatchInfo {
+      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+      0u,
+      0u
+    };
+  }
+  switch (desired_dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
+      // (Though the direct CALL ptr16:32 is available for consideration).
+      return HInvokeStaticOrDirect::DispatchInfo {
+        desired_dispatch_info.method_load_kind,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        desired_dispatch_info.method_load_data,
+        0u
+      };
+    default:
+      return desired_dispatch_info;
+  }
+}
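The dispatch-filtering hook added above lets each backend downgrade a desired dispatch to one it can actually emit, so the emission switches can treat the filtered-out cases as unreachable. A simplified model of the x86 filter using stand-in types (not the real ART enums, which have more members and carry load data):

// Simplified stand-in types; illustrative only.
enum class MethodLoadKind { kDexCachePcRelative, kDexCacheViaMethod };
enum class CodePtrLocation { kCallDirectWithFixup, kCallDirect, kCallArtMethod };

struct DispatchInfo {
  MethodLoadKind method_load_kind;
  CodePtrLocation code_ptr_location;
};

// Mirrors the x86 filter above: PC-relative dex cache loads fall back to the
// dex cache via the current method, and direct calls go through ArtMethod*.
DispatchInfo GetSupportedDispatch(DispatchInfo desired) {
  if (desired.method_load_kind == MethodLoadKind::kDexCachePcRelative) {
    return {MethodLoadKind::kDexCacheViaMethod, CodePtrLocation::kCallArtMethod};
  }
  if (desired.code_ptr_location == CodePtrLocation::kCallDirectWithFixup ||
      desired.code_ptr_location == CodePtrLocation::kCallDirect) {
    desired.code_ptr_location = CodePtrLocation::kCallArtMethod;
  }
  return desired;
}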
 
 void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
@@ -3732,8 +3805,10 @@
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-      FALLTHROUGH_INTENDED;
+      // TODO: Implement this type.
+      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
       Register method_reg;
@@ -3769,9 +3844,9 @@
     }
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
-      // (Though the direct CALL ptr16:32 is available for consideration).
-      FALLTHROUGH_INTENDED;
+      // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // (callee_method + offset_of_quick_compiled_code)()
       __ call(Address(callee_method.AsRegister<Register>(),
@@ -4684,13 +4759,11 @@
   temp->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86::VisitTemporary(HTemporary* temp) {
+void InstructionCodeGeneratorX86::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(temp);
 }
 
-void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) {
-  UNUSED(instruction);
+void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
 
@@ -4768,7 +4841,6 @@
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4921,7 +4993,6 @@
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -5615,15 +5686,13 @@
   }
 }
 
-void LocationsBuilderX86::VisitBoundType(HBoundType* instruction) {
+void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction) {
+void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
@@ -5662,7 +5731,7 @@
     } else {
       __ cmpl(value_reg, Immediate(case_value));
     }
-    __ j(kEqual, codegen_->GetLabelOf(successors.at(i)));
+    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
   }
 
   // And the default for any other value.
@@ -5671,6 +5740,51 @@
   }
 }
 
+void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+
+  // Constant area pointer.
+  locations->SetInAt(1, Location::RequiresRegister());
+
+  // And the temporary we need.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Optimizing has a jump area.
+  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+  Register constant_area = locations->InAt(1).AsRegister<Register>();
+
+  // Remove the bias, if needed.
+  if (lower_bound != 0) {
+    __ leal(temp_reg, Address(value_reg, -lower_bound));
+    value_reg = temp_reg;
+  }
+
+  // Is the value in range?
+  DCHECK_GE(num_entries, 1);
+  __ cmpl(value_reg, Immediate(num_entries - 1));
+  __ j(kAbove, codegen_->GetLabelOf(default_block));
+
+  // We are in the range of the table.
+  // Load (target-constant_area) from the jump table, indexing by the value.
+  __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
+
+  // Compute the actual target address by adding in constant_area.
+  __ addl(temp_reg, constant_area);
+
+  // And jump.
+  __ jmp(temp_reg);
+}
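The jump-table dispatch emitted above removes the case bias, performs one unsigned bounds check, loads a base-relative offset from the table, and adds the constant-area base back before jumping. A standalone model of that arithmetic (illustrative only; assumes at least one table entry, as the DCHECK above does):

#include <cstdint>
#include <vector>

// Illustrative only: 'table' holds offsets relative to 'base' (the
// constant-area base address).
int32_t DispatchPackedSwitch(int32_t value,
                             int32_t lower_bound,
                             const std::vector<int32_t>& table,
                             int32_t base,
                             int32_t default_target) {
  uint32_t index =
      static_cast<uint32_t>(value) - static_cast<uint32_t>(lower_bound);  // leal with -lower_bound
  if (index > table.size() - 1) {                                         // cmpl + j(kAbove, default)
    return default_target;
  }
  return base + table[index];  // movl from the table, addl base, jmp.
}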
+
 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
     HX86ComputeBaseMethodAddress* insn) {
   LocationSummary* locations =
@@ -5754,28 +5868,18 @@
   }
 }
 
-void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
-  // Generate the constant area if needed.
-  X86Assembler* assembler = GetAssembler();
-  if (!assembler->IsConstantAreaEmpty()) {
-    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
-    // byte values.
-    assembler->Align(4, 0);
-    constant_area_start_ = assembler->CodeSize();
-    assembler->AddConstantArea();
-  }
-
-  // And finish up.
-  CodeGenerator::Finalize(allocator);
-}
-
 /**
  * Class to handle late fixup of offsets into constant area.
  */
 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
  public:
-  RIPFixup(const CodeGeneratorX86& codegen, int offset)
-      : codegen_(codegen), offset_into_constant_area_(offset) {}
+  RIPFixup(CodeGeneratorX86& codegen, size_t offset)
+      : codegen_(&codegen), offset_into_constant_area_(offset) {}
+
+ protected:
+  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
+
+  CodeGeneratorX86* codegen_;
 
  private:
   void Process(const MemoryRegion& region, int pos) OVERRIDE {
@@ -5783,19 +5887,77 @@
     // last 4 bytes of the instruction.
     // The value to patch is the distance from the offset in the constant area
     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
-    int32_t constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
-    int32_t relative_position = constant_offset - codegen_.GetMethodAddressOffset();;
+    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
+    int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();
 
     // Patch in the right value.
     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
   }
 
-  const CodeGeneratorX86& codegen_;
-
   // Location in constant area that the fixup refers to.
-  int offset_into_constant_area_;
+  int32_t offset_into_constant_area_;
 };
 
+/**
+ * Class to handle late fixup of offsets to a jump table that will be created in the
+ * constant area.
+ */
+class JumpTableRIPFixup : public RIPFixup {
+ public:
+  JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
+      : RIPFixup(codegen, static_cast<size_t>(-1)), switch_instr_(switch_instr) {}
+
+  void CreateJumpTable() {
+    X86Assembler* assembler = codegen_->GetAssembler();
+
+    // Ensure that the reference to the jump table has the correct offset.
+    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
+    SetOffset(offset_in_constant_table);
+
+    // The label values in the jump table are computed relative to the
+    // instruction addressing the constant area.
+    const int32_t relative_offset = codegen_->GetMethodAddressOffset();
+
+    // Populate the jump table with one entry per case target.
+    int32_t num_entries = switch_instr_->GetNumEntries();
+    HBasicBlock* block = switch_instr_->GetBlock();
+    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
+    // Each entry is the target's position relative to the constant area base address.
+    for (int32_t i = 0; i < num_entries; i++) {
+      HBasicBlock* b = successors[i];
+      Label* l = codegen_->GetLabelOf(b);
+      DCHECK(l->IsBound());
+      int32_t offset_to_block = l->Position() - relative_offset;
+      assembler->AppendInt32(offset_to_block);
+    }
+  }
+
+ private:
+  const HX86PackedSwitch* switch_instr_;
+};
+
+void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
+  // Generate the constant area if needed.
+  X86Assembler* assembler = GetAssembler();
+  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
+    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+    // byte values.
+    assembler->Align(4, 0);
+    constant_area_start_ = assembler->CodeSize();
+
+    // Populate any jump tables.
+    for (auto jump_table : fixups_to_jump_tables_) {
+      jump_table->CreateJumpTable();
+    }
+
+    // And now add the constant area to the generated code.
+    assembler->AddConstantArea();
+  }
+
+  // And finish up.
+  CodeGenerator::Finalize(allocator);
+}
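To make the fixup flow above concrete: CreateJumpTable() stores each case target as its position minus the base position addressed by HX86ComputeBaseMethodAddress, and the dispatch code re-adds the base register at run time. A simplified model, not the ART fixup machinery:

#include <cstdint>
#include <vector>

// Illustrative only: the offsets written by CreateJumpTable() and how the
// emitted dispatch code consumes them. 'base_position' stands in for the
// address produced by HX86ComputeBaseMethodAddress.
std::vector<int32_t> BuildJumpTable(const std::vector<int32_t>& target_positions,
                                    int32_t base_position) {
  std::vector<int32_t> table;
  table.reserve(target_positions.size());
  for (int32_t target : target_positions) {
    table.push_back(target - base_position);  // offset_to_block in CreateJumpTable().
  }
  return table;
}

int32_t ResolveTarget(const std::vector<int32_t>& table, size_t index, int32_t base_position) {
  return base_position + table[index];  // addl(temp_reg, constant_area) before the jmp.
}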
+
 Address CodeGeneratorX86::LiteralDoubleAddress(double v, Register reg) {
   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
   return Address(reg, kDummy32BitOffset, fixup);
@@ -5816,98 +5978,18 @@
   return Address(reg, kDummy32BitOffset, fixup);
 }
 
-/**
- * Finds instructions that need the constant area base as an input.
- */
-class ConstantHandlerVisitor : public HGraphVisitor {
- public:
-  explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
+                                           Register reg,
+                                           Register value) {
+  // Create a fixup to be used to create and address the jump table.
+  JumpTableRIPFixup* table_fixup =
+      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
 
- private:
-  void VisitAdd(HAdd* add) OVERRIDE {
-    BinaryFP(add);
-  }
+  // We have to populate the jump tables.
+  fixups_to_jump_tables_.push_back(table_fixup);
 
-  void VisitSub(HSub* sub) OVERRIDE {
-    BinaryFP(sub);
-  }
-
-  void VisitMul(HMul* mul) OVERRIDE {
-    BinaryFP(mul);
-  }
-
-  void VisitDiv(HDiv* div) OVERRIDE {
-    BinaryFP(div);
-  }
-
-  void VisitReturn(HReturn* ret) OVERRIDE {
-    HConstant* value = ret->InputAt(0)->AsConstant();
-    if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) {
-      ReplaceInput(ret, value, 0, true);
-    }
-  }
-
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
-    HandleInvoke(invoke);
-  }
-
-  void BinaryFP(HBinaryOperation* bin) {
-    HConstant* rhs = bin->InputAt(1)->AsConstant();
-    if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) {
-      ReplaceInput(bin, rhs, 1, false);
-    }
-  }
-
-  void InitializeConstantAreaPointer(HInstruction* user) {
-    // Ensure we only initialize the pointer once.
-    if (base_ != nullptr) {
-      return;
-    }
-
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
-    DCHECK(base_ != nullptr);
-  }
-
-  void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializeConstantAreaPointer(insn);
-    HGraph* graph = GetGraph();
-    HBasicBlock* block = insn->GetBlock();
-    HX86LoadFromConstantTable* load_constant =
-        new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
-    block->InsertInstructionBefore(load_constant, insn);
-    insn->ReplaceInput(load_constant, input_index);
-  }
-
-  void HandleInvoke(HInvoke* invoke) {
-    // Ensure that we can load FP arguments from the constant area.
-    for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
-      HConstant* input = invoke->InputAt(i)->AsConstant();
-      if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
-        ReplaceInput(invoke, input, i, true);
-      }
-    }
-  }
-
-  // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
-  // input to the HX86LoadFromConstantTable instructions.
-  HX86ComputeBaseMethodAddress* base_;
-};
-
-void ConstantAreaFixups::Run() {
-  ConstantHandlerVisitor visitor(graph_);
-  visitor.VisitInsertionOrder();
+  // We want a scaled address, as we are extracting the correct offset from the table.
+  return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
 }
 
 // TODO: target as memory.
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ae2d84f..ac3d06c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -245,6 +245,8 @@
   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
 };
 
+class JumpTableRIPFixup;
+
 class CodeGeneratorX86 : public CodeGenerator {
  public:
   CodeGeneratorX86(HGraph* graph,
@@ -331,6 +333,12 @@
   // Helper method to move a 64bits value between two locations.
   void Move64(Location destination, Location source);
 
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   // Generate a call to a static or direct method.
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   // Generate a call to a virtual method.
@@ -385,6 +393,8 @@
   Address LiteralInt32Address(int32_t v, Register reg);
   Address LiteralInt64Address(int64_t v, Register reg);
 
+  Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
+
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
  private:
@@ -405,6 +415,9 @@
   // Used for fixups to the constant area.
   int32_t constant_area_start_;
 
+  // Fixups for jump tables that need to be patched after the constant table is generated.
+  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
   // If there is a HX86ComputeBaseMethodAddress instruction in the graph
   // (which shall be the sole instruction of this kind), subtracting this offset
   // from the value contained in the out register of this HX86ComputeBaseMethodAddress
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 272d86f..5218d70 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -36,9 +36,6 @@
 
 namespace x86_64 {
 
-// Some x86_64 instructions require a register to be available as temp.
-static constexpr Register TMP = R11;
-
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
 
@@ -452,11 +449,16 @@
     case kCondLE: return kLessEqual;
     case kCondGT: return kGreater;
     case kCondGE: return kGreaterEqual;
+    case kCondB:  return kBelow;
+    case kCondBE: return kBelowEqual;
+    case kCondA:  return kAbove;
+    case kCondAE: return kAboveEqual;
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 }
 
+// Maps FP condition to x86_64 name.
 inline Condition X86_64FPCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return kEqual;
@@ -465,11 +467,30 @@
     case kCondLE: return kBelowEqual;
     case kCondGT: return kAbove;
     case kCondGE: return kAboveEqual;
+    default:      break;  // should not happen
   };
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 }
 
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method ATTRIBUTE_UNUSED) {
+  switch (desired_dispatch_info.code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
+      return HInvokeStaticOrDirect::DispatchInfo {
+        desired_dispatch_info.method_load_kind,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        desired_dispatch_info.method_load_data,
+        0u
+      };
+    default:
+      return desired_dispatch_info;
+  }
+}
+
 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
                                                      Location temp) {
   // All registers are assumed to be correctly set up.
@@ -536,8 +557,9 @@
     }
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
-      FALLTHROUGH_INTENDED;
+      // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
+      LOG(FATAL) << "Unsupported";
+      UNREACHABLE();
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // (callee_method + offset_of_quick_compiled_code)()
       __ call(Address(callee_method.AsRegister<CpuRegister>(),
@@ -673,7 +695,8 @@
         constant_area_start_(0),
         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -1046,8 +1069,7 @@
   exit->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) {
-  UNUSED(exit);
+void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
@@ -1249,8 +1271,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
   HInstruction* cond = deoptimize->InputAt(0);
-  DCHECK(cond->IsCondition());
-  if (cond->AsCondition()->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     locations->SetInAt(0, Location::Any());
   }
 }
@@ -1275,9 +1296,8 @@
   local->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load) {
+void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(load);
 }
 
 void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) {
@@ -1304,8 +1324,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) {
-  UNUSED(store);
+void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
 void LocationsBuilderX86_64::VisitCondition(HCondition* cond) {
@@ -1477,6 +1496,38 @@
   VisitCondition(comp);
 }
 
+void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
+  VisitCondition(comp);
+}
+
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
@@ -1578,9 +1629,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) {
+void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
@@ -1589,9 +1639,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant) {
+void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
@@ -1600,9 +1649,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) {
+void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
@@ -1611,9 +1659,8 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant) {
+void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
@@ -1622,9 +1669,9 @@
   locations->SetOut(Location::ConstantLocation(constant));
 }
 
-void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
+void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
+    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
   // Will be generated at use site.
-  UNUSED(constant);
 }
 
 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
@@ -1639,8 +1686,7 @@
   ret->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) {
-  UNUSED(ret);
+void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
   codegen_->GenerateFrameExit();
 }
 
@@ -3594,8 +3640,7 @@
   locations->SetOut(Location::Any());
 }
 
-void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) {
-  UNUSED(instruction);
+void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unimplemented";
 }
 
@@ -4415,13 +4460,11 @@
   temp->SetLocations(nullptr);
 }
 
-void InstructionCodeGeneratorX86_64::VisitTemporary(HTemporary* temp) {
+void InstructionCodeGeneratorX86_64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
   // Nothing to do, this is driven by the code generator.
-  UNUSED(temp);
 }
 
-void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) {
-  UNUSED(instruction);
+void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unimplemented";
 }
 
@@ -4479,7 +4522,6 @@
 }
 
 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4638,7 +4680,6 @@
 }
 
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -5298,15 +5339,13 @@
   }
 }
 
-void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction) {
+void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
+void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
-  UNUSED(instruction);
   LOG(FATAL) << "Unreachable";
 }
 
@@ -5327,31 +5366,43 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
   int32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
-  CpuRegister value_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+  // Remove the bias, if needed.
+  Register value_reg_out = value_reg_in.AsRegister();
+  if (lower_bound != 0) {
+    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
+    value_reg_out = temp_reg.AsRegister();
+  }
+  CpuRegister value_reg(value_reg_out);
+
+  // Is the value in range?
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+  __ cmpl(value_reg, Immediate(num_entries - 1));
+  __ j(kAbove, codegen_->GetLabelOf(default_block));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors.at(i)));
-  }
+  // We are in the range of the table.
+  // Load the address of the jump table in the constant area.
+  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
-  }
+  // Load the (signed) offset from the jump table.
+  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
+
+  // Add the offset to the address of the table base.
+  __ addq(temp_reg, base_reg);
+
+  // And jump.
+  __ jmp(temp_reg);
 }
 
 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
@@ -5377,15 +5428,85 @@
   }
 }
 
+/**
+ * Class to handle late fixup of offsets into the constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
+ public:
+  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
+      : codegen_(&codegen), offset_into_constant_area_(offset) {}
+
+ protected:
+  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
+
+  CodeGeneratorX86_64* codegen_;
+
+ private:
+  void Process(const MemoryRegion& region, int pos) OVERRIDE {
+    // Patch the correct offset for the instruction.  We use the address of the
+    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
+    int32_t relative_position = constant_offset - pos;
+
+    // Patch in the right value.
+    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+  }
+
+  // Location in constant area that the fixup refers to.
+  size_t offset_into_constant_area_;
+};
+
+/**
+ * Class to handle late fixup of offsets to a jump table that will be created in the
+ * constant area.
+ */
+class JumpTableRIPFixup : public RIPFixup {
+ public:
+  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
+      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
+
+  void CreateJumpTable() {
+    X86_64Assembler* assembler = codegen_->GetAssembler();
+
+    // Ensure that the reference to the jump table has the correct offset.
+    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
+    SetOffset(offset_in_constant_table);
+
+    // Compute the offset from the start of the function to this jump table.
+    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
+
+    // Populate the jump table with the offsets to the case target blocks.
+    int32_t num_entries = switch_instr_->GetNumEntries();
+    HBasicBlock* block = switch_instr_->GetBlock();
+    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
+    // The value that we want is the target offset - the position of the table.
+    for (int32_t i = 0; i < num_entries; i++) {
+      HBasicBlock* b = successors[i];
+      Label* l = codegen_->GetLabelOf(b);
+      DCHECK(l->IsBound());
+      int32_t offset_to_block = l->Position() - current_table_offset;
+      assembler->AppendInt32(offset_to_block);
+    }
+  }
+
+ private:
+  const HPackedSwitch* switch_instr_;
+};
+
 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   // Generate the constant area if needed.
   X86_64Assembler* assembler = GetAssembler();
-  if (!assembler->IsConstantAreaEmpty()) {
-    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
-    // byte values.  If used for vectors at a later time, this will need to be
-    // updated to 16 bytes with the appropriate offset.
+  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
+    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
     assembler->Align(4, 0);
     constant_area_start_ = assembler->CodeSize();
+
+    // Populate any jump tables.
+    for (auto jump_table : fixups_to_jump_tables_) {
+      jump_table->CreateJumpTable();
+    }
+
+    // And now add the constant area to the generated code.
     assembler->AddConstantArea();
   }
 
@@ -5393,31 +5514,6 @@
   CodeGenerator::Finalize(allocator);
 }
 
-/**
- * Class to handle late fixup of offsets into constant area.
- */
-class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
-  public:
-    RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
-      : codegen_(codegen), offset_into_constant_area_(offset) {}
-
-  private:
-    void Process(const MemoryRegion& region, int pos) OVERRIDE {
-      // Patch the correct offset for the instruction.  We use the address of the
-      // 'next' instruction, which is 'pos' (patch the 4 bytes before).
-      int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
-      int relative_position = constant_offset - pos;
-
-      // Patch in the right value.
-      region.StoreUnaligned<int32_t>(pos - 4, relative_position);
-    }
-
-    const CodeGeneratorX86_64& codegen_;
-
-    // Location in constant area that the fixup refers to.
-    int offset_into_constant_area_;
-};
-
 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
   return Address::RIP(fixup);
@@ -5458,6 +5554,16 @@
   GetMoveResolver()->EmitNativeCode(&parallel_move);
 }
 
+Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
+  // Create a fixup; it will be used both to create the jump table and to address it.
+  JumpTableRIPFixup* table_fixup =
+      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
+
+  // Remember the fixup so that the jump table can be populated when the code is finalized.
+  fixups_to_jump_tables_.push_back(table_fixup);
+  return Address::RIP(table_fixup);
+}
+
 #undef __
 
 }  // namespace x86_64
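
The new packed-switch lowering above trades the old compare-and-branch chain for a table of relative offsets stored in the constant area. Below is a minimal, self-contained sketch of that scheme in plain C++ (hypothetical positions, not ART code): the table stores each target's position minus the table's own position, and dispatch adds the loaded offset back to the table address, mirroring the emitted leaq/movsxd/addq/jmp sequence.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Pretend code positions (byte offsets from the start of the method).
      const int32_t table_position = 100;
      const std::vector<int32_t> case_positions = {40, 60, 80};  // bound case labels

      // CreateJumpTable(): append target_position - table_position for each case.
      std::vector<int32_t> jump_table;
      for (int32_t pos : case_positions) {
        jump_table.push_back(pos - table_position);
      }

      // VisitPackedSwitch(): remove the bias, bounds-check with one unsigned
      // compare, then target = table_base + table[index].
      const int32_t lower_bound = 5;
      const int32_t num_entries = static_cast<int32_t>(case_positions.size());
      int32_t value = 6;  // switch input
      uint32_t index = static_cast<uint32_t>(value - lower_bound);
      assert(index <= static_cast<uint32_t>(num_entries - 1));  // otherwise: default block
      int32_t target = table_position + jump_table[index];
      assert(target == case_positions[1]);
      return 0;
    }
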
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ecc8630..fc485f5 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -30,6 +30,9 @@
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
 
+// Some x86_64 instructions require a register to be available as temp.
+static constexpr Register TMP = R11;
+
 static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
 static constexpr FloatRegister kParameterFloatRegisters[] =
     { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
@@ -231,6 +234,9 @@
   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
 };
 
+// Class for fixups to jump tables.
+class JumpTableRIPFixup;
+
 class CodeGeneratorX86_64 : public CodeGenerator {
  public:
   CodeGeneratorX86_64(HGraph* graph,
@@ -329,6 +335,12 @@
     return false;
   }
 
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      MethodReference target_method) OVERRIDE;
+
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
@@ -351,6 +363,7 @@
 
   // Load a 64 bit value into a register in the most efficient manner.
   void Load64BitValue(CpuRegister dest, int64_t value);
+  Address LiteralCaseTable(HPackedSwitch* switch_instr);
 
   // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
   void Store64BitValueToStack(Location dest, int64_t value);
@@ -388,6 +401,9 @@
   // We will fix this up in the linker later to have the right value.
   static constexpr int32_t kDummy32BitOffset = 256;
 
+  // Fixups for jump tables need to be handled specially.
+  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 5fc305c..57de41f 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -20,6 +20,8 @@
 #include "arch/arm/instruction_set_features_arm.h"
 #include "arch/arm/registers_arm.h"
 #include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/mips/instruction_set_features_mips.h"
+#include "arch/mips/registers_mips.h"
 #include "arch/mips64/instruction_set_features_mips64.h"
 #include "arch/mips64/registers_mips64.h"
 #include "arch/x86/instruction_set_features_x86.h"
@@ -29,6 +31,7 @@
 #include "builder.h"
 #include "code_generator_arm.h"
 #include "code_generator_arm64.h"
+#include "code_generator_mips.h"
 #include "code_generator_mips64.h"
 #include "code_generator_x86.h"
 #include "code_generator_x86_64.h"
@@ -43,6 +46,7 @@
 #include "ssa_liveness_analysis.h"
 #include "utils.h"
 #include "utils/arm/managed_register_arm.h"
+#include "utils/mips/managed_register_mips.h"
 #include "utils/mips64/managed_register_mips64.h"
 #include "utils/x86/managed_register_x86.h"
 
@@ -177,6 +181,14 @@
     Run(allocator, codegenARM64, has_result, expected);
   }
 
+  std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
+      MipsInstructionSetFeatures::FromCppDefines());
+  mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
+  codegenMIPS.CompileBaseline(&allocator, true);
+  if (kRuntimeISA == kMips) {
+    Run(allocator, codegenMIPS, has_result, expected);
+  }
+
   std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
       Mips64InstructionSetFeatures::FromCppDefines());
   mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
@@ -234,6 +246,11 @@
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
     RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+  } else if (kRuntimeISA == kMips) {
+    std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
+        MipsInstructionSetFeatures::FromCppDefines());
+    mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
+    RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
@@ -561,7 +578,7 @@
   ASSERT_FALSE(equal->NeedsMaterialization());
 
   auto hook_before_codegen = [](HGraph* graph_in) {
-    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
+    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
     HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
     block->InsertInstructionBefore(move, block->GetLastInstruction());
   };
@@ -667,7 +684,7 @@
     code_block->AddInstruction(&ret);
 
     auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
+      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
       HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
@@ -733,7 +750,7 @@
     if_false_block->AddInstruction(&ret_ge);
 
     auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
+      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
       HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
@@ -761,4 +778,130 @@
   TestCode(data, true, 2);
 }
 
+// Helper function for the comparison tests below.
+static void TestComparison(IfCondition condition, int64_t i, int64_t j, Primitive::Type type) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = CreateGraph(&allocator);
+
+  HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry_block);
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddInstruction(new (&allocator) HGoto());
+
+  HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+
+  HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(exit_block);
+  graph->SetExitBlock(exit_block);
+  exit_block->AddInstruction(new (&allocator) HExit());
+
+  entry_block->AddSuccessor(block);
+  block->AddSuccessor(exit_block);
+
+  HInstruction* op1;
+  HInstruction* op2;
+  if (type == Primitive::kPrimInt) {
+    op1 = graph->GetIntConstant(i);
+    op2 = graph->GetIntConstant(j);
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimLong);
+    op1 = graph->GetLongConstant(i);
+    op2 = graph->GetLongConstant(j);
+  }
+
+  HInstruction* comparison = nullptr;
+  bool expected_result = false;
+  const uint64_t x = i;
+  const uint64_t y = j;
+  switch (condition) {
+    case kCondEQ:
+      comparison = new (&allocator) HEqual(op1, op2);
+      expected_result = (i == j);
+      break;
+    case kCondNE:
+      comparison = new (&allocator) HNotEqual(op1, op2);
+      expected_result = (i != j);
+      break;
+    case kCondLT:
+      comparison = new (&allocator) HLessThan(op1, op2);
+      expected_result = (i < j);
+      break;
+    case kCondLE:
+      comparison = new (&allocator) HLessThanOrEqual(op1, op2);
+      expected_result = (i <= j);
+      break;
+    case kCondGT:
+      comparison = new (&allocator) HGreaterThan(op1, op2);
+      expected_result = (i > j);
+      break;
+    case kCondGE:
+      comparison = new (&allocator) HGreaterThanOrEqual(op1, op2);
+      expected_result = (i >= j);
+      break;
+    case kCondB:
+      comparison = new (&allocator) HBelow(op1, op2);
+      expected_result = (x < y);
+      break;
+    case kCondBE:
+      comparison = new (&allocator) HBelowOrEqual(op1, op2);
+      expected_result = (x <= y);
+      break;
+    case kCondA:
+      comparison = new (&allocator) HAbove(op1, op2);
+      expected_result = (x > y);
+      break;
+    case kCondAE:
+      comparison = new (&allocator) HAboveOrEqual(op1, op2);
+      expected_result = (x >= y);
+      break;
+  }
+  block->AddInstruction(comparison);
+  block->AddInstruction(new (&allocator) HReturn(comparison));
+
+  auto hook_before_codegen = [](HGraph*) {
+  };
+  RunCodeOptimized(graph, hook_before_codegen, true, expected_result);
+}
+
+TEST(CodegenTest, ComparisonsInt) {
+  for (int64_t i = -1; i <= 1; i++) {
+    for (int64_t j = -1; j <= 1; j++) {
+      TestComparison(kCondEQ, i, j, Primitive::kPrimInt);
+      TestComparison(kCondNE, i, j, Primitive::kPrimInt);
+      TestComparison(kCondLT, i, j, Primitive::kPrimInt);
+      TestComparison(kCondLE, i, j, Primitive::kPrimInt);
+      TestComparison(kCondGT, i, j, Primitive::kPrimInt);
+      TestComparison(kCondGE, i, j, Primitive::kPrimInt);
+      TestComparison(kCondB,  i, j, Primitive::kPrimInt);
+      TestComparison(kCondBE, i, j, Primitive::kPrimInt);
+      TestComparison(kCondA,  i, j, Primitive::kPrimInt);
+      TestComparison(kCondAE, i, j, Primitive::kPrimInt);
+    }
+  }
+}
+
+TEST(CodegenTest, ComparisonsLong) {
+  // TODO: make MIPS work for long
+  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
+    return;
+  }
+
+  for (int64_t i = -1; i <= 1; i++) {
+    for (int64_t j = -1; j <= 1; j++) {
+      TestComparison(kCondEQ, i, j, Primitive::kPrimLong);
+      TestComparison(kCondNE, i, j, Primitive::kPrimLong);
+      TestComparison(kCondLT, i, j, Primitive::kPrimLong);
+      TestComparison(kCondLE, i, j, Primitive::kPrimLong);
+      TestComparison(kCondGT, i, j, Primitive::kPrimLong);
+      TestComparison(kCondGE, i, j, Primitive::kPrimLong);
+      TestComparison(kCondB,  i, j, Primitive::kPrimLong);
+      TestComparison(kCondBE, i, j, Primitive::kPrimLong);
+      TestComparison(kCondA,  i, j, Primitive::kPrimLong);
+      TestComparison(kCondAE, i, j, Primitive::kPrimLong);
+    }
+  }
+}
+
 }  // namespace art
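
The TestComparison helper above reinterprets i and j as uint64_t before computing the expected result for kCondB/BE/A/AE because signed and unsigned orderings disagree on negative inputs. A short illustration of that disagreement (plain C++, not ART code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t i = -1, j = 1;
      uint64_t x = static_cast<uint64_t>(i);  // 0xFFFFFFFFFFFFFFFF
      uint64_t y = static_cast<uint64_t>(j);
      assert(i < j);   // signed: LessThan holds
      assert(x > y);   // unsigned: Above holds, so Below must not
      return 0;
    }
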
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index f545475..e1a8c9c 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -203,17 +203,23 @@
 
   int64_t value = CodeGenerator::GetInt64ValueOf(constant);
 
-  if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() ||
-      instr->IsCompare() || instr->IsBoundsCheck()) {
-    // Uses aliases of ADD/SUB instructions.
-    return vixl::Assembler::IsImmAddSub(value);
-  } else if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
+  if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
     // Uses logical operations.
     return vixl::Assembler::IsImmLogical(value, vixl::kXRegSize);
-  } else {
-    DCHECK(instr->IsNeg());
+  } else if (instr->IsNeg()) {
     // Uses mov -immediate.
     return vixl::Assembler::IsImmMovn(value, vixl::kXRegSize);
+  } else {
+    DCHECK(instr->IsAdd() ||
+           instr->IsArm64IntermediateAddress() ||
+           instr->IsBoundsCheck() ||
+           instr->IsCompare() ||
+           instr->IsCondition() ||
+           instr->IsSub());
+    // Uses aliases of ADD/SUB instructions.
+    // If `value` does not fit but `-value` does, VIXL will automatically use
+    // the 'opposite' instruction.
+    return vixl::Assembler::IsImmAddSub(value) || vixl::Assembler::IsImmAddSub(-value);
   }
 }
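
The updated comment notes that an ADD/SUB immediate which only fits when negated can still be accepted, because VIXL switches to the opposite instruction. A tiny arithmetic sketch of that rewrite (hypothetical encoder rule, not VIXL or ART code):

    #include <cassert>
    #include <cstdint>

    int64_t AddImmediate(int64_t reg, int64_t imm) {
      // Hypothetical encoder: suppose only non-negative immediates are encodable.
      if (imm >= 0) {
        return reg + imm;   // ADD reg, #imm
      }
      return reg - (-imm);  // SUB reg, #-imm (the 'opposite' instruction)
    }

    int main() {
      assert(AddImmediate(10, -3) == 7);
      assert(AddImmediate(10, 3) == 13);
      return 0;
    }
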
 
diff --git a/compiler/optimizing/constant_area_fixups_x86.cc b/compiler/optimizing/constant_area_fixups_x86.cc
new file mode 100644
index 0000000..c347000
--- /dev/null
+++ b/compiler/optimizing/constant_area_fixups_x86.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "constant_area_fixups_x86.h"
+
+namespace art {
+namespace x86 {
+
+/**
+ * Finds instructions that need the constant area base as an input.
+ */
+class ConstantHandlerVisitor : public HGraphVisitor {
+ public:
+  explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+
+ private:
+  void VisitAdd(HAdd* add) OVERRIDE {
+    BinaryFP(add);
+  }
+
+  void VisitSub(HSub* sub) OVERRIDE {
+    BinaryFP(sub);
+  }
+
+  void VisitMul(HMul* mul) OVERRIDE {
+    BinaryFP(mul);
+  }
+
+  void VisitDiv(HDiv* div) OVERRIDE {
+    BinaryFP(div);
+  }
+
+  void VisitReturn(HReturn* ret) OVERRIDE {
+    HConstant* value = ret->InputAt(0)->AsConstant();
+    if (value != nullptr && Primitive::IsFloatingPointType(value->GetType())) {
+      ReplaceInput(ret, value, 0, true);
+    }
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void BinaryFP(HBinaryOperation* bin) {
+    HConstant* rhs = bin->InputAt(1)->AsConstant();
+    if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) {
+      ReplaceInput(bin, rhs, 1, false);
+    }
+  }
+
+  void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    // We need to replace the HPackedSwitch with an HX86PackedSwitch in order to
+    // address the constant area.
+    InitializeConstantAreaPointer(switch_insn);
+    HGraph* graph = GetGraph();
+    HBasicBlock* block = switch_insn->GetBlock();
+    HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
+        switch_insn->GetStartValue(),
+        switch_insn->GetNumEntries(),
+        switch_insn->InputAt(0),
+        base_,
+        switch_insn->GetDexPc());
+    block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
+  }
+
+  void InitializeConstantAreaPointer(HInstruction* user) {
+    // Ensure we only initialize the pointer once.
+    if (base_ != nullptr) {
+      return;
+    }
+
+    HGraph* graph = GetGraph();
+    HBasicBlock* entry = graph->GetEntryBlock();
+    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
+    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
+    entry->InsertInstructionBefore(base_, insert_pos);
+    DCHECK(base_ != nullptr);
+  }
+
+  void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
+    InitializeConstantAreaPointer(insn);
+    HGraph* graph = GetGraph();
+    HBasicBlock* block = insn->GetBlock();
+    HX86LoadFromConstantTable* load_constant =
+        new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
+    block->InsertInstructionBefore(load_constant, insn);
+    insn->ReplaceInput(load_constant, input_index);
+  }
+
+  void HandleInvoke(HInvoke* invoke) {
+    // Ensure that we can load FP arguments from the constant area.
+    for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
+      HConstant* input = invoke->InputAt(i)->AsConstant();
+      if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
+        ReplaceInput(invoke, input, i, true);
+      }
+    }
+  }
+
+  // The generated HX86ComputeBaseMethodAddress in the entry block, needed as an
+  // input to the HX86LoadFromConstantTable instructions.
+  HX86ComputeBaseMethodAddress* base_;
+};
+
+void ConstantAreaFixups::Run() {
+  ConstantHandlerVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+}
+
+}  // namespace x86
+}  // namespace art
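
The new pass above routes floating-point constants through a per-method constant table addressed from a base computed once in the entry block, instead of leaving them embedded in each instruction. A minimal model of that idea (plain C++ with made-up types, not the ART HIR classes):

    #include <cassert>
    #include <vector>

    struct ConstantArea {
      std::vector<double> values;
      size_t Add(double v) {  // returns the offset used by the load
        values.push_back(v);
        return values.size() - 1;
      }
      double Load(size_t offset) const { return values[offset]; }
    };

    int main() {
      ConstantArea area;               // anchored by HX86ComputeBaseMethodAddress
      double x = 3.0;
      size_t offset = area.Add(2.5);   // stands in for HX86LoadFromConstantTable(base, 2.5)
      double result = x * area.Load(offset);
      assert(result == 7.5);
      return 0;
    }
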
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index e0aa4ff..57452cc 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -27,6 +27,11 @@
  private:
   void VisitShift(HBinaryOperation* shift);
 
+  void VisitAbove(HAbove* instruction) OVERRIDE;
+  void VisitAboveOrEqual(HAboveOrEqual* instruction) OVERRIDE;
+  void VisitBelow(HBelow* instruction) OVERRIDE;
+  void VisitBelowOrEqual(HBelowOrEqual* instruction) OVERRIDE;
+
   void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitCompare(HCompare* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
@@ -105,6 +110,54 @@
   }
 }
 
+void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) {
+  if (instruction->GetLeft()->IsConstant() &&
+      instruction->GetLeft()->AsConstant()->IsZero()) {
+    // Replace code looking like
+    //    ABOVE dst, 0, src  // unsigned 0 > src is always false
+    // with
+    //    CONSTANT false
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* instruction) {
+  if (instruction->GetRight()->IsConstant() &&
+      instruction->GetRight()->AsConstant()->IsZero()) {
+    // Replace code looking like
+    //    ABOVE_OR_EQUAL dst, src, 0  // unsigned src >= 0 is always true
+    // with
+    //    CONSTANT true
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) {
+  if (instruction->GetRight()->IsConstant() &&
+      instruction->GetRight()->AsConstant()->IsZero()) {
+    // Replace code looking like
+    //    BELOW dst, src, 0  // unsigned src < 0 is always false
+    // with
+    //    CONSTANT false
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* instruction) {
+  if (instruction->GetLeft()->IsConstant() &&
+      instruction->GetLeft()->AsConstant()->IsZero()) {
+    // Replace code looking like
+    //    BELOW_OR_EQUAL dst, 0, src  // unsigned 0 <= src is always true
+    // with
+    //    CONSTANT true
+    instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
 void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   if ((input_cst != nullptr) && input_cst->IsZero()) {
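
The four new handlers above fold unsigned comparisons against the constant 0 whose outcome can never vary at runtime. A plain C++ illustration of those identities (not ART code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 1u, 0xFFFFFFFFu}) {
        assert(!(0u > x));   // ABOVE 0, src          -> false
        assert(x >= 0u);     // ABOVE_OR_EQUAL src, 0 -> true
        assert(!(x < 0u));   // BELOW src, 0          -> false
        assert(0u <= x);     // BELOW_OR_EQUAL 0, src -> true
      }
      return 0;
    }
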
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 10e4bc9..e469c8d 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -29,50 +29,70 @@
 
 namespace art {
 
-static void TestCode(const uint16_t* data,
-                     const std::string& expected_before,
-                     const std::string& expected_after_cf,
-                     const std::string& expected_after_dce,
-                     std::function<void(HGraph*)> check_after_cf,
-                     Primitive::Type return_type = Primitive::kPrimInt) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateCFG(&allocator, data, return_type);
-  ASSERT_NE(graph, nullptr);
+/**
+ * Fixture class for the constant folding and dead code elimination tests.
+ */
+class ConstantFoldingTest : public testing::Test {
+ public:
+  ConstantFoldingTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
+  }
 
-  graph->TryBuildingSsa();
+  void TestCode(const uint16_t* data,
+                const std::string& expected_before,
+                const std::string& expected_after_cf,
+                const std::string& expected_after_dce,
+                std::function<void(HGraph*)> check_after_cf,
+                Primitive::Type return_type = Primitive::kPrimInt) {
+    graph_ = CreateCFG(&allocator_, data, return_type);
+    TestCodeOnReadyGraph(expected_before,
+                         expected_after_cf,
+                         expected_after_dce,
+                         check_after_cf);
+  }
 
-  StringPrettyPrinter printer_before(graph);
-  printer_before.VisitInsertionOrder();
-  std::string actual_before = printer_before.str();
-  ASSERT_EQ(expected_before, actual_before);
+  void TestCodeOnReadyGraph(const std::string& expected_before,
+                            const std::string& expected_after_cf,
+                            const std::string& expected_after_dce,
+                            std::function<void(HGraph*)> check_after_cf) {
+    ASSERT_NE(graph_, nullptr);
+    graph_->TryBuildingSsa();
 
-  std::unique_ptr<const X86InstructionSetFeatures> features_x86(
-      X86InstructionSetFeatures::FromCppDefines());
-  x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
-  HConstantFolding(graph).Run();
-  SSAChecker ssa_checker_cf(&allocator, graph);
-  ssa_checker_cf.Run();
-  ASSERT_TRUE(ssa_checker_cf.IsValid());
+    StringPrettyPrinter printer_before(graph_);
+    printer_before.VisitInsertionOrder();
+    std::string actual_before = printer_before.str();
+    EXPECT_EQ(expected_before, actual_before);
 
-  StringPrettyPrinter printer_after_cf(graph);
-  printer_after_cf.VisitInsertionOrder();
-  std::string actual_after_cf = printer_after_cf.str();
-  ASSERT_EQ(expected_after_cf, actual_after_cf);
+    std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+        X86InstructionSetFeatures::FromCppDefines());
+    x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions());
+    HConstantFolding(graph_).Run();
+    SSAChecker ssa_checker_cf(graph_);
+    ssa_checker_cf.Run();
+    ASSERT_TRUE(ssa_checker_cf.IsValid());
 
-  check_after_cf(graph);
+    StringPrettyPrinter printer_after_cf(graph_);
+    printer_after_cf.VisitInsertionOrder();
+    std::string actual_after_cf = printer_after_cf.str();
+    EXPECT_EQ(expected_after_cf, actual_after_cf);
 
-  HDeadCodeElimination(graph).Run();
-  SSAChecker ssa_checker_dce(&allocator, graph);
-  ssa_checker_dce.Run();
-  ASSERT_TRUE(ssa_checker_dce.IsValid());
+    check_after_cf(graph_);
 
-  StringPrettyPrinter printer_after_dce(graph);
-  printer_after_dce.VisitInsertionOrder();
-  std::string actual_after_dce = printer_after_dce.str();
-  ASSERT_EQ(expected_after_dce, actual_after_dce);
-}
+    HDeadCodeElimination(graph_).Run();
+    SSAChecker ssa_checker_dce(graph_);
+    ssa_checker_dce.Run();
+    ASSERT_TRUE(ssa_checker_dce.IsValid());
 
+    StringPrettyPrinter printer_after_dce(graph_);
+    printer_after_dce.VisitInsertionOrder();
+    std::string actual_after_dce = printer_after_dce.str();
+    EXPECT_EQ(expected_after_dce, actual_after_dce);
+  }
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+};
 
 /**
  * Tiny three-register program exercising int constant folding on negation.
@@ -84,7 +104,7 @@
  *     v1 <- -v0                1.      neg-int v1, v0
  *     return v1                2.      return v1
  */
-TEST(ConstantFolding, IntConstantFoldingNegation) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingNegation) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::NEG_INT | 1 << 8 | 0 << 12,
@@ -113,7 +133,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), -1);
   };
@@ -141,7 +161,7 @@
  *     (v2, v3) <- -(v0, v1)    1.      neg-long v2, v0
  *     return (v2, v3)          2.      return-wide v2
  */
-TEST(ConstantFolding, LongConstantFoldingNegation) {
+TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -175,7 +195,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsLongConstant());
     ASSERT_EQ(inst->AsLongConstant()->GetValue(), INT64_C(-4294967296));
   };
@@ -205,7 +225,7 @@
  *     v2 <- v0 + v1            2.      add-int v2, v0, v1
  *     return v2                4.      return v2
  */
-TEST(ConstantFolding, IntConstantFoldingOnAddition1) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -237,7 +257,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 3);
   };
@@ -271,7 +291,7 @@
  *     v2 <- v0 + v1            6.      add-int v2, v0, v1
  *     return v2                8.      return v2
  */
-TEST(ConstantFolding, IntConstantFoldingOnAddition2) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -317,7 +337,7 @@
 
   // Check the values of the computed constants.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst1 = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst1->IsIntConstant());
     ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 12);
     HInstruction* inst2 = inst1->GetPrevious();
@@ -357,7 +377,7 @@
  *     v2 <- v0 - v1            2.      sub-int v2, v0, v1
  *     return v2                4.      return v2
  */
-TEST(ConstantFolding, IntConstantFoldingOnSubtraction) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 3 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -389,7 +409,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
   };
@@ -421,7 +441,7 @@
  *       (v0, v1) + (v1, v2)    4.      add-long v4, v0, v2
  *     return (v4, v5)          6.      return-wide v4
  */
-TEST(ConstantFolding, LongConstantFoldingOnAddition) {
+TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) {
   const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
     Instruction::CONST_WIDE_16 | 0 << 8, 1,
     Instruction::CONST_WIDE_16 | 2 << 8, 2,
@@ -453,7 +473,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsLongConstant());
     ASSERT_EQ(inst->AsLongConstant()->GetValue(), 3);
   };
@@ -486,7 +506,7 @@
  *       (v0, v1) - (v1, v2)    4.      sub-long v4, v0, v2
  *     return (v4, v5)          6.      return-wide v4
  */
-TEST(ConstantFolding, LongConstantFoldingOnSubtraction) {
+TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) {
   const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
     Instruction::CONST_WIDE_16 | 0 << 8, 3,
     Instruction::CONST_WIDE_16 | 2 << 8, 2,
@@ -518,7 +538,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsLongConstant());
     ASSERT_EQ(inst->AsLongConstant()->GetValue(), 1);
   };
@@ -560,7 +580,7 @@
  * L3: v2 <- v1 + 8             11.     add-int/lit16 v2, v1, #+8
  *     return v2                13.     return v2
  */
-TEST(ConstantFolding, IntConstantFoldingAndJumps) {
+TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 << 8 | 1 << 12,
     Instruction::CONST_4 | 1 << 8 | 2 << 12,
@@ -569,7 +589,7 @@
     Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 5,
     Instruction::GOTO | 4 << 8,
     Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 4,
-    static_cast<uint16_t>(Instruction::GOTO | -5 << 8),
+    static_cast<uint16_t>(Instruction::GOTO | 0xFFFFFFFB << 8),
     Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 8,
     Instruction::RETURN | 2 << 8);
 
@@ -620,7 +640,7 @@
 
   // Check the values of the computed constants.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst1 = graph->GetBlock(4)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst1 = graph->GetBlocks()[4]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst1->IsIntConstant());
     ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 20);
     HInstruction* inst2 = inst1->GetPrevious();
@@ -656,7 +676,6 @@
            check_after_cf);
 }
 
-
 /**
  * Three-register program with a constant (static) condition.
  *
@@ -670,7 +689,7 @@
  * L1: v2 <- v0 + v1            5.      add-int v2, v0, v1
  *     return-void              7.      return
  */
-TEST(ConstantFolding, ConstantCondition) {
+TEST_F(ConstantFoldingTest, ConstantCondition) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::CONST_4 | 0 << 8 | 0 << 12,
@@ -710,7 +729,7 @@
 
   // Check the values of the computed constants.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
   };
@@ -732,4 +751,109 @@
            check_after_cf);
 }
 
+/**
+ * Unsigned comparisons with zero. Since these instructions are not present
+ * in the bytecode, we need to set up the graph explicitly.
+ */
+TEST_F(ConstantFoldingTest, UnsignedComparisonsWithZero) {
+  graph_ = CreateGraph(&allocator_);
+  HBasicBlock* entry_block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry_block);
+  graph_->SetEntryBlock(entry_block);
+  HBasicBlock* block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block);
+  HBasicBlock* exit_block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(exit_block);
+  graph_->SetExitBlock(exit_block);
+  entry_block->AddSuccessor(block);
+  block->AddSuccessor(exit_block);
+
+  // Make various unsigned comparisons with zero against a parameter.
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt, true);
+  entry_block->AddInstruction(parameter);
+  HInstruction* zero = graph_->GetIntConstant(0);
+  HInstruction* last;
+  block->AddInstruction(last = new (&allocator_) HAbove(zero, parameter));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(last = new (&allocator_) HAbove(parameter, zero));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(last = new (&allocator_) HAboveOrEqual(zero, parameter));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(last = new (&allocator_) HAboveOrEqual(parameter, zero));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(last = new (&allocator_) HBelow(zero, parameter));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(last = new (&allocator_) HBelow(parameter, zero));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(last = new (&allocator_) HBelowOrEqual(zero, parameter));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(last = new (&allocator_) HBelowOrEqual(parameter, zero));
+  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+
+  entry_block->AddInstruction(new (&allocator_) HGoto());
+  block->AddInstruction(new (&allocator_) HReturn(zero));
+  exit_block->AddInstruction(new (&allocator_) HExit());
+
+  const std::string expected_before =
+      "BasicBlock 0, succ: 1\n"
+      "  0: ParameterValue [16, 14, 12, 10, 8, 6, 4, 2]\n"
+      "  1: IntConstant [19, 16, 14, 12, 10, 8, 6, 4, 2]\n"
+      "  18: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  2: Above(1, 0) [3]\n"
+      "  3: Deoptimize(2)\n"
+      "  4: Above(0, 1) [5]\n"
+      "  5: Deoptimize(4)\n"
+      "  6: AboveOrEqual(1, 0) [7]\n"
+      "  7: Deoptimize(6)\n"
+      "  8: AboveOrEqual(0, 1) [9]\n"
+      "  9: Deoptimize(8)\n"
+      "  10: Below(1, 0) [11]\n"
+      "  11: Deoptimize(10)\n"
+      "  12: Below(0, 1) [13]\n"
+      "  13: Deoptimize(12)\n"
+      "  14: BelowOrEqual(1, 0) [15]\n"
+      "  15: Deoptimize(14)\n"
+      "  16: BelowOrEqual(0, 1) [17]\n"
+      "  17: Deoptimize(16)\n"
+      "  19: Return(1)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  20: Exit\n";
+
+  const std::string expected_after_cf =
+      "BasicBlock 0, succ: 1\n"
+      "  0: ParameterValue [16, 10, 6, 4]\n"
+      "  1: IntConstant [13, 3, 19, 16, 10, 6, 4]\n"
+      "  21: IntConstant [15, 9]\n"
+      "  18: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  3: Deoptimize(1)\n"
+      "  4: Above(0, 1) [5]\n"
+      "  5: Deoptimize(4)\n"
+      "  6: AboveOrEqual(1, 0) [7]\n"
+      "  7: Deoptimize(6)\n"
+      "  9: Deoptimize(21)\n"
+      "  10: Below(1, 0) [11]\n"
+      "  11: Deoptimize(10)\n"
+      "  13: Deoptimize(1)\n"
+      "  15: Deoptimize(21)\n"
+      "  16: BelowOrEqual(0, 1) [17]\n"
+      "  17: Deoptimize(16)\n"
+      "  19: Return(1)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  20: Exit\n";
+
+  const std::string expected_after_dce = expected_after_cf;
+
+  auto check_after_cf = [](HGraph* graph) {
+    CHECK(graph != nullptr);
+  };
+
+  TestCodeOnReadyGraph(expected_before,
+                       expected_after_cf,
+                       expected_after_dce,
+                       check_after_cf);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 007d0e3..9754043 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -63,7 +63,7 @@
             static_cast<uint32_t>(switch_value) - static_cast<uint32_t>(start_value);
         if (switch_index < switch_instruction->GetNumEntries()) {
           live_successors = live_successors.SubArray(switch_index, 1u);
-          DCHECK_EQ(live_successors[0], block->GetSuccessor(switch_index));
+          DCHECK_EQ(live_successors[0], block->GetSuccessors()[switch_index]);
         } else {
           live_successors = live_successors.SubArray(switch_instruction->GetNumEntries(), 1u);
           DCHECK_EQ(live_successors[0], switch_instruction->GetDefaultBlock());
@@ -136,7 +136,7 @@
       it.Advance();
       continue;
     }
-    HBasicBlock* successor = block->GetSuccessor(0);
+    HBasicBlock* successor = block->GetSuccessors()[0];
     if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
       it.Advance();
       continue;
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index ee3a61a..2c6a1ef 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -45,7 +45,7 @@
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
   HDeadCodeElimination(graph).Run();
-  SSAChecker ssa_checker(&allocator, graph);
+  SSAChecker ssa_checker(graph);
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
 
@@ -140,7 +140,7 @@
     Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 3,
     Instruction::GOTO | 4 << 8,
     Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 2,
-    static_cast<uint16_t>(Instruction::GOTO | -5 << 8),
+    static_cast<uint16_t>(Instruction::GOTO | 0xFFFFFFFB << 8),
     Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 4,
     Instruction::RETURN_VOID);
 
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 6b18650..91e4a99 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -36,16 +36,16 @@
   ASSERT_EQ(graph->GetBlocks().size(), blocks_length);
   for (size_t i = 0, e = blocks_length; i < e; ++i) {
     if (blocks[i] == kInvalidBlockId) {
-      if (graph->GetBlock(i) == nullptr) {
+      if (graph->GetBlocks()[i] == nullptr) {
         // Dead block.
       } else {
         // Only the entry block has no dominator.
-        ASSERT_EQ(nullptr, graph->GetBlock(i)->GetDominator());
-        ASSERT_TRUE(graph->GetBlock(i)->IsEntryBlock());
+        ASSERT_EQ(nullptr, graph->GetBlocks()[i]->GetDominator());
+        ASSERT_TRUE(graph->GetBlocks()[i]->IsEntryBlock());
       }
     } else {
-      ASSERT_NE(nullptr, graph->GetBlock(i)->GetDominator());
-      ASSERT_EQ(blocks[i], graph->GetBlock(i)->GetDominator()->GetBlockId());
+      ASSERT_NE(nullptr, graph->GetBlocks()[i]->GetDominator());
+      ASSERT_EQ(blocks[i], graph->GetBlocks()[i]->GetDominator()->GetBlockId());
     }
   }
 }
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 9e0d352..9b0eb70 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -118,7 +118,7 @@
                       uint32_t parent_loop_header_id,
                       const int* blocks_in_loop = nullptr,
                       size_t number_of_blocks = 0) {
-  HBasicBlock* block = graph->GetBlock(block_id);
+  HBasicBlock* block = graph->GetBlocks()[block_id];
   ASSERT_EQ(block->IsLoopHeader(), is_loop_header);
   if (parent_loop_header_id == kInvalidBlockId) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
@@ -296,10 +296,10 @@
   TestBlock(graph, 7, false, kInvalidBlockId);  // exit block
   TestBlock(graph, 8, false, 2);                // synthesized block as pre header of inner loop
 
-  ASSERT_TRUE(graph->GetBlock(3)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(2)->GetLoopInformation()));
-  ASSERT_FALSE(graph->GetBlock(2)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(3)->GetLoopInformation()));
+  ASSERT_TRUE(graph->GetBlocks()[3]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[2]->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks()[2]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[3]->GetLoopInformation()));
 }
 
 TEST(FindLoopsTest, TwoLoops) {
@@ -326,10 +326,10 @@
   TestBlock(graph, 6, false, kInvalidBlockId);  // return block
   TestBlock(graph, 7, false, kInvalidBlockId);  // exit block
 
-  ASSERT_FALSE(graph->GetBlock(4)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(2)->GetLoopInformation()));
-  ASSERT_FALSE(graph->GetBlock(2)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(4)->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks()[4]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[2]->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks()[2]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[4]->GetLoopInformation()));
 }
 
 TEST(FindLoopsTest, NonNaturalLoop) {
@@ -344,8 +344,8 @@
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
   HGraph* graph = TestCode(data, &allocator);
-  ASSERT_TRUE(graph->GetBlock(3)->IsLoopHeader());
-  HLoopInformation* info = graph->GetBlock(3)->GetLoopInformation();
+  ASSERT_TRUE(graph->GetBlocks()[3]->IsLoopHeader());
+  HLoopInformation* info = graph->GetBlocks()[3]->GetLoopInformation();
   ASSERT_EQ(1u, info->NumberOfBackEdges());
   ASSERT_FALSE(info->GetHeader()->Dominates(info->GetBackEdges()[0]));
 }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 4e1cafe..3de96b5 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -16,10 +16,12 @@
 
 #include "graph_checker.h"
 
+#include <algorithm>
 #include <map>
 #include <string>
 #include <sstream>
 
+#include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
 
@@ -29,19 +31,21 @@
   current_block_ = block;
 
   // Check consistency with respect to predecessors of `block`.
-  std::map<HBasicBlock*, size_t> predecessors_count;
+  ArenaSafeMap<HBasicBlock*, size_t> predecessors_count(
+      std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
   for (HBasicBlock* p : block->GetPredecessors()) {
-    ++predecessors_count[p];
+    auto it = predecessors_count.find(p);
+    if (it != predecessors_count.end()) {
+      ++it->second;
+    } else {
+      predecessors_count.Put(p, 1u);
+    }
   }
   for (auto& pc : predecessors_count) {
     HBasicBlock* p = pc.first;
     size_t p_count_in_block_predecessors = pc.second;
-    size_t block_count_in_p_successors = 0;
-    for (HBasicBlock* p_successor : p->GetSuccessors()) {
-      if (p_successor == block) {
-        ++block_count_in_p_successors;
-      }
-    }
+    size_t block_count_in_p_successors =
+        std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block);
     if (p_count_in_block_predecessors != block_count_in_p_successors) {
       AddError(StringPrintf(
           "Block %d lists %zu occurrences of block %d in its predecessors, whereas "
@@ -52,19 +56,21 @@
   }
 
   // Check consistency with respect to successors of `block`.
-  std::map<HBasicBlock*, size_t> successors_count;
+  ArenaSafeMap<HBasicBlock*, size_t> successors_count(
+      std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
   for (HBasicBlock* s : block->GetSuccessors()) {
-    ++successors_count[s];
+    auto it = successors_count.find(s);
+    if (it != successors_count.end()) {
+      ++it->second;
+    } else {
+      successors_count.Put(s, 1u);
+    }
   }
   for (auto& sc : successors_count) {
     HBasicBlock* s = sc.first;
     size_t s_count_in_block_successors = sc.second;
-    size_t block_count_in_s_predecessors = 0;
-    for (HBasicBlock* s_predecessor : s->GetPredecessors()) {
-      if (s_predecessor == block) {
-        ++block_count_in_s_predecessors;
-      }
-    }
+    size_t block_count_in_s_predecessors =
+        std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block);
     if (s_count_in_block_successors != block_count_in_s_predecessors) {
       AddError(StringPrintf(
           "Block %d lists %zu occurrences of block %d in its successors, whereas "
@@ -351,7 +357,7 @@
   // never exceptional successors.
   const size_t num_normal_successors = block->NumberOfNormalSuccessors();
   for (size_t j = 0; j < num_normal_successors; ++j) {
-    HBasicBlock* successor = block->GetSuccessor(j);
+    HBasicBlock* successor = block->GetSuccessors()[j];
     if (successor->IsCatchBlock()) {
       AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
                             successor->GetBlockId(),
@@ -359,7 +365,7 @@
     }
   }
   for (size_t j = num_normal_successors, e = block->GetSuccessors().size(); j < e; ++j) {
-    HBasicBlock* successor = block->GetSuccessor(j);
+    HBasicBlock* successor = block->GetSuccessors()[j];
     if (!successor->IsCatchBlock()) {
       AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
                             successor->GetBlockId(),
@@ -373,7 +379,7 @@
   // not accounted for.
   if (block->NumberOfNormalSuccessors() > 1) {
     for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
-      HBasicBlock* successor = block->GetSuccessor(j);
+      HBasicBlock* successor = block->GetSuccessors()[j];
       if (successor->GetPredecessors().size() > 1) {
         AddError(StringPrintf("Critical edge between blocks %d and %d.",
                               block->GetBlockId(),
@@ -456,14 +462,14 @@
         id,
         num_preds));
   } else {
-    HBasicBlock* first_predecessor = loop_header->GetPredecessor(0);
+    HBasicBlock* first_predecessor = loop_header->GetPredecessors()[0];
     if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
     for (size_t i = 1, e = loop_header->GetPredecessors().size(); i < e; ++i) {
-      HBasicBlock* predecessor = loop_header->GetPredecessor(i);
+      HBasicBlock* predecessor = loop_header->GetPredecessors()[i];
       if (!loop_information->IsBackEdge(*predecessor)) {
         AddError(StringPrintf(
             "Loop header %d has multiple incoming (non back edge) blocks.",
@@ -493,7 +499,7 @@
 
   // Ensure all blocks in the loop are live and dominated by the loop header.
   for (uint32_t i : loop_blocks.Indexes()) {
-    HBasicBlock* loop_block = GetGraph()->GetBlock(i);
+    HBasicBlock* loop_block = GetGraph()->GetBlocks()[i];
     if (loop_block == nullptr) {
       AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
                             id,
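
The predecessor/successor bookkeeping above boils down to a symmetry check: a block B must appear in P's successor list exactly as often as P appears in B's predecessor list. A minimal standalone sketch of that check, using plain STL containers and a hypothetical Block alias rather than ART's arena-backed types:

#include <algorithm>
#include <vector>

using Block = int;  // stand-in for HBasicBlock*; purely illustrative

// True when `pred` occurs in B's predecessor list exactly as often as
// B occurs in `pred`'s successor list.
bool EdgeCountsConsistent(const std::vector<Block>& b_predecessors,
                          const std::vector<Block>& pred_successors,
                          Block b, Block pred) {
  auto in_preds = std::count(b_predecessors.begin(), b_predecessors.end(), pred);
  auto in_succs = std::count(pred_successors.begin(), pred_successors.end(), b);
  return in_preds == in_succs;
}
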
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 7ddffc1..abf3659 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -26,12 +26,11 @@
 // A control-flow graph visitor performing various checks.
 class GraphChecker : public HGraphDelegateVisitor {
  public:
-  GraphChecker(ArenaAllocator* allocator, HGraph* graph,
-               const char* dump_prefix = "art::GraphChecker: ")
+  explicit GraphChecker(HGraph* graph, const char* dump_prefix = "art::GraphChecker: ")
     : HGraphDelegateVisitor(graph),
-      allocator_(allocator),
+      errors_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)),
       dump_prefix_(dump_prefix),
-      seen_ids_(allocator, graph->GetCurrentInstructionId(), false) {}
+      seen_ids_(graph->GetArena(), graph->GetCurrentInstructionId(), false) {}
 
   // Check the whole graph (in insertion order).
   virtual void Run() { VisitInsertionOrder(); }
@@ -65,7 +64,7 @@
   }
 
   // Get the list of detected errors.
-  const std::vector<std::string>& GetErrors() const {
+  const ArenaVector<std::string>& GetErrors() const {
     return errors_;
   }
 
@@ -82,11 +81,10 @@
     errors_.push_back(error);
   }
 
-  ArenaAllocator* const allocator_;
   // The block currently visited.
   HBasicBlock* current_block_ = nullptr;
   // Errors encountered while checking the graph.
-  std::vector<std::string> errors_;
+  ArenaVector<std::string> errors_;
 
  private:
   // String displayed before dumped errors.
@@ -102,9 +100,8 @@
  public:
   typedef GraphChecker super_type;
 
-  // TODO: There's no need to pass a separate allocator as we could get it from the graph.
-  SSAChecker(ArenaAllocator* allocator, HGraph* graph)
-    : GraphChecker(allocator, graph, "art::SSAChecker: ") {}
+  explicit SSAChecker(HGraph* graph)
+    : GraphChecker(graph, "art::SSAChecker: ") {}
 
   // Check the whole graph (in reverse post-order).
   void Run() OVERRIDE {
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 0f66775..fee56c7 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -50,7 +50,7 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  GraphChecker graph_checker(&allocator, graph);
+  GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
 }
@@ -64,7 +64,7 @@
   graph->BuildDominatorTree();
   graph->TransformToSsa();
 
-  SSAChecker ssa_checker(&allocator, graph);
+  SSAChecker ssa_checker(graph);
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
 }
@@ -112,7 +112,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = CreateSimpleCFG(&allocator);
-  GraphChecker graph_checker(&allocator, graph);
+  GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
 
@@ -130,7 +130,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = CreateSimpleCFG(&allocator);
-  GraphChecker graph_checker(&allocator, graph);
+  GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
 
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 7968e88..d4b9b71 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -99,7 +99,7 @@
   ASSERT_NE(false_block, return_block);
 
   // Ensure the new block branches to the join block.
-  ASSERT_EQ(false_block->GetSuccessor(0), return_block);
+  ASSERT_EQ(false_block->GetSuccessors()[0], return_block);
 }
 
 // Test that the successors of an if block stay consistent after a SimplifyCFG.
@@ -134,7 +134,7 @@
   ASSERT_NE(true_block, return_block);
 
   // Ensure the new block branches to the join block.
-  ASSERT_EQ(true_block->GetSuccessor(0), return_block);
+  ASSERT_EQ(true_block->GetSuccessors()[0], return_block);
 }
 
 // Test that the successors of an if block stay consistent after a SimplifyCFG.
@@ -164,11 +164,11 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessor(0), entry_block);
-  ASSERT_NE(if_block->GetPredecessor(1), if_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
-  ASSERT_EQ(if_block->GetPredecessor(1),
+  ASSERT_EQ(if_block->GetPredecessors()[1],
             if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor());
 }
 
@@ -199,11 +199,11 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessor(0), entry_block);
-  ASSERT_NE(if_block->GetPredecessor(1), if_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
-  ASSERT_EQ(if_block->GetPredecessor(1),
+  ASSERT_EQ(if_block->GetPredecessors()[1],
             if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor());
 }
 
@@ -242,7 +242,7 @@
 
   // Ensure the new block is the successor of the true block.
   ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().size(), 1u);
-  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessor(0),
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors()[0],
             loop_block->GetLoopInformation()->GetPreHeader());
 }
 
@@ -280,7 +280,7 @@
 
   // Ensure the new block is the successor of the false block.
   ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().size(), 1u);
-  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessor(0),
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors()[0],
             loop_block->GetLoopInformation()->GetPreHeader());
 }
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index d38f4c8..4111671 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -253,7 +253,7 @@
     AddIndent();
     output_ << "successors";
     for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) {
-      HBasicBlock* successor = block->GetSuccessor(i);
+      HBasicBlock* successor = block->GetSuccessors()[i];
       output_ << " \"B" << successor->GetBlockId() << "\" ";
     }
     output_<< std::endl;
@@ -263,7 +263,7 @@
     AddIndent();
     output_ << "xhandlers";
     for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().size(); ++i) {
-      HBasicBlock* handler = block->GetSuccessor(i);
+      HBasicBlock* handler = block->GetSuccessors()[i];
       output_ << " \"B" << handler->GetBlockId() << "\" ";
     }
     if (block->IsExitBlock() &&
@@ -362,6 +362,8 @@
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
     StartAttributeStream("gen_clinit_check") << std::boolalpha
         << load_class->MustGenerateClinitCheck() << std::noboolalpha;
+    StartAttributeStream("needs_access_check") << std::boolalpha
+        << load_class->NeedsAccessCheck() << std::noboolalpha;
   }
 
   void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 7cf0617..c36de84 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -16,11 +16,11 @@
 
 #include "gvn.h"
 
+#include "base/arena_bit_vector.h"
 #include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "side_effects_analysis.h"
 #include "utils.h"
-#include "utils/arena_bit_vector.h"
 
 namespace art {
 
@@ -351,7 +351,7 @@
     HBasicBlock* dominator = block->GetDominator();
     ValueSet* dominator_set = sets_[dominator->GetBlockId()];
     if (dominator->GetSuccessors().size() == 1) {
-      DCHECK_EQ(dominator->GetSuccessor(0), block);
+      DCHECK_EQ(dominator->GetSuccessors()[0], block);
       set = dominator_set;
     } else {
       // We have to copy if the dominator has other successors, or `block` is not a successor
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 56f2718..de60cf2 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -34,7 +34,10 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (&allocator) HBasicBlock(graph);
@@ -46,6 +49,7 @@
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -54,6 +58,7 @@
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -63,6 +68,7 @@
                                                            MemberOffset(43),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -74,6 +80,7 @@
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -82,6 +89,7 @@
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -111,7 +119,10 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (&allocator) HBasicBlock(graph);
@@ -122,6 +133,7 @@
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -144,6 +156,7 @@
                                                           MemberOffset(42),
                                                           false,
                                                           kUnknownFieldIndex,
+                                                          kUnknownClassDefIndex,
                                                           graph->GetDexFile(),
                                                           dex_cache,
                                                           0));
@@ -153,6 +166,7 @@
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -162,6 +176,7 @@
                                                           MemberOffset(42),
                                                           false,
                                                           kUnknownFieldIndex,
+                                                          kUnknownClassDefIndex,
                                                           graph->GetDexFile(),
                                                           dex_cache,
                                                           0));
@@ -188,7 +203,10 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
 
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (&allocator) HBasicBlock(graph);
@@ -199,6 +217,7 @@
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
+                                                           kUnknownClassDefIndex,
                                                            graph->GetDexFile(),
                                                            dex_cache,
                                                            0));
@@ -221,6 +240,7 @@
                                                                  MemberOffset(42),
                                                                  false,
                                                                  kUnknownFieldIndex,
+                                                                 kUnknownClassDefIndex,
                                                                  graph->GetDexFile(),
                                                                  dex_cache,
                                                                  0));
@@ -235,6 +255,7 @@
                                                                MemberOffset(42),
                                                                false,
                                                                kUnknownFieldIndex,
+                                                               kUnknownClassDefIndex,
                                                                graph->GetDexFile(),
                                                                dex_cache,
                                                                0));
@@ -244,6 +265,7 @@
                                                                MemberOffset(42),
                                                                false,
                                                                kUnknownFieldIndex,
+                                                               kUnknownClassDefIndex,
                                                                graph->GetDexFile(),
                                                                dex_cache,
                                                                0));
@@ -255,6 +277,7 @@
                                                           MemberOffset(42),
                                                           false,
                                                           kUnknownFieldIndex,
+                                                          kUnknownClassDefIndex,
                                                           graph->GetDexFile(),
                                                           dex_cache,
                                                           0));
@@ -328,7 +351,10 @@
   inner_loop_body->AddSuccessor(inner_loop_header);
   inner_loop_exit->AddSuccessor(outer_loop_header);
 
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimBoolean);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimBoolean);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
   outer_loop_header->AddInstruction(new (&allocator) HIf(parameter));
@@ -352,6 +378,7 @@
                                                              MemberOffset(42),
                                                              false,
                                                              kUnknownFieldIndex,
+                                                             kUnknownClassDefIndex,
                                                              graph->GetDexFile(),
                                                              dex_cache,
                                                              0));
@@ -376,6 +403,7 @@
                                            MemberOffset(42),
                                            false,
                                            kUnknownFieldIndex,
+                                           kUnknownClassDefIndex,
                                            graph->GetDexFile(),
                                            dex_cache,
                                            0),
@@ -401,6 +429,7 @@
                                            MemberOffset(42),
                                            false,
                                            kUnknownFieldIndex,
+                                           kUnknownClassDefIndex,
                                            graph->GetDexFile(),
                                            dex_cache,
                                            0),
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index e5123de..fdf8cc9 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -20,19 +20,6 @@
 namespace art {
 
 /**
- * Returns true if instruction is invariant within the given loop.
- */
-static bool IsLoopInvariant(HLoopInformation* loop, HInstruction* instruction) {
-  HLoopInformation* other_loop = instruction->GetBlock()->GetLoopInformation();
-  if (other_loop != loop) {
-    // If instruction does not occur in same loop, it is invariant
-    // if it appears in an outer loop (including no loop at all).
-    return other_loop == nullptr || loop->IsIn(*other_loop);
-  }
-  return false;
-}
-
-/**
  * Since graph traversal may enter a SCC at any position, an initial representation may be rotated,
  * along dependences, viz. any of (a, b, c, d), (d, a, b, c)  (c, d, a, b), (b, c, d, a) assuming
  * a chain of dependences (mutual independent items may occur in arbitrary order). For proper
@@ -47,7 +34,7 @@
   size_t phi_pos = -1;
   const size_t size = scc->size();
   for (size_t i = 0; i < size; i++) {
-    HInstruction* other = scc->at(i);
+    HInstruction* other = (*scc)[i];
     if (other->IsLoopHeaderPhi() && (phi == nullptr || phis.FoundBefore(other, phi))) {
       phi = other;
       phi_pos = i;
@@ -58,8 +45,7 @@
   if (phi != nullptr) {
     new_scc->clear();
     for (size_t i = 0; i < size; i++) {
-      DCHECK_LT(phi_pos, size);
-      new_scc->push_back(scc->at(phi_pos));
+      new_scc->push_back((*scc)[phi_pos]);
       if (++phi_pos >= size) phi_pos = 0;
     }
     DCHECK_EQ(size, new_scc->size());
@@ -602,15 +588,16 @@
   //     an unsigned entity, for example, as in the following loop that uses the full range:
   //     for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX
   // (2) The TC is only valid if the loop is taken, otherwise TC = 0, as in:
-  //     for (int i = 12; i < U; i++) // TC = 0 when U >= 12
+  //     for (int i = 12; i < U; i++) // TC = 0 when U < 12
   //     If this cannot be determined at compile-time, the TC is only valid within the
-  //     loop-body proper, not the loop-header unless enforced with an explicit condition.
+  //     loop-body proper, not the loop-header unless enforced with an explicit taken-test.
   // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in:
   //     for (int i = 0; i <= U; i++) // TC = Inf when U = INT_MAX
   //     If this cannot be determined at compile-time, the TC is only valid when enforced
-  //     with an explicit condition.
+  //     with an explicit finite-test.
   // (4) For loops with early-exits, the TC forms an upper bound, as in:
   //     for (int i = 0; i < 10 && ....; i++) // TC <= 10
+  InductionInfo* trip_count = upper_expr;
   const bool is_taken = IsTaken(lower_expr, upper_expr, cmp);
   const bool is_finite = IsFinite(upper_expr, stride_value, type, cmp);
   const bool cancels = (cmp == kCondLT || cmp == kCondGT) && std::abs(stride_value) == 1;
@@ -618,26 +605,36 @@
     // Convert exclusive integral inequality into inclusive integral inequality,
     // viz. condition i < U is i <= U - 1 and condition i > U is i >= U + 1.
     if (cmp == kCondLT) {
-      upper_expr = CreateInvariantOp(kSub, upper_expr, CreateConstant(1, type));
+      trip_count = CreateInvariantOp(kSub, trip_count, CreateConstant(1, type));
     } else if (cmp == kCondGT) {
-      upper_expr = CreateInvariantOp(kAdd, upper_expr, CreateConstant(1, type));
+      trip_count = CreateInvariantOp(kAdd, trip_count, CreateConstant(1, type));
     }
     // Compensate for stride.
-    upper_expr = CreateInvariantOp(kAdd, upper_expr, stride);
+    trip_count = CreateInvariantOp(kAdd, trip_count, stride);
   }
-  InductionInfo* trip_count
-      = CreateInvariantOp(kDiv, CreateInvariantOp(kSub, upper_expr, lower_expr), stride);
+  trip_count = CreateInvariantOp(kDiv, CreateInvariantOp(kSub, trip_count, lower_expr), stride);
   // Assign the trip-count expression to the loop control. Clients that use the information
   // should be aware that the expression is only valid under the conditions listed above.
-  InductionOp tcKind = kTripCountInBodyUnsafe;
+  InductionOp tcKind = kTripCountInBodyUnsafe;  // needs both tests
   if (is_taken && is_finite) {
-    tcKind = kTripCountInLoop;
+    tcKind = kTripCountInLoop;  // needs neither test
   } else if (is_finite) {
-    tcKind = kTripCountInBody;
+    tcKind = kTripCountInBody;  // needs taken-test
   } else if (is_taken) {
-    tcKind = kTripCountInLoopUnsafe;
+    tcKind = kTripCountInLoopUnsafe;  // needs finite-test
   }
-  AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), CreateTripCount(tcKind, trip_count));
+  InductionOp op = kNop;
+  switch (cmp) {
+    case kCondLT: op = kLT; break;
+    case kCondLE: op = kLE; break;
+    case kCondGT: op = kGT; break;
+    case kCondGE: op = kGE; break;
+    default:      LOG(FATAL) << "CONDITION UNREACHABLE";
+  }
+  InductionInfo* taken_test = CreateInvariantOp(op, lower_expr, upper_expr);
+  AssignInfo(loop,
+             loop->GetHeader()->GetLastInstruction(),
+             CreateTripCount(tcKind, trip_count, taken_test));
 }
 
 bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr,
@@ -651,8 +648,7 @@
       case kCondLE: return lower_value <= upper_value;
       case kCondGT: return lower_value >  upper_value;
       case kCondGE: return lower_value >= upper_value;
-      case kCondEQ:
-      case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE";
+      default:      LOG(FATAL) << "CONDITION UNREACHABLE";
     }
   }
   return false;  // not certain, may be untaken
@@ -681,8 +677,8 @@
           (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value - 1));
     case kCondGE:
       return (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value));
-    case kCondEQ:
-    case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE";
+    default:
+      LOG(FATAL) << "CONDITION UNREACHABLE";
   }
   return false;  // not certain, may be infinite
 }
@@ -709,7 +705,7 @@
       return loop_it->second;
     }
   }
-  if (IsLoopInvariant(loop, instruction)) {
+  if (loop->IsLoopInvariant(instruction, true)) {
     InductionInfo* info = CreateInvariantFetch(instruction);
     AssignInfo(loop, instruction, info);
     return info;
@@ -831,12 +827,16 @@
       std::string inv = "(";
       inv += InductionToString(info->op_a);
       switch (info->operation) {
-        case kNop:   inv += " @ "; break;
-        case kAdd:   inv += " + "; break;
+        case kNop:   inv += " @ ";  break;
+        case kAdd:   inv += " + ";  break;
         case kSub:
-        case kNeg:   inv += " - "; break;
-        case kMul:   inv += " * "; break;
-        case kDiv:   inv += " / "; break;
+        case kNeg:   inv += " - ";  break;
+        case kMul:   inv += " * ";  break;
+        case kDiv:   inv += " / ";  break;
+        case kLT:    inv += " < ";  break;
+        case kLE:    inv += " <= "; break;
+        case kGT:    inv += " > ";  break;
+        case kGE:    inv += " >= "; break;
         case kFetch:
           DCHECK(info->fetch);
           if (IsIntAndGet(info, &value)) {
@@ -845,10 +845,10 @@
             inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName();
           }
           break;
-        case kTripCountInLoop:       inv += "TC-loop:"; break;
-        case kTripCountInBody:       inv += "TC-body:"; break;
-        case kTripCountInLoopUnsafe: inv += "TC-loop-unsafe:"; break;
-        case kTripCountInBodyUnsafe: inv += "TC-body-unsafe:"; break;
+        case kTripCountInLoop:       inv += " (TC-loop) ";        break;
+        case kTripCountInBody:       inv += " (TC-body) ";        break;
+        case kTripCountInLoopUnsafe: inv += " (TC-loop-unsafe) "; break;
+        case kTripCountInBodyUnsafe: inv += " (TC-body-unsafe) "; break;
       }
       inv += InductionToString(info->op_b);
       return inv + ")";
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 7ab80cd..cf35409 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -65,11 +65,16 @@
     kMul,
     kDiv,
     kFetch,
-    // Trip counts (valid in full loop or only body proper; unsafe implies loop may be infinite).
-    kTripCountInLoop,
-    kTripCountInBody,
-    kTripCountInLoopUnsafe,
-    kTripCountInBodyUnsafe
+    // Trip-counts.
+    kTripCountInLoop,        // valid in full loop; loop is finite
+    kTripCountInBody,        // valid in body only; loop is finite
+    kTripCountInLoopUnsafe,  // valid in full loop; loop may be infinite
+    kTripCountInBodyUnsafe,  // valid in body only; loop may be infinite
+    // Comparisons for trip-count tests.
+    kLT,
+    kLE,
+    kGT,
+    kGE
   };
 
   /**
@@ -85,7 +90,7 @@
    *   (4) periodic
    *         nop: a, then defined by b (repeated when exhausted)
    *   (5) trip-count:
-   *         tc: defined by b
+   *         tc: defined by a, taken-test in b
    */
   struct InductionInfo : public ArenaObject<kArenaAllocInductionVarAnalysis> {
     InductionInfo(InductionClass ic,
@@ -119,8 +124,9 @@
     return new (graph_->GetArena()) InductionInfo(kInvariant, kFetch, nullptr, nullptr, f);
   }
 
-  InductionInfo* CreateTripCount(InductionOp op, InductionInfo* b) {
-    return new (graph_->GetArena()) InductionInfo(kInvariant, op, nullptr, b, nullptr);
+  InductionInfo* CreateTripCount(InductionOp op, InductionInfo* a, InductionInfo* b) {
+    DCHECK(a != nullptr);
+    return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr);
   }
 
   InductionInfo* CreateInduction(InductionClass ic, InductionInfo* a, InductionInfo* b) {
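
With this change a trip-count node carries two children instead of one: op_a holds the count expression and op_b holds the taken-test built from the new kLT/kLE/kGT/kGE operations. A minimal standalone mirror of that shape (plain structs, not the arena-allocated InductionInfo) for the loop "for (int i = 0; i < 100; i++)", which the analysis now prints as "((100) (TC-loop) ((0) < (100)))":

#include <string>

struct Node {               // stand-in for InductionInfo; illustrative only
  std::string text;         // printed form of this node
  const Node* op_a;         // trip-count expression, or left operand
  const Node* op_b;         // taken-test, or right operand
};

const Node hundred{"(100)", nullptr, nullptr};
const Node zero{"(0)", nullptr, nullptr};
const Node taken_test{"((0) < (100))", &zero, &hundred};
const Node trip_count{"(TC-loop)", &hundred, &taken_test};
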
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 20492e7..b7262f6 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -20,7 +20,6 @@
 #include "builder.h"
 #include "gtest/gtest.h"
 #include "induction_var_analysis.h"
-#include "induction_var_range.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 
@@ -78,7 +77,8 @@
     graph_->SetExitBlock(exit_);
 
     // Provide entry and exit instructions.
-    parameter_ = new (&allocator_) HParameterValue(0, Primitive::kPrimNot, true);
+    parameter_ = new (&allocator_) HParameterValue(
+        graph_->GetDexFile(), 0, 0, Primitive::kPrimNot, true);
     entry_->AddInstruction(parameter_);
     constant0_ = graph_->GetIntConstant(0);
     constant1_ = graph_->GetIntConstant(1);
@@ -234,7 +234,7 @@
   EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[0], 0).c_str());
 
   // Trip-count.
-  EXPECT_STREQ("(TC-loop:(100))",
+  EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
                GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
 }
 
@@ -522,36 +522,6 @@
   EXPECT_STREQ("periodic(( - (1)), (0))", GetInductionInfo(neg, 0).c_str());
 }
 
-TEST_F(InductionVarAnalysisTest, FindRange) {
-  // Setup:
-  // for (int i = 0; i < 100; i++) {
-  //   k = i << 1;
-  //   k = k + 1;
-  //   a[k] = 0;
-  // }
-  BuildLoopNest(1);
-  HInstruction *shl = InsertInstruction(
-      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0);
-  InsertLocalStore(induc_, shl, 0);
-  HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
-  InsertLocalStore(induc_, add, 0);
-  HInstruction* store = InsertArrayStore(induc_, 0);
-  PerformInductionVarAnalysis();
-
-  EXPECT_STREQ("((2) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
-
-  InductionVarRange range(iva_);
-  InductionVarRange::Value v_min = range.GetMinInduction(store, store->InputAt(1));
-  InductionVarRange::Value v_max = range.GetMaxInduction(store, store->InputAt(1));
-  ASSERT_TRUE(v_min.is_known);
-  EXPECT_EQ(0, v_min.a_constant);
-  EXPECT_EQ(1, v_min.b_constant);
-  ASSERT_TRUE(v_max.is_known);
-  EXPECT_EQ(0, v_max.a_constant);
-  EXPECT_EQ(199, v_max.b_constant);
-}
-
 TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) {
   // Setup:
   // k = 0;
@@ -582,7 +552,7 @@
     }
     EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[d], d).c_str());
     // Trip-count.
-    EXPECT_STREQ("(TC-loop:(100))",
+    EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))",
                  GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str());
   }
 }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index db12819..5530d26 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -75,6 +75,13 @@
   return v;
 }
 
+static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) {
+  DCHECK(preheader != nullptr);
+  DCHECK(instruction != nullptr);
+  preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction());
+  return instruction;
+}
+
 //
 // Public class methods.
 //
@@ -94,6 +101,21 @@
   return SimplifyMax(GetInduction(context, instruction, /* is_min */ false));
 }
 
+bool InductionVarRange::CanGenerateCode(HInstruction* context,
+                                        HInstruction* instruction,
+                                        /*out*/bool* top_test) {
+  return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test);
+}
+
+bool InductionVarRange::GenerateCode(HInstruction* context,
+                                     HInstruction* instruction,
+                                     HGraph* graph,
+                                     HBasicBlock* block,
+                                     /*out*/HInstruction** lower,
+                                     /*out*/HInstruction** upper) {
+  return GenerateCode(context, instruction, graph, block, lower, upper, nullptr);
+}
+
 //
 // Private class methods.
 //
@@ -130,7 +152,7 @@
     }
   } else if (is_min) {
     // Special case for finding minimum: minimum of trip-count in loop-body is 1.
-    if (trip != nullptr && in_body && instruction == trip->op_b->fetch) {
+    if (trip != nullptr && in_body && instruction == trip->op_a->fetch) {
       return Value(1);
     }
   }
@@ -162,15 +184,15 @@
           case HInductionVarAnalysis::kFetch:
             return GetFetch(info->fetch, trip, in_body, is_min);
           case HInductionVarAnalysis::kTripCountInLoop:
-            if (!in_body) {
-              return is_min ? Value(0)
-                            : GetVal(info->op_b, trip, in_body, is_min);   // one extra!
+            if (!in_body && !is_min) {  // one extra!
+              return GetVal(info->op_a, trip, in_body, is_min);
             }
             FALLTHROUGH_INTENDED;
           case HInductionVarAnalysis::kTripCountInBody:
-            if (in_body) {
-              return is_min ? Value(0)
-                            : SubValue(GetVal(info->op_b, trip, in_body, is_min), Value(1));
+            if (is_min) {
+              return Value(0);
+            } else if (in_body) {
+              return SubValue(GetVal(info->op_a, trip, in_body, is_min), Value(1));
             }
             break;
           default:
@@ -256,9 +278,11 @@
 bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value) {
   Value v_min = GetVal(info, nullptr, false, /* is_min */ true);
   Value v_max = GetVal(info, nullptr, false, /* is_min */ false);
-  if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) {
-    *value = v_min.b_constant;
-    return true;
+  if (v_min.is_known && v_max.is_known) {
+    if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) {
+      *value = v_min.b_constant;
+      return true;
+    }
   }
   return false;
 }
@@ -326,4 +350,129 @@
   return Value();
 }
 
+bool InductionVarRange::GenerateCode(HInstruction* context,
+                                     HInstruction* instruction,
+                                     HGraph* graph,
+                                     HBasicBlock* block,
+                                     /*out*/HInstruction** lower,
+                                     /*out*/HInstruction** upper,
+                                     /*out*/bool* top_test) {
+  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (loop != nullptr) {
+    HBasicBlock* header = loop->GetHeader();
+    bool in_body = context->GetBlock() != header;
+    HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
+    HInductionVarAnalysis::InductionInfo* trip =
+        induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
+    if (info != nullptr && trip != nullptr) {
+      if (top_test != nullptr) {
+        *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop;
+      }
+      return
+        // Success on lower if invariant (not set), or code can be generated.
+        ((info->induction_class == HInductionVarAnalysis::kInvariant) ||
+            GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) &&
+        // And success on upper.
+        GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
+                                     HInductionVarAnalysis::InductionInfo* trip,
+                                     HGraph* graph,  // when set, code is generated
+                                     HBasicBlock* block,
+                                     /*out*/HInstruction** result,
+                                     bool in_body,
+                                     bool is_min) {
+  if (info != nullptr) {
+    Primitive::Type type = Primitive::kPrimInt;
+    HInstruction* opa = nullptr;
+    HInstruction* opb = nullptr;
+    int32_t value = 0;
+    switch (info->induction_class) {
+      case HInductionVarAnalysis::kInvariant:
+        // Invariants.
+        switch (info->operation) {
+          case HInductionVarAnalysis::kAdd:
+            if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (graph != nullptr) {
+                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+              }
+              return true;
+            }
+            break;
+          case HInductionVarAnalysis::kSub:  // second reversed!
+            if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
+              if (graph != nullptr) {
+                *result = Insert(block, new (graph->GetArena()) HSub(type, opa, opb));
+              }
+              return true;
+            }
+            break;
+          case HInductionVarAnalysis::kNeg:  // reversed!
+            if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
+              if (graph != nullptr) {
+                *result = Insert(block, new (graph->GetArena()) HNeg(type, opb));
+              }
+              return true;
+            }
+            break;
+          case HInductionVarAnalysis::kFetch:
+            if (graph != nullptr) {
+              *result = info->fetch;  // already in HIR
+            }
+            return true;
+          case HInductionVarAnalysis::kTripCountInLoop:
+            if (!in_body && !is_min) {  // one extra!
+              return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min);
+            }
+            FALLTHROUGH_INTENDED;
+          case HInductionVarAnalysis::kTripCountInBody:
+            if (is_min) {
+              if (graph != nullptr) {
+                *result = graph->GetIntConstant(0);
+              }
+              return true;
+            } else if (in_body) {
+              if (GenerateCode(info->op_a, trip, graph, block, &opb, in_body, is_min)) {
+                if (graph != nullptr) {
+                  *result = Insert(block,
+                                   new (graph->GetArena())
+                                       HSub(type, opb, graph->GetIntConstant(1)));
+                }
+                return true;
+              }
+            }
+            break;
+          default:
+            break;
+        }
+        break;
+      case HInductionVarAnalysis::kLinear:
+        // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
+        // to avoid arithmetic wrap-around situations that are hard to guard against.
+        if (GetConstant(info->op_a, &value)) {
+          if (value == 1 || value == -1) {
+            const bool is_min_a = value == 1 ? is_min : !is_min;
+            if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (graph != nullptr) {
+                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+              }
+              return true;
+            }
+          }
+        }
+        break;
+      default:  // TODO(ajcbik): add more cases
+        break;
+    }
+  }
+  return false;
+}
+
 }  // namespace art
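
A detail worth noting in GenerateCode above: the kSub and kNeg cases flip is_min for their second operand (the "reversed!" comments), because the minimum of a difference needs the maximum of the subtrahend and vice versa. A tiny standalone illustration with plain integer ranges (not HIR):

#include <utility>

using Range = std::pair<int, int>;  // {min, max}

// min(a - b) = min(a) - max(b);  max(a - b) = max(a) - min(b)
Range Sub(Range a, Range b) {
  return {a.first - b.second, a.second - b.first};
}

// min(-b) = -max(b);  max(-b) = -min(b)
Range Neg(Range b) {
  return {-b.second, -b.first};
}
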
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index dbdd2ee..7fa5a26 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -68,6 +68,33 @@
    */
   Value GetMaxInduction(HInstruction* context, HInstruction* instruction);
 
+  /**
+   * Returns true if range analysis is able to generate code for the lower and upper bound
+   * expressions on the instruction in the given context. Output parameter top_test denotes
+   * whether a top test is needed to protect the trip-count expression evaluation.
+   */
+  bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test);
+
+  /**
+   * Generates the actual code in the HIR for the lower and upper bound expressions on the
+   * instruction in the given context. Code for the lower and upper bound expressions is
+   * generated in the given block and graph and returned in lower and upper, respectively.
+   * For a loop invariant, lower is not set.
+   *
+   * For example, given expression x+i with range [0, 5] for i, calling this method
+   * will generate the following sequence:
+   *
+   * block:
+   *   lower: add x, 0
+   *   upper: add x, 5
+   */
+  bool GenerateCode(HInstruction* context,
+                    HInstruction* instruction,
+                    HGraph* graph,
+                    HBasicBlock* block,
+                    /*out*/HInstruction** lower,
+                    /*out*/HInstruction** upper);
+
  private:
   //
   // Private helper methods.
@@ -102,6 +129,27 @@
   static Value DivValue(Value v1, Value v2);
   static Value MergeVal(Value v1, Value v2, bool is_min);
 
+  /**
+   * Generates code for lower/upper expression in the HIR. Returns true on success.
+   * With graph == nullptr, the method can be used to determine if code generation
+   * would be successful without generating actual code yet.
+   */
+  bool GenerateCode(HInstruction* context,
+                    HInstruction* instruction,
+                    HGraph* graph,
+                    HBasicBlock* block,
+                    /*out*/HInstruction** lower,
+                    /*out*/HInstruction** upper,
+                    bool* top_test);
+
+  static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
+                           HInductionVarAnalysis::InductionInfo* trip,
+                           HGraph* graph,
+                           HBasicBlock* block,
+                           /*out*/HInstruction** result,
+                           bool in_body,
+                           bool is_min);
+
   /** Results of prior induction variable analysis. */
   HInductionVarAnalysis *induction_analysis_;
 
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 4497a88..ce8926a 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -49,12 +49,52 @@
   /** Constructs bare minimum graph. */
   void BuildGraph() {
     graph_->SetNumberOfVRegs(1);
-    HBasicBlock* entry_block = new (&allocator_) HBasicBlock(graph_);
-    HBasicBlock* exit_block = new (&allocator_) HBasicBlock(graph_);
-    graph_->AddBlock(entry_block);
-    graph_->AddBlock(exit_block);
-    graph_->SetEntryBlock(entry_block);
-    graph_->SetExitBlock(exit_block);
+    entry_block_ = new (&allocator_) HBasicBlock(graph_);
+    exit_block_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(entry_block_);
+    graph_->AddBlock(exit_block_);
+    graph_->SetEntryBlock(entry_block_);
+    graph_->SetExitBlock(exit_block_);
+  }
+
+  /** Constructs loop with given upper bound. */
+  void BuildLoop(HInstruction* upper) {
+    // Control flow.
+    loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(loop_preheader_);
+    HBasicBlock* loop_header = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(loop_header);
+    HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(loop_body);
+    entry_block_->AddSuccessor(loop_preheader_);
+    loop_preheader_->AddSuccessor(loop_header);
+    loop_header->AddSuccessor(loop_body);
+    loop_header->AddSuccessor(exit_block_);
+    loop_body->AddSuccessor(loop_header);
+    // Instructions.
+    HLocal* induc = new (&allocator_) HLocal(0);
+    entry_block_->AddInstruction(induc);
+    loop_preheader_->AddInstruction(
+        new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0)));  // i = 0
+    loop_preheader_->AddInstruction(new (&allocator_) HGoto());
+    HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
+    loop_header->AddInstruction(load);
+    condition_ = new (&allocator_) HLessThan(load, upper);
+    loop_header->AddInstruction(condition_);
+    loop_header->AddInstruction(new (&allocator_) HIf(condition_));  // i < u
+    load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
+    loop_body->AddInstruction(load);
+    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1));
+    loop_body->AddInstruction(increment_);
+    loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_));  // i++
+    loop_body->AddInstruction(new (&allocator_) HGoto());
+    exit_block_->AddInstruction(new (&allocator_) HReturnVoid());
+  }
+
+  /** Performs induction variable analysis. */
+  void PerformInductionVarAnalysis() {
+    ASSERT_TRUE(graph_->TryBuildingSsa());
+    iva_->Run();
   }
 
   /** Constructs an invariant. */
@@ -85,7 +125,7 @@
 
   /** Constructs a trip-count. */
   HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) {
-    return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc));
+    return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr);
   }
 
   /** Constructs a linear a * i + b induction. */
@@ -146,15 +186,20 @@
   ArenaPool pool_;
   ArenaAllocator allocator_;
   HGraph* graph_;
+  HBasicBlock* entry_block_;
+  HBasicBlock* exit_block_;
+  HBasicBlock* loop_preheader_;
   HInductionVarAnalysis* iva_;
 
-  // Two dummy instructions.
+  // Instructions.
+  HInstruction* condition_;
+  HInstruction* increment_;
   HReturnVoid x_;
   HReturnVoid y_;
 };
 
 //
-// The actual InductionVarRange tests.
+// Tests on static methods.
 //
 
 TEST_F(InductionVarRangeTest, GetMinMaxNull) {
@@ -349,4 +394,83 @@
   ExpectEqual(Value(), MaxValue(Value(55), Value(&y_, 1, -50)));
 }
 
+//
+// Tests on instance methods.
+//
+
+TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) {
+  BuildLoop(graph_->GetIntConstant(1000));
+  PerformInductionVarAnalysis();
+  InductionVarRange range(iva_);
+
+  // In context of header: known.
+  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
+  ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+
+  // In context of loop-body: known.
+  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
+  ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0)));
+  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
+  ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_));
+}
+
+TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) {
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  entry_block_->AddInstruction(parameter);
+  BuildLoop(parameter);
+  PerformInductionVarAnalysis();
+  InductionVarRange range(iva_);
+
+  // In context of header: full range unknown.
+  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
+  ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+
+  // In context of loop-body: known.
+  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
+  ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0)));
+  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
+  ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_));
+}
+
+TEST_F(InductionVarRangeTest, CodeGeneration) {
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  entry_block_->AddInstruction(parameter);
+  BuildLoop(parameter);
+  PerformInductionVarAnalysis();
+  InductionVarRange range(iva_);
+
+  HInstruction* lower = nullptr;
+  HInstruction* upper = nullptr;
+  bool top_test = false;
+
+  // Can generate code in context of loop-body only.
+  EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test));
+  ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test));
+  EXPECT_TRUE(top_test);
+
+  // Generates code.
+  EXPECT_TRUE(range.GenerateCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper));
+
+  // Verify lower is 0+0.
+  ASSERT_TRUE(lower != nullptr);
+  ASSERT_TRUE(lower->IsAdd());
+  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(0, lower->InputAt(0)->AsIntConstant()->GetValue());
+  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify upper is (V-1)+0
+  ASSERT_TRUE(upper != nullptr);
+  ASSERT_TRUE(upper->IsAdd());
+  ASSERT_TRUE(upper->InputAt(0)->IsSub());
+  EXPECT_TRUE(upper->InputAt(0)->InputAt(0)->IsParameterValue());
+  ASSERT_TRUE(upper->InputAt(0)->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
+  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f3b5f08..353881e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -32,6 +32,7 @@
 #include "optimizing_compiler.h"
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
+#include "sharpening.h"
 #include "ssa_phi_elimination.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -396,12 +397,14 @@
   HDeadCodeElimination dce(callee_graph, stats_);
   HConstantFolding fold(callee_graph);
   ReferenceTypePropagation type_propagation(callee_graph, handles_);
+  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
     &type_propagation,
+    &sharpening,
     &simplify,
     &dce,
     &fold,
@@ -415,6 +418,7 @@
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
     HInliner inliner(callee_graph,
+                     codegen_,
                      outer_compilation_unit_,
                      dex_compilation_unit,
                      compiler_driver_,
@@ -484,17 +488,40 @@
         return false;
       }
 
-      if (!same_dex_file && current->NeedsDexCache()) {
+      if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " could not be inlined because " << current->DebugName()
                        << " it is in a different dex file and requires access to the dex cache";
         return false;
       }
+
+      if (current->IsNewInstance() &&
+          (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) {
+        // Allocation entrypoint does not handle inlined frames.
+        return false;
+      }
+
+      if (current->IsNewArray() &&
+          (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) {
+        // Allocation entrypoint does not handle inlined frames.
+        return false;
+      }
+
+      if (current->IsUnresolvedStaticFieldGet() ||
+          current->IsUnresolvedInstanceFieldGet() ||
+          current->IsUnresolvedStaticFieldSet() ||
+          current->IsUnresolvedInstanceFieldSet()) {
+        // Entrypoint for unresolved fields does not handle inlined frames.
+        return false;
+      }
     }
   }
   number_of_inlined_instructions_ += number_of_instructions;
 
   HInstruction* return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
+  if (return_replacement != nullptr) {
+    DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph());
+  }
 
   // When merging the graph we might create a new NullConstant in the caller graph which does
   // not have the chance to be typed. We assign the correct type here so that we can keep the
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index bce5915..0f6a945 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -22,6 +22,7 @@
 
 namespace art {
 
+class CodeGenerator;
 class CompilerDriver;
 class DexCompilationUnit;
 class HGraph;
@@ -31,6 +32,7 @@
 class HInliner : public HOptimization {
  public:
   HInliner(HGraph* outer_graph,
+           CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
            CompilerDriver* compiler_driver,
@@ -40,6 +42,7 @@
       : HOptimization(outer_graph, kInlinerPassName, stats),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
+        codegen_(codegen),
         compiler_driver_(compiler_driver),
         depth_(depth),
         number_of_inlined_instructions_(0),
@@ -57,6 +60,7 @@
 
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
+  CodeGenerator* const codegen_;
   CompilerDriver* const compiler_driver_;
   const size_t depth_;
   size_t number_of_inlined_instructions_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 86a3ad9..b97dc1a 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,15 +16,16 @@
 
 #include "instruction_simplifier.h"
 
+#include "intrinsics.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
 
-class InstructionSimplifierVisitor : public HGraphVisitor {
+class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
  public:
   InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
-      : HGraphVisitor(graph),
+      : HGraphDelegateVisitor(graph),
         stats_(stats) {}
 
   void Run();
@@ -71,9 +72,14 @@
   void VisitXor(HXor* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
   void VisitFakeString(HFakeString* fake_string) OVERRIDE;
+  void VisitInvoke(HInvoke* invoke) OVERRIDE;
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
+  void SimplifySystemArrayCopy(HInvoke* invoke);
+  void SimplifyStringEquals(HInvoke* invoke);
+
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
@@ -240,6 +246,12 @@
 
 void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
   HInstruction* object = check_cast->InputAt(0);
+  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
+  if (load_class->NeedsAccessCheck()) {
+    // If we need to perform an access check, we cannot remove the instruction.
+    return;
+  }
+
   if (CanEnsureNotNullAt(object, check_cast)) {
     check_cast->ClearMustDoNullCheck();
   }
@@ -253,7 +265,6 @@
   }
 
   bool outcome;
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
@@ -275,6 +286,12 @@
 
 void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
   HInstruction* object = instruction->InputAt(0);
+  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
+  if (load_class->NeedsAccessCheck()) {
+    // If we need to perform an access check, we cannot remove the instruction.
+    return;
+  }
+
   bool can_be_null = true;
   if (CanEnsureNotNullAt(object, instruction)) {
     can_be_null = false;
@@ -290,7 +307,6 @@
   }
 
   bool outcome;
-  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
@@ -603,13 +619,15 @@
   VisitCondition(condition);
 }
 
+// TODO: unsigned comparisons too?
+
 void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
   // Try to fold an HCompare into this HCondition.
 
   // This simplification is currently supported on x86, x86_64, ARM and ARM64.
-  // TODO: Implement it for MIPS64.
+  // TODO: Implement it for MIPS and MIPS64.
   InstructionSet instruction_set = GetGraph()->GetInstructionSet();
-  if (instruction_set == kMips64) {
+  if (instruction_set == kMips || instruction_set == kMips64) {
     return;
   }
 
@@ -1037,4 +1055,113 @@
   instruction->GetBlock()->RemoveInstruction(instruction);
 }
 
+void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
+  HInstruction* argument = instruction->InputAt(1);
+  HInstruction* receiver = instruction->InputAt(0);
+  if (receiver == argument) {
+    // Because String.equals is an instance call, the receiver input is
+    // an HNullCheck unless we can prove it is not null. The argument,
+    // however, is the actual object. So we cannot end up in a situation
+    // where both inputs are equal but could be null.
+    DCHECK(CanEnsureNotNullAt(argument, instruction));
+    instruction->ReplaceWith(GetGraph()->GetIntConstant(1));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  } else {
+    StringEqualsOptimizations optimizations(instruction);
+    if (CanEnsureNotNullAt(argument, instruction)) {
+      optimizations.SetArgumentNotNull();
+    }
+    ScopedObjectAccess soa(Thread::Current());
+    ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo();
+    if (argument_rti.IsValid() && argument_rti.IsStringClass()) {
+      optimizations.SetArgumentIsString();
+    }
+  }
+}
+
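+// Returns whether `potential_length` is known to be the length of `potential_array`,
+// either through an ArrayLength instruction or because `potential_array` is a
+// NewArray allocated with `potential_length` as its length input.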
+static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
+  if (potential_length->IsArrayLength()) {
+    return potential_length->InputAt(0) == potential_array;
+  }
+
+  if (potential_array->IsNewArray()) {
+    return potential_array->InputAt(0) == potential_length;
+  }
+
+  return false;
+}
+
+void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) {
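+  // Arguments follow System.arraycopy(src, srcPos, dest, destPos, length):
+  // input 0 is the source array, input 2 the destination and input 4 the length.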
+  HInstruction* source = instruction->InputAt(0);
+  HInstruction* destination = instruction->InputAt(2);
+  HInstruction* count = instruction->InputAt(4);
+  SystemArrayCopyOptimizations optimizations(instruction);
+  if (CanEnsureNotNullAt(source, instruction)) {
+    optimizations.SetSourceIsNotNull();
+  }
+  if (CanEnsureNotNullAt(destination, instruction)) {
+    optimizations.SetDestinationIsNotNull();
+  }
+  if (destination == source) {
+    optimizations.SetDestinationIsSource();
+  }
+
+  if (IsArrayLengthOf(count, source)) {
+    optimizations.SetCountIsSourceLength();
+  }
+
+  if (IsArrayLengthOf(count, destination)) {
+    optimizations.SetCountIsDestinationLength();
+  }
+
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
+    if (destination_rti.IsValid()) {
+      if (destination_rti.IsObjectArray()) {
+        if (destination_rti.IsExact()) {
+          optimizations.SetDoesNotNeedTypeCheck();
+        }
+        optimizations.SetDestinationIsTypedObjectArray();
+      }
+      if (destination_rti.IsPrimitiveArrayClass()) {
+        optimizations.SetDestinationIsPrimitiveArray();
+      } else if (destination_rti.IsNonPrimitiveArrayClass()) {
+        optimizations.SetDestinationIsNonPrimitiveArray();
+      }
+    }
+    ReferenceTypeInfo source_rti = source->GetReferenceTypeInfo();
+    if (source_rti.IsValid()) {
+      if (destination_rti.IsValid() && destination_rti.CanArrayHoldValuesOf(source_rti)) {
+        optimizations.SetDoesNotNeedTypeCheck();
+      }
+      if (source_rti.IsPrimitiveArrayClass()) {
+        optimizations.SetSourceIsPrimitiveArray();
+      } else if (source_rti.IsNonPrimitiveArrayClass()) {
+        optimizations.SetSourceIsNonPrimitiveArray();
+      }
+    }
+  }
+}
+
+void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
+  if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
+    SimplifyStringEquals(instruction);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
+    SimplifySystemArrayCopy(instruction);
+  }
+}
+
+void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) {
+  HInstruction* cond = deoptimize->InputAt(0);
+  if (cond->IsConstant()) {
+    if (cond->AsIntConstant()->IsZero()) {
+      // Never deopt: instruction can be removed.
+      deoptimize->GetBlock()->RemoveInstruction(deoptimize);
+    } else {
+      // Always deopt.
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 4b2d36f..eb79f46 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -16,8 +16,65 @@
 
 #include "instruction_simplifier_arm64.h"
 
+#include "mirror/array-inl.h"
+
 namespace art {
 namespace arm64 {
 
+void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
+                                                                     HInstruction* array,
+                                                                     HInstruction* index,
+                                                                     int access_size) {
+  if (index->IsConstant() ||
+      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+    // When the index is a constant, all the addressing can be fitted into the
+    // memory access instruction, so do not split the access.
+    return;
+  }
+  if (access->IsArraySet() &&
+      access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
+    // The access may require a runtime call or the original array pointer.
+    return;
+  }
+
+  // Proceed to extract the base address computation.
+  ArenaAllocator* arena = GetGraph()->GetArena();
+
+  HIntConstant* offset =
+      GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value());
+  HArm64IntermediateAddress* address =
+      new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
+  access->GetBlock()->InsertInstructionBefore(address, access);
+  access->ReplaceInput(address, 0);
+  // Both instructions must depend on GC to prevent any instruction that can
+  // trigger GC from being inserted between the two.
+  access->AddSideEffects(SideEffects::DependsOnGC());
+  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
+  // is an HArm64IntermediateAddress and generate appropriate code.
+  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
+  // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would
+  // not bring any advantages yet.
+  // Also see the comments in
+  // `InstructionCodeGeneratorARM64::VisitArrayGet()` and
+  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
+  RecordSimplification();
+}
+
+void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
+  TryExtractArrayAccessAddress(instruction,
+                               instruction->GetArray(),
+                               instruction->GetIndex(),
+                               Primitive::ComponentSize(instruction->GetType()));
+}
+
+void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
+  TryExtractArrayAccessAddress(instruction,
+                               instruction->GetArray(),
+                               instruction->GetIndex(),
+                               Primitive::ComponentSize(instruction->GetComponentType()));
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index d7f4eae..4b697db 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -35,6 +35,14 @@
     }
   }
 
+  void TryExtractArrayAccessAddress(HInstruction* access,
+                                    HInstruction* array,
+                                    HInstruction* index,
+                                    int access_size);
+
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
+
   OptimizingCompilerStats* stats_;
 };
 
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 9564622..dbe7524 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -210,6 +210,9 @@
     case kIntrinsicSystemArrayCopyCharArray:
       return Intrinsics::kSystemArrayCopyChar;
 
+    case kIntrinsicSystemArrayCopy:
+      return Intrinsics::kSystemArrayCopy;
+
     // Thread.currentThread.
     case kIntrinsicCurrentThread:
       return  Intrinsics::kThreadCurrentThread;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index d1a17b6..e459516 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -116,6 +116,80 @@
   DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
 };
 
+#define GENERIC_OPTIMIZATION(name, bit)                \
+public:                                                \
+void Set##name() { SetBit(k##name); }                  \
+bool Get##name() const { return IsBitSet(k##name); }   \
+private:                                               \
+static constexpr int k##name = bit
+
+class IntrinsicOptimizations : public ValueObject {
+ public:
+  explicit IntrinsicOptimizations(HInvoke* invoke) : value_(invoke->GetIntrinsicOptimizations()) {}
+  explicit IntrinsicOptimizations(const HInvoke& invoke)
+      : value_(invoke.GetIntrinsicOptimizations()) {}
+
+  static constexpr int kNumberOfGenericOptimizations = 2;
+  GENERIC_OPTIMIZATION(DoesNotNeedDexCache, 0);
+  GENERIC_OPTIMIZATION(DoesNotNeedEnvironment, 1);
+
+ protected:
+  bool IsBitSet(uint32_t bit) const {
+    return (*value_ & (1 << bit)) != 0u;
+  }
+
+  void SetBit(uint32_t bit) {
+    *(const_cast<uint32_t*>(value_)) |= (1 << bit);
+  }
+
+ private:
+  const uint32_t* value_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicOptimizations);
+};
+
+#undef GENERIC_OPTIMIZATION
+
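+// Like GENERIC_OPTIMIZATION above, this defines a Set<name>()/Get<name>() pair
+// backed by a single bit, but offsets the bit past the generic flags so that
+// intrinsic-specific flags do not clash with them.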
+#define INTRINSIC_OPTIMIZATION(name, bit)                             \
+public:                                                               \
+void Set##name() { SetBit(k##name); }                                 \
+bool Get##name() const { return IsBitSet(k##name); }                  \
+private:                                                              \
+static constexpr int k##name = bit + kNumberOfGenericOptimizations
+
+class StringEqualsOptimizations : public IntrinsicOptimizations {
+ public:
+  explicit StringEqualsOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
+
+  INTRINSIC_OPTIMIZATION(ArgumentNotNull, 0);
+  INTRINSIC_OPTIMIZATION(ArgumentIsString, 1);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations);
+};
+
+class SystemArrayCopyOptimizations : public IntrinsicOptimizations {
+ public:
+  explicit SystemArrayCopyOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
+
+  INTRINSIC_OPTIMIZATION(SourceIsNotNull, 0);
+  INTRINSIC_OPTIMIZATION(DestinationIsNotNull, 1);
+  INTRINSIC_OPTIMIZATION(DestinationIsSource, 2);
+  INTRINSIC_OPTIMIZATION(CountIsSourceLength, 3);
+  INTRINSIC_OPTIMIZATION(CountIsDestinationLength, 4);
+  INTRINSIC_OPTIMIZATION(DoesNotNeedTypeCheck, 5);
+  INTRINSIC_OPTIMIZATION(DestinationIsTypedObjectArray, 6);
+  INTRINSIC_OPTIMIZATION(DestinationIsNonPrimitiveArray, 7);
+  INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8);
+  INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9);
+  INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations);
+};
+
+#undef INTRINSIC_OPTIMIZATION
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 938c78e..0a5acc3 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -961,6 +961,14 @@
   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic does not always work when heap
+  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
+  // off temporarily as a quick fix.
+  // TODO(rpl): Fix it and turn it back on.
+  if (kPoisonHeapReferences) {
+    return;
+  }
+
   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1307,6 +1315,308 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  LocationSummary* locations = invoke->GetLocations();
+  if (locations == nullptr) {
+    return;
+  }
+
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
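+  // A position or length constant that cannot be encoded as an ARM shifter
+  // operand must be materialized in a register so the generated comparisons
+  // can still use it.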
+  if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
+    locations->SetInAt(3, Location::RequiresRegister());
+  }
+  if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
+    locations->SetInAt(4, Location::RequiresRegister());
+  }
+}
+
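+// Emit range checks for one side of the copy: verify that `pos` is within the
+// bounds of `input` and that at least `length` elements remain starting at
+// `pos`, branching to `slow_path` otherwise.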
+static void CheckPosition(ArmAssembler* assembler,
+                          Location pos,
+                          Register input,
+                          Location length,
+                          SlowPathCode* slow_path,
+                          Register input_len,
+                          Register temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+    if (pos_const == 0) {
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        __ LoadFromOffset(kLoadWord, temp, input, length_offset);
+        if (length.IsConstant()) {
+          __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
+        }
+        __ b(slow_path->GetEntryLabel(), LT);
+      }
+    } else {
+      // Check that length(input) >= pos.
+      __ LoadFromOffset(kLoadWord, input_len, input, length_offset);
+      __ subs(temp, input_len, ShifterOperand(pos_const));
+      __ b(slow_path->GetEntryLabel(), LT);
+
+      // Check that (length(input) - pos) >= length.
+      if (length.IsConstant()) {
+        __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
+      }
+      __ b(slow_path->GetEntryLabel(), LT);
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
+  } else {
+    // Check that pos >= 0.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ cmp(pos_reg, ShifterOperand(0));
+    __ b(slow_path->GetEntryLabel(), LT);
+
+    // Check that pos <= length(input).
+    __ LoadFromOffset(kLoadWord, temp, input, length_offset);
+    __ subs(temp, temp, ShifterOperand(pos_reg));
+    __ b(slow_path->GetEntryLabel(), LT);
+
+    // Check that (length(input) - pos) >= length.
+    if (length.IsConstant()) {
+      __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
+    }
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+}
+
+void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  Register src = locations->InAt(0).AsRegister<Register>();
+  Location src_pos = locations->InAt(1);
+  Register dest = locations->InAt(2).AsRegister<Register>();
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  Label ok;
+  SystemArrayCopyOptimizations optimizations(invoke);
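+  // These flags were precomputed by the instruction simplifier and describe
+  // which runtime checks can be skipped below.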
+
+  if (!optimizations.GetDestinationIsSource()) {
+    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
+      __ cmp(src, ShifterOperand(dest));
+    }
+  }
+
+  // If source and destination are the same array, conservatively go to the slow
+  // path when dest_pos > src_pos: the copy loop below only runs forwards, which
+  // would be incorrect for overlapping ranges.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ b(&ok, NE);
+      }
+      __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
+      __ b(slow_path->GetEntryLabel(), GT);
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ b(&ok, NE);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos_constant));
+    } else {
+      __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
+    }
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+
+  __ Bind(&ok);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ cmp(length.AsRegister<Register>(), ShifterOperand(0));
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+    __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+    __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+    bool did_unpoison = false;
+    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+        !optimizations.GetSourceIsNonPrimitiveArray()) {
+      // One or two of the references need to be unpoisoned. Unpoison them
+      // both to make the identity check valid.
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ MaybeUnpoisonHeapReference(temp2);
+      did_unpoison = true;
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // Bail out if the destination is not a non primitive array.
+      __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp3);
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // Bail out if the source is not a non primitive array.
+      __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp3);
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+    }
+
+    __ cmp(temp1, ShifterOperand(temp2));
+
+    if (optimizations.GetDestinationIsTypedObjectArray()) {
+      Label do_copy;
+      __ b(&do_copy, EQ);
+      if (!did_unpoison) {
+        __ MaybeUnpoisonHeapReference(temp1);
+      }
+      __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+      // No need to unpoison the result, we're comparing against null.
+      __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+      __ Bind(&do_copy);
+    } else {
+      __ b(slow_path->GetEntryLabel(), NE);
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+    __ MaybeUnpoisonHeapReference(temp1);
+    __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+    __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
+    __ MaybeUnpoisonHeapReference(temp3);
+    __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+    static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+    __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+  }
+
+  // Compute base source address, base destination address, and end source address.
+
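+  // Heap references are 32-bit words, hence the int32_t element stride.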
+  uint32_t element_size = sizeof(int32_t);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(temp1, src, element_size * constant + offset);
+  } else {
+    __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+    __ AddConstant(temp1, offset);
+  }
+
+  if (dest_pos.IsConstant()) {
+    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(temp2, dest, element_size * constant + offset);
+  } else {
+    __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
+    __ AddConstant(temp2, offset);
+  }
+
+  if (length.IsConstant()) {
+    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(temp3, temp1, element_size * constant);
+  } else {
+    __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+  }
+
+  // Iterate over the arrays and do a raw copy of the objects. We don't need to
+  // poison/unpoison nor to perform a read barrier, as the next uses of the
+  // destination array will do it.
+  Label loop, done;
+  __ cmp(temp1, ShifterOperand(temp3));
+  __ b(&done, EQ);
+  __ Bind(&loop);
+  __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+  __ str(IP, Address(temp2, element_size, Address::PostIndex));
+  __ cmp(temp1, ShifterOperand(temp3));
+  __ b(&loop, NE);
+  __ Bind(&done);
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       Register(kNoRegister),
+                       false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 2abb605..127e9a4 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -33,8 +33,10 @@
 
 class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor {
  public:
-  IntrinsicLocationsBuilderARM(ArenaAllocator* arena, const ArmInstructionSetFeatures& features)
-      : arena_(arena), features_(features) {}
+  IntrinsicLocationsBuilderARM(ArenaAllocator* arena,
+                               ArmAssembler* assembler,
+                               const ArmInstructionSetFeatures& features)
+      : arena_(arena), assembler_(assembler), features_(features) {}
 
   // Define visitor methods.
 
@@ -52,6 +54,7 @@
 
  private:
   ArenaAllocator* arena_;
+  ArmAssembler* assembler_;
 
   const ArmInstructionSetFeatures& features_;
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index b0cfd0d..059abf0 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1087,6 +1087,14 @@
   CreateIntIntIntIntIntToInt(arena_, invoke);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic does not always work when heap
+  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
+  // off temporarily as a quick fix.
+  // TODO(rpl): Fix it and turn it back on.
+  if (kPoisonHeapReferences) {
+    return;
+  }
+
   CreateIntIntIntIntIntToInt(arena_, invoke);
 }
 
@@ -1447,6 +1455,7 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index bfe5e55..8f1d5e1 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -58,6 +58,7 @@
   V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache) \
   V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache) \
   V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache) \
+  V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache) \
   V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache) \
   V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache) \
   V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
new file mode 100644
index 0000000..5efcf4e
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_mips.h"
+
+#include "arch/mips/instruction_set_features_mips.h"
+#include "art_method.h"
+#include "code_generator_mips.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/mips/assembler_mips.h"
+#include "utils/mips/constants_mips.h"
+
+namespace art {
+
+namespace mips {
+
+IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen)
+  : arena_(codegen->GetGraph()->GetArena()) {
+}
+
+MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() {
+  return reinterpret_cast<MipsAssembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+#define __ codegen->GetAssembler()->
+
+static void MoveFromReturnRegister(Location trg,
+                                   Primitive::Type type,
+                                   CodeGeneratorMIPS* codegen) {
+  if (!trg.IsValid()) {
+    DCHECK_EQ(type, Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
+    Register trg_reg = trg.AsRegister<Register>();
+    if (trg_reg != V0) {
+      __ Move(trg_reg, V0);
+    }
+  } else {
+    FRegister trg_reg = trg.AsFpuRegister<FRegister>();
+    if (trg_reg != F0) {
+      if (type == Primitive::kPrimFloat) {
+        __ MovS(trg_reg, F0);
+      } else {
+        __ MovD(trg_reg, F0);
+      }
+    }
+  }
+}
+
+static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
+  InvokeDexCallingConventionVisitorMIPS calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
+}
+
+// Slow-path for fallback (calling the managed code to handle the
+// intrinsic) in an intrinsified call. This will copy the arguments
+// into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations
+//       given by the invoke's location summary. If an intrinsic
+//       modifies those locations before a slowpath call, they must be
+//       restored!
+class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : invoke_(invoke) { }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in);
+
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    MoveArguments(invoke_, codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
+                                          Location::RegisterLocation(A0));
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
+    } else {
+      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+      UNREACHABLE();
+    }
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+    }
+
+    RestoreLiveRegisters(codegen, invoke_->GetLocations());
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS"; }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathMIPS);
+};
+
+#undef __
+
+bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  LocationSummary* res = invoke->GetLocations();
+  return res != nullptr && res->Intrinsified();
+}
+
+#define __ assembler->
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
+void IntrinsicLocationsBuilderMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}                                                                                      \
+void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
+UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
+UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat)
+UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble)
+UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits)
+UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits)
+UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
+UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
+UNIMPLEMENTED_INTRINSIC(MathAbsInt)
+UNIMPLEMENTED_INTRINSIC(MathAbsLong)
+UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMinIntInt)
+UNIMPLEMENTED_INTRINSIC(MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(MathMaxIntInt)
+UNIMPLEMENTED_INTRINSIC(MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(MathSqrt)
+UNIMPLEMENTED_INTRINSIC(MathCeil)
+UNIMPLEMENTED_INTRINSIC(MathFloor)
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+UNIMPLEMENTED_INTRINSIC(MemoryPeekByte)
+UNIMPLEMENTED_INTRINSIC(MemoryPeekIntNative)
+UNIMPLEMENTED_INTRINSIC(MemoryPeekLongNative)
+UNIMPLEMENTED_INTRINSIC(MemoryPeekShortNative)
+UNIMPLEMENTED_INTRINSIC(MemoryPokeByte)
+UNIMPLEMENTED_INTRINSIC(MemoryPokeIntNative)
+UNIMPLEMENTED_INTRINSIC(MemoryPokeLongNative)
+UNIMPLEMENTED_INTRINSIC(MemoryPokeShortNative)
+UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread)
+UNIMPLEMENTED_INTRINSIC(UnsafeGet)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetObject)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePut)
+UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObject)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLong)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(StringCharAt)
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringEquals)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromString)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
+
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
+
+#undef UNIMPLEMENTED_INTRINSIC
+
+#undef __
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
new file mode 100644
index 0000000..c71b3c6
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace mips {
+
+class CodeGeneratorMIPS;
+class MipsAssembler;
+
+class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen);
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS);
+};
+
+class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorMIPS(CodeGeneratorMIPS* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  MipsAssembler* GetAssembler();
+
+  ArenaAllocator* GetAllocator();
+
+  CodeGeneratorMIPS* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS);
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index b60905d..05c7eb0 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -43,6 +43,93 @@
   return codegen_->GetGraph()->GetArena();
 }
 
+#define __ codegen->GetAssembler()->
+
+static void MoveFromReturnRegister(Location trg,
+                                   Primitive::Type type,
+                                   CodeGeneratorMIPS64* codegen) {
+  if (!trg.IsValid()) {
+    DCHECK_EQ(type, Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
+    GpuRegister trg_reg = trg.AsRegister<GpuRegister>();
+    if (trg_reg != V0) {
+      __ Move(trg_reg, V0);
+    }
+  } else {
+    FpuRegister trg_reg = trg.AsFpuRegister<FpuRegister>();
+    if (trg_reg != F0) {
+      if (type == Primitive::kPrimFloat) {
+        __ MovS(trg_reg, F0);
+      } else {
+        __ MovD(trg_reg, F0);
+      }
+    }
+  }
+}
+
+static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
+  InvokeDexCallingConventionVisitorMIPS64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
+}
+
+// Slow-path for fallback (calling the managed code to handle the
+// intrinsic) in an intrinsified call. This will copy the arguments
+// into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations
+//       given by the invoke's location summary. If an intrinsic
+//       modifies those locations before a slowpath call, they must be
+//       restored!
+class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) : invoke_(invoke) { }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in);
+
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    MoveArguments(invoke_, codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
+                                          Location::RegisterLocation(A0));
+      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
+    } else {
+      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+      UNREACHABLE();
+    }
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+    }
+
+    RestoreLiveRegisters(codegen, invoke_->GetLocations());
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathMIPS64);
+};
+
+#undef __
+
 bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
@@ -185,7 +272,9 @@
   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
 }
 
-static void GenCountZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+static void GenNumberOfLeadingZeroes(LocationSummary* locations,
+                                     bool is64bit,
+                                     Mips64Assembler* assembler) {
   GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
@@ -202,7 +291,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  GenCountZeroes(invoke->GetLocations(), false, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), false, GetAssembler());
 }
 
 // int java.lang.Long.numberOfLeadingZeros(long i)
@@ -211,7 +300,168 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  GenCountZeroes(invoke->GetLocations(), true, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenNumberOfTrailingZeroes(LocationSummary* locations,
+                                      bool is64bit,
+                                      Mips64Assembler* assembler) {
+  Location in = locations->InAt(0);
+  Location out = locations->Out();
+
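+  // Reverse the bit order of the input (byte swap, then swap the bits within
+  // each byte) and count the leading zeros of the result, which equals the
+  // number of trailing zeros of the original value.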
+  if (is64bit) {
+    __ Dsbh(out.AsRegister<GpuRegister>(), in.AsRegister<GpuRegister>());
+    __ Dshd(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>());
+    __ Dbitswap(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>());
+    __ Dclz(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>());
+  } else {
+    __ Rotr(out.AsRegister<GpuRegister>(), in.AsRegister<GpuRegister>(), 16);
+    __ Wsbh(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>());
+    __ Bitswap(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>());
+    __ Clz(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>());
+  }
+}
+
+// int java.lang.Integer.numberOfTrailingZeros(int i)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), false, GetAssembler());
+}
+
+// int java.lang.Long.numberOfTrailingZeros(long i)
+void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenRotateRight(HInvoke* invoke,
+                           Primitive::Type type,
+                           Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  LocationSummary* locations = invoke->GetLocations();
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    uint32_t shift = static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue());
+    if (type == Primitive::kPrimInt) {
+      shift &= 0x1f;
+      __ Rotr(out, in, shift);
+    } else {
+      shift &= 0x3f;
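+      // DROTR only encodes shift amounts 0-31; DROTR32 adds 32 to its 5-bit
+      // shift field, so shifts of 32 or more are emitted as DROTR32 with
+      // (shift - 32).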
+      if (shift < 32) {
+        __ Drotr(out, in, shift);
+      } else {
+        shift &= 0x1f;
+        __ Drotr32(out, in, shift);
+      }
+    }
+  } else {
+    GpuRegister shamt = locations->InAt(1).AsRegister<GpuRegister>();
+    if (type == Primitive::kPrimInt) {
+      __ Rotrv(out, in, shamt);
+    } else {
+      __ Drotrv(out, in, shamt);
+    }
+  }
+}
+
+// int java.lang.Integer.rotateRight(int i, int distance)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerRotateRight(HInvoke* invoke) {
+  GenRotateRight(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.rotateRight(long i, int distance)
+void IntrinsicLocationsBuilderMIPS64::VisitLongRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongRotateRight(HInvoke* invoke) {
+  GenRotateRight(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenRotateLeft(HInvoke* invoke,
+                           Primitive::Type type,
+                           Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  LocationSummary* locations = invoke->GetLocations();
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
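+  // rotateLeft(x, distance) is implemented as rotateRight(x, -distance), so the
+  // distance is negated and the rotate-right instructions are reused.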
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    int32_t shift = -static_cast<int32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue());
+    if (type == Primitive::kPrimInt) {
+      shift &= 0x1f;
+      __ Rotr(out, in, shift);
+    } else {
+      shift &= 0x3f;
+      if (shift < 32) {
+        __ Drotr(out, in, shift);
+      } else {
+        shift &= 0x1f;
+        __ Drotr32(out, in, shift);
+      }
+    }
+  } else {
+    GpuRegister shamt = locations->InAt(1).AsRegister<GpuRegister>();
+    if (type == Primitive::kPrimInt) {
+      __ Subu(TMP, ZERO, shamt);
+      __ Rotrv(out, in, TMP);
+    } else {
+      __ Dsubu(TMP, ZERO, shamt);
+      __ Drotrv(out, in, TMP);
+    }
+  }
+}
+
+// int java.lang.Integer.rotateLeft(int i, int distance)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerRotateLeft(HInvoke* invoke) {
+  GenRotateLeft(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.rotateLeft(long i, int distance)
+void IntrinsicLocationsBuilderMIPS64::VisitLongRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongRotateLeft(HInvoke* invoke) {
+  GenRotateLeft(invoke, Primitive::kPrimLong, GetAssembler());
 }
 
 static void GenReverse(LocationSummary* locations,
@@ -508,17 +758,19 @@
   __ SqrtD(out, in);
 }
 
-static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPToFP(ArenaAllocator* arena,
+                         HInvoke* invoke,
+                         Location::OutputOverlap overlaps = Location::kOutputOverlap) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
-  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresFpuRegister(), overlaps);
 }
 
 // double java.lang.Math.rint(double)
 void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) {
-  CreateFPToFP(arena_, invoke);
+  CreateFPToFP(arena_, invoke, Location::kNoOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) {
@@ -542,15 +794,22 @@
                                              kQuietNaN |
                                              kSignalingNaN;
 
-void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  Mips64Assembler* assembler = GetAssembler();
+enum FloatRoundingMode {
+  kFloor,
+  kCeil,
+};
+
+static void GenRoundingMode(LocationSummary* locations,
+                            FloatRoundingMode mode,
+                            Mips64Assembler* assembler) {
   FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
   FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
 
+  DCHECK_NE(in, out);
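+  // `in` must stay live: it is read again after FloorLD/CeilLD clobbers `out`,
+  // so the two cannot share a register.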
+
   Label done;
 
-  // double floor(double in) {
+  // double floor/ceil(double in) {
   //     if in.isNaN || in.isInfinite || in.isZero {
   //         return in;
   //     }
@@ -560,19 +819,23 @@
   __ MovD(out, in);
   __ Bnezc(AT, &done);
 
-  //     Long outLong = floor(in);
+  //     Long outLong = floor/ceil(in);
   //     if outLong == Long.MAX_VALUE {
-  //         // floor() has almost certainly returned a value which
-  //         // can't be successfully represented as a signed 64-bit
-  //         // number.  Java expects that the input value will be
-  //         // returned in these cases.
-  //         // There is also a small probability that floor(in)
-  //         // correctly truncates the input value to Long.MAX_VALUE.  In
-  //         // that case, this exception handling code still does the
-  //         // correct thing.
+  //         // floor()/ceil() has almost certainly returned a value
+  //         // which can't be successfully represented as a signed
+  //         // 64-bit number.  Java expects that the input value will
+  //         // be returned in these cases.
+  //         // There is also a small probability that floor(in)/ceil(in)
+  //         // correctly truncates/rounds up the input value to
+  //         // Long.MAX_VALUE.  In that case, this exception handling
+  //         // code still does the correct thing.
   //         return in;
   //     }
-  __ FloorLD(out, in);
+  if (mode == kFloor) {
+    __ FloorLD(out, in);
+  } else if (mode == kCeil) {
+    __ CeilLD(out, in);
+  }
   __ Dmfc1(AT, out);
   __ MovD(out, in);
   __ LoadConst64(TMP, kPrimLongMax);
@@ -586,53 +849,17 @@
   // }
 }
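+
+// A hedged C++ sketch of what GenRoundingMode emits; SaturatingFloorToLong and
+// SaturatingCeilToLong are placeholder helpers standing in for FloorLD/CeilLD,
+// illustration only:
+//
+//   double FloorOrCeilSketch(double in, bool is_ceil) {
+//     if (std::isnan(in) || std::isinf(in) || in == 0.0) {
+//       return in;                                          // kFPLeaveUnchanged classes
+//     }
+//     int64_t out_long = is_ceil ? SaturatingCeilToLong(in)     // CeilLD
+//                                : SaturatingFloorToLong(in);   // FloorLD
+//     if (out_long == std::numeric_limits<int64_t>::max()) {
+//       return in;                                          // likely out of int64 range
+//     }
+//     return static_cast<double>(out_long);                 // Dmtc1 + Cvtdl
+//   }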
 
+void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
+  GenRoundingMode(invoke->GetLocations(), kFloor, GetAssembler());
+}
+
 // double java.lang.Math.ceil(double)
 void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) {
   CreateFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  Mips64Assembler* assembler = GetAssembler();
-  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
-  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
-
-  Label done;
-
-  // double ceil(double in) {
-  //     if in.isNaN || in.isInfinite || in.isZero {
-  //         return in;
-  //     }
-  __ ClassD(out, in);
-  __ Dmfc1(AT, out);
-  __ Andi(AT, AT, kFPLeaveUnchanged);   // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
-  __ MovD(out, in);
-  __ Bnezc(AT, &done);
-
-  //     Long outLong = ceil(in);
-  //     if outLong == Long.MAX_VALUE {
-  //         // ceil() has almost certainly returned a value which
-  //         // can't be successfully represented as a signed 64-bit
-  //         // number.  Java expects that the input value will be
-  //         // returned in these cases.
-  //         // There is also a small probability that ceil(in)
-  //         // correctly rounds up the input value to Long.MAX_VALUE.  In
-  //         // that case, this exception handling code still does the
-  //         // correct thing.
-  //         return in;
-  //     }
-  __ CeilLD(out, in);
-  __ Dmfc1(AT, out);
-  __ MovD(out, in);
-  __ LoadConst64(TMP, kPrimLongMax);
-  __ Beqc(AT, TMP, &done);
-
-  //     double out = outLong;
-  //     return out;
-  __ Dmtc1(AT, out);
-  __ Cvtdl(out, out);
-  __ Bind(&done);
-  // }
+  GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler());
 }
 
 // byte libcore.io.Memory.peekByte(long address)
@@ -765,6 +992,590 @@
                     Thread::PeerOffset<kMips64PointerSize>().Int32Value());
 }
 
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorMIPS64* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK((type == Primitive::kPrimInt) ||
+         (type == Primitive::kPrimLong) ||
+         (type == Primitive::kPrimNot));
+  Mips64Assembler* assembler = codegen->GetAssembler();
+  // Object pointer.
+  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  // Long offset.
+  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+  GpuRegister trg = locations->Out().AsRegister<GpuRegister>();
+
+  __ Daddu(TMP, base, offset);
+  if (is_volatile) {
+    __ Sync(0);
+  }
+  switch (type) {
+    case Primitive::kPrimInt:
+      __ Lw(trg, TMP, 0);
+      break;
+
+    case Primitive::kPrimNot:
+      __ Lwu(trg, TMP, 0);
+      break;
+
+    case Primitive::kPrimLong:
+      __ Ld(trg, TMP, 0);
+      break;
+
+    default:
+      LOG(FATAL) << "Unsupported op size " << type;
+      UNREACHABLE();
+  }
+}
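+
+// Hedged sketch of the load sequence above; FullFence() is a placeholder for
+// the Sync(0) instruction, not an ART helper, and the int case is shown:
+//
+//   int32_t UnsafeGetIntSketch(const uint8_t* base, int64_t offset, bool is_volatile) {
+//     const int32_t* addr = reinterpret_cast<const int32_t*>(base + offset);  // Daddu
+//     if (is_volatile) {
+//       FullFence();                                                          // Sync(0)
+//     }
+//     return *addr;                                                           // Lw
+//   }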
+
+// int sun.misc.Unsafe.getInt(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+}
+
+// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+}
+
+// long sun.misc.Unsafe.getLong(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+}
+
+// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+}
+
+// Object sun.misc.Unsafe.getObject(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+}
+
+// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+}
+
+static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+}
+
+static void GenUnsafePut(LocationSummary* locations,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         bool is_ordered,
+                         CodeGeneratorMIPS64* codegen) {
+  DCHECK((type == Primitive::kPrimInt) ||
+         (type == Primitive::kPrimLong) ||
+         (type == Primitive::kPrimNot));
+  Mips64Assembler* assembler = codegen->GetAssembler();
+  // Object pointer.
+  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  // Long offset.
+  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+  GpuRegister value = locations->InAt(3).AsRegister<GpuRegister>();
+
+  __ Daddu(TMP, base, offset);
+  if (is_volatile || is_ordered) {
+    __ Sync(0);
+  }
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      __ Sw(value, TMP, 0);
+      break;
+
+    case Primitive::kPrimLong:
+      __ Sd(value, TMP, 0);
+      break;
+
+    default:
+      LOG(FATAL) << "Unsupported op size " << type;
+      UNREACHABLE();
+  }
+  if (is_volatile) {
+    __ Sync(0);
+  }
+
+  if (type == Primitive::kPrimNot) {
+    codegen->MarkGCCard(base, value);
+  }
+}
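+
+// Hedged sketch of the store sequence above; FullFence() is again a
+// placeholder for Sync(0), and the int case is shown:
+//
+//   void UnsafePutIntSketch(uint8_t* base, int64_t offset, int32_t value,
+//                           bool is_volatile, bool is_ordered) {
+//     int32_t* addr = reinterpret_cast<int32_t*>(base + offset);  // Daddu
+//     if (is_volatile || is_ordered) {
+//       FullFence();                                              // Sync(0)
+//     }
+//     *addr = value;                                              // Sw / Sd
+//     if (is_volatile) {
+//       FullFence();                                              // Sync(0)
+//     }
+//     // Reference puts additionally mark the GC card for `base`.
+//   }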
+
+// void sun.misc.Unsafe.putInt(Object o, long offset, int x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+}
+
+// void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+}
+
+// void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+}
+
+// void sun.misc.Unsafe.putObject(Object o, long offset, Object x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+}
+
+// void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+}
+
+// void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+}
+
+// void sun.misc.Unsafe.putLong(Object o, long offset, long x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+}
+
+// void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+}
+
+// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+}
+
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
+  Mips64Assembler* assembler = codegen->GetAssembler();
+  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+  GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>();
+  GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  DCHECK_NE(base, out);
+  DCHECK_NE(offset, out);
+  DCHECK_NE(expected, out);
+
+  // do {
+  //   tmp_value = [tmp_ptr] - expected;
+  // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
+  // result = (tmp_value == 0);
+
+  Label loop_head, exit_loop;
+  __ Daddu(TMP, base, offset);
+  __ Sync(0);
+  __ Bind(&loop_head);
+  if (type == Primitive::kPrimLong) {
+    __ Lld(out, TMP);
+  } else {
+    __ Ll(out, TMP);
+  }
+  __ Dsubu(out, out, expected);         // If we didn't get the 'expected'
+  __ Sltiu(out, out, 1);                // value, set 'out' to false, and
+  __ Beqzc(out, &exit_loop);            // return.
+  __ Move(out, value);  // Use 'out' for the 'store conditional' instruction.
+                        // If we use 'value' directly, we would lose 'value'
+                        // in the case that the store fails.  Whether the
+                        // store succeeds or fails, it will leave the
+                        // correct boolean value in the 'out' register.
+  if (type == Primitive::kPrimLong) {
+    __ Scd(out, TMP);
+  } else {
+    __ Sc(out, TMP);
+  }
+  __ Beqzc(out, &loop_head);    // If we couldn't do the read-modify-write
+                                // cycle atomically then retry.
+  __ Bind(&exit_loop);
+  __ Sync(0);
+}
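+
+// The LL/SC loop above, restated as a hedged sketch; LoadLinked and
+// StoreConditional are placeholders for Lld/Ll and Scd/Sc, and the
+// surrounding Sync(0) fences are omitted:
+//
+//   bool CasSketch(int64_t* ptr, int64_t expected, int64_t new_value) {
+//     int64_t observed;
+//     do {
+//       observed = LoadLinked(ptr);                 // Lld / Ll
+//       if (observed != expected) {
+//         return false;                             // Dsubu + Sltiu + Beqzc
+//       }
+//     } while (!StoreConditional(ptr, new_value));  // Scd / Sc, Beqzc retries
+//     return true;
+//   }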
+
+// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+}
+
+// boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+}
+
+// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
+void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
+  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+}
+
+// char java.lang.String.charAt(int index)
+void IntrinsicLocationsBuilderMIPS64::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+
+  // Location of reference to data array
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister idx = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  // TODO: Maybe we can support range check elimination. Overall,
+  //       though, I think it's not worth the cost.
+  // TODO: For simplicity, the index parameter is requested in a
+  //       register, so, unlike Quick, we will not optimize the
+  //       code for constants (which would save a register).
+
+  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load the string size
+  __ Lw(TMP, obj, count_offset);
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Fall back to the slow path if idx is too large or negative
+  __ Bgeuc(idx, TMP, slow_path->GetEntryLabel());
+
+  // out = obj[2*idx].
+  __ Sll(TMP, idx, 1);                  // idx * 2
+  __ Daddu(TMP, TMP, obj);              // Address of char at location idx
+  __ Lhu(out, TMP, value_offset);       // Load char at location idx
+
+  __ Bind(slow_path->GetExitLabel());
+}
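+
+// Fast path above as a hedged sketch; `count` and `value` name the String
+// fields referenced through count_offset/value_offset and are illustrative.
+// The unsigned compare also catches negative indices:
+//
+//   uint16_t CharAtSketch(const StringRepr* s, uint32_t idx) {
+//     if (idx >= s->count) {
+//       return SlowPathCharAt(s, idx);   // Bgeuc into the slow path
+//     }
+//     return s->value[idx];              // Lhu at obj + value_offset + 2 * idx
+//   }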
+
+// int java.lang.String.compareTo(String anotherString)
+void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  GpuRegister argument = locations->InAt(1).AsRegister<GpuRegister>();
+  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqzc(argument, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadDoubleword,
+                    TMP,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize,
+                                            pStringCompareTo).Int32Value());
+  __ Jalr(TMP);
+  __ Nop();
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+                                  Mips64Assembler* assembler,
+                                  CodeGeneratorMIPS64* codegen,
+                                  ArenaAllocator* allocator,
+                                  bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+  GpuRegister tmp_reg = start_at_zero ? locations->GetTemp(0).AsRegister<GpuRegister>() : TMP;
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we
+  // don't know statically, or directly dispatch if we have a constant.
+  SlowPathCodeMIPS64* slow_path = nullptr;
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
+      // Always needs the slow-path. We could directly dispatch to it,
+      // but this case should be rare, so for simplicity just put the
+      // full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else {
+    GpuRegister char_reg = locations->InAt(1).AsRegister<GpuRegister>();
+    __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max());
+    slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ Bltuc(tmp_reg, char_reg, slow_path->GetEntryLabel());    // UTF-16 required
+  }
+
+  if (start_at_zero) {
+    DCHECK_EQ(tmp_reg, A2);
+    // Start-index = 0.
+    __ Clear(tmp_reg);
+  } else {
+    __ Slt(TMP, A2, ZERO);      // if fromIndex < 0
+    __ Seleqz(A2, A2, TMP);     //     fromIndex = 0
+  }
+
+  __ LoadFromOffset(kLoadDoubleword,
+                    TMP,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pIndexOf).Int32Value());
+  __ Jalr(TMP);
+  __ Nop();
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
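+
+// Dispatch logic above as a hedged sketch; SlowPathIndexOf and RuntimeIndexOf
+// are placeholders for the intrinsic slow path and the pIndexOf entrypoint:
+//
+//   int32_t IndexOfSketch(StringRef str, int32_t ch, int32_t from_index, bool start_at_zero) {
+//     if (static_cast<uint32_t>(ch) > 0xFFFF) {
+//       return SlowPathIndexOf(str, ch, from_index);  // supplementary code point
+//     }
+//     if (start_at_zero) {
+//       from_index = 0;                               // Clear(tmp_reg)
+//     } else if (from_index < 0) {
+//       from_index = 0;                               // Slt + Seleqz
+//     }
+//     return RuntimeIndexOf(str, ch, from_index);     // pIndexOf stub
+//   }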
+
+// int java.lang.String.indexOf(int ch)
+void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime
+  // calling convention. So it's best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+
+  // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+// int java.lang.String.indexOf(int ch, int fromIndex)
+void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime
+  // calling convention. So it's best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
+// java.lang.String.String(byte[] bytes)
+void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  GpuRegister byte_array = locations->InAt(0).AsRegister<GpuRegister>();
+  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqzc(byte_array, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadDoubleword,
+                    TMP,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromBytes).Int32Value());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Jalr(TMP);
+  __ Nop();
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// java.lang.String.String(char[] value)
+void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+
+  __ LoadFromOffset(kLoadDoubleword,
+                    TMP,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromChars).Int32Value());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Jalr(TMP);
+  __ Nop();
+}
+
+// java.lang.String.String(String original)
+void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  GpuRegister string_to_copy = locations->InAt(0).AsRegister<GpuRegister>();
+  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqzc(string_to_copy, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadDoubleword,
+                    TMP,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromString).Int32Value());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Jalr(TMP);
+  __ Nop();
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -776,42 +1587,12 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
-UNIMPLEMENTED_INTRINSIC(UnsafeGet)
-UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
-UNIMPLEMENTED_INTRINSIC(UnsafeGetLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile)
-UNIMPLEMENTED_INTRINSIC(UnsafeGetObject)
-UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile)
-UNIMPLEMENTED_INTRINSIC(UnsafePut)
-UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered)
-UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile)
-UNIMPLEMENTED_INTRINSIC(UnsafePutObject)
-UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered)
-UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile)
-UNIMPLEMENTED_INTRINSIC(UnsafePutLong)
-UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered)
-UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
-UNIMPLEMENTED_INTRINSIC(StringCharAt)
-UNIMPLEMENTED_INTRINSIC(StringCompareTo)
 UNIMPLEMENTED_INTRINSIC(StringEquals)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
-UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
-UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars)
-UNIMPLEMENTED_INTRINSIC(StringNewStringFromString)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
 
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 318d3a6..040bf6a 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -45,7 +45,7 @@
 
 
 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
-  return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+  return down_cast<X86Assembler*>(codegen_->GetAssembler());
 }
 
 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
@@ -1054,17 +1054,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ testl(arg, arg);
-  __ j(kEqual, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ testl(arg, arg);
+    __ j(kEqual, &return_false);
+  }
 
   // Instanceof check for the argument by comparing class fields.
   // All string objects must have the same type since String cannot be subclassed.
   // Receiver must be a string object, so its class field is equal to all strings' class fields.
   // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ movl(ecx, Address(str, class_offset));
-  __ cmpl(ecx, Address(arg, class_offset));
-  __ j(kNotEqual, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    __ movl(ecx, Address(str, class_offset));
+    __ cmpl(ecx, Address(arg, class_offset));
+    __ j(kNotEqual, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ cmpl(str, arg);
@@ -1723,7 +1728,7 @@
                          Primitive::Type type,
                          bool is_volatile,
                          CodeGeneratorX86* codegen) {
-  X86Assembler* assembler = reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   Register base = locations->InAt(1).AsRegister<Register>();
   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
   Location value_loc = locations->InAt(3);
@@ -1817,7 +1822,7 @@
   locations->SetOut(Location::RegisterLocation(EAX));
   if (type == Primitive::kPrimNot) {
     // Need temp registers for card-marking.
-    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     // Need a byte register for marking.
     locations->AddTemp(Location::RegisterLocation(ECX));
   }
@@ -1836,8 +1841,7 @@
 }
 
 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
-  X86Assembler* assembler =
-    reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
   LocationSummary* locations = invoke->GetLocations();
 
   Register base = locations->InAt(1).AsRegister<Register>();
@@ -1845,47 +1849,92 @@
   Location out = locations->Out();
   DCHECK_EQ(out.AsRegister<Register>(), EAX);
 
-  if (type == Primitive::kPrimLong) {
-    DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
-    DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
-    DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
-    DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
-    __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
-  } else {
-    // Integer or object.
+  if (type == Primitive::kPrimNot) {
     Register expected = locations->InAt(3).AsRegister<Register>();
+    // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
     DCHECK_EQ(expected, EAX);
     Register value = locations->InAt(4).AsRegister<Register>();
-    if (type == Primitive::kPrimNot) {
-      // Mark card for object assuming new value is stored.
-      bool value_can_be_null = true;  // TODO: Worth finding out this information?
-      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
-                          locations->GetTemp(1).AsRegister<Register>(),
-                          base,
-                          value,
-                          value_can_be_null);
 
-      if (kPoisonHeapReferences) {
-        __ PoisonHeapReference(expected);
-        __ PoisonHeapReference(value);
+    // Mark card for object assuming new value is stored.
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
+                        locations->GetTemp(1).AsRegister<Register>(),
+                        base,
+                        value,
+                        value_can_be_null);
+
+    bool base_equals_value = (base == value);
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // If `base` and `value` are the same register location, move
+        // `value` to a temporary register.  This way, poisoning
+        // `value` won't invalidate `base`.
+        value = locations->GetTemp(0).AsRegister<Register>();
+        __ movl(value, base);
       }
+
+      // Check that the register allocator did not assign the location
+      // of `expected` (EAX) to `value` nor to `base`, so that heap
+      // poisoning (when enabled) works as intended below.
+      // - If `value` were equal to `expected`, both references would
+      //   be poisoned twice, meaning they would not be poisoned at
+      //   all, as heap poisoning uses address negation.
+      // - If `base` were equal to `expected`, poisoning `expected`
+      //   would invalidate `base`.
+      DCHECK_NE(value, expected);
+      DCHECK_NE(base, expected);
+
+      __ PoisonHeapReference(expected);
+      __ PoisonHeapReference(value);
     }
 
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
-  }
 
-  // locked cmpxchg has full barrier semantics, and we don't need scheduling
-  // barriers at this time.
+    // locked cmpxchg has full barrier semantics, and we don't need
+    // scheduling barriers at this time.
 
-  // Convert ZF into the boolean result.
-  __ setb(kZero, out.AsRegister<Register>());
-  __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+    // Convert ZF into the boolean result.
+    __ setb(kZero, out.AsRegister<Register>());
+    __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
 
-  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    Register value = locations->InAt(4).AsRegister<Register>();
-    __ UnpoisonHeapReference(value);
-    // Do not unpoison the reference contained in register `expected`,
-    // as it is the same as register `out`.
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // `value` has been moved to a temporary register, no need to
+        // unpoison it.
+      } else {
+        // Ensure `value` is different from `out`, so that unpoisoning
+        // the former does not invalidate the latter.
+        DCHECK_NE(value, out.AsRegister<Register>());
+        __ UnpoisonHeapReference(value);
+      }
+      // Do not unpoison the reference contained in register
+      // `expected`, as it is the same as register `out` (EAX).
+    }
+  } else {
+    if (type == Primitive::kPrimInt) {
+      // Ensure the expected value is in EAX (required by the CMPXCHG
+      // instruction).
+      DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
+      __ LockCmpxchgl(Address(base, offset, TIMES_1, 0),
+                      locations->InAt(4).AsRegister<Register>());
+    } else if (type == Primitive::kPrimLong) {
+      // Ensure the expected value is in EAX:EDX and that the new
+      // value is in EBX:ECX (required by the CMPXCHG8B instruction).
+      DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
+      DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
+      DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
+      DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
+      __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+    } else {
+      LOG(FATAL) << "Unexpected CAS type " << type;
+    }
+
+    // locked cmpxchg has full barrier semantics, and we don't need
+    // scheduling barriers at this time.
+
+    // Convert ZF into the boolean result.
+    __ setb(kZero, out.AsRegister<Register>());
+    __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
   }
 }
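+
+// Hedged illustration of why the DCHECKs above matter: heap poisoning negates
+// the reference bits, so poisoning a register twice is the identity
+// (placeholder functions mirroring PoisonHeapReference/UnpoisonHeapReference):
+//
+//   uint32_t PoisonSketch(uint32_t ref)   { return 0u - ref; }  // negl
+//   uint32_t UnpoisonSketch(uint32_t ref) { return 0u - ref; }  // negl
+//   // PoisonSketch(PoisonSketch(ref)) == ref, so `value == expected` would
+//   // leave both references effectively unpoisoned.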
 
@@ -1923,8 +1972,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
-  X86Assembler* assembler =
-    reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg = locations->InAt(0).AsRegister<Register>();
@@ -1955,8 +2003,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
-  X86Assembler* assembler =
-    reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
   LocationSummary* locations = invoke->GetLocations();
 
   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
@@ -2250,6 +2297,7 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 1a13b69..14c65c9 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -41,7 +41,7 @@
 
 
 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
-  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
 }
 
 ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
@@ -620,7 +620,6 @@
   codegen_->Load64BitValue(out, kPrimIntMax);
 
   // if inPlusPointFive >= maxInt goto done
-  __ movl(out, Immediate(kPrimIntMax));
   __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
   __ j(kAboveEqual, &done);
 
@@ -668,7 +667,6 @@
   codegen_->Load64BitValue(out, kPrimLongMax);
 
   // if inPlusPointFive >= maxLong goto done
-  __ movq(out, Immediate(kPrimLongMax));
   __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
   __ j(kAboveEqual, &done);
 
@@ -754,7 +752,7 @@
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCallOnSlowPath,
                                                             kIntrinsified);
-  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
   locations->SetInAt(2, Location::RequiresRegister());
@@ -770,19 +768,27 @@
 static void CheckPosition(X86_64Assembler* assembler,
                           Location pos,
                           CpuRegister input,
-                          CpuRegister length,
+                          Location length,
                           SlowPathCode* slow_path,
                           CpuRegister input_len,
-                          CpuRegister temp) {
-  // Where is the length in the String?
+                          CpuRegister temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
   if (pos.IsConstant()) {
     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
     if (pos_const == 0) {
-      // Check that length(input) >= length.
-      __ cmpl(Address(input, length_offset), length);
-      __ j(kLess, slow_path->GetEntryLabel());
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        if (length.IsConstant()) {
+          __ cmpl(Address(input, length_offset),
+                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
+        }
+        __ j(kLess, slow_path->GetEntryLabel());
+      }
     } else {
       // Check that length(input) >= pos.
       __ movl(input_len, Address(input, length_offset));
@@ -791,9 +797,18 @@
 
       // Check that (length(input) - pos) >= length.
       __ leal(temp, Address(input_len, -pos_const));
-      __ cmpl(temp, length);
+      if (length.IsConstant()) {
+        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmpl(temp, length.AsRegister<CpuRegister>());
+      }
       __ j(kLess, slow_path->GetEntryLabel());
     }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kNotEqual, slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
@@ -807,7 +822,11 @@
     // Check that (length(input) - pos) >= length.
     __ movl(temp, Address(input, length_offset));
     __ subl(temp, pos_reg);
-    __ cmpl(temp, length);
+    if (length.IsConstant()) {
+      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpl(temp, length.AsRegister<CpuRegister>());
+    }
     __ j(kLess, slow_path->GetEntryLabel());
   }
 }
@@ -817,9 +836,9 @@
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
-  Location srcPos = locations->InAt(1);
+  Location src_pos = locations->InAt(1);
   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
-  Location destPos = locations->InAt(3);
+  Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
 
   // Temporaries that we need for MOVSW.
@@ -852,6 +871,12 @@
     __ j(kLess, slow_path->GetEntryLabel());
   }
 
+  // Validity checks: source.
+  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);
+
+  // Validity checks: dest.
+  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);
+
   // We need the count in RCX.
   if (length.IsConstant()) {
     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
@@ -859,12 +884,6 @@
     __ movl(count, length.AsRegister<CpuRegister>());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
-
-  // Validity checks: dest.
-  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
-
   // Okay, everything checks out.  Finally time to do the copy.
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
@@ -872,18 +891,18 @@
 
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
 
-  if (srcPos.IsConstant()) {
-    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
   } else {
-    __ leal(src_base, Address(src, srcPos.AsRegister<CpuRegister>(),
+    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
   }
-  if (destPos.IsConstant()) {
-    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
+  if (dest_pos.IsConstant()) {
+    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
   } else {
-    __ leal(dest_base, Address(dest, destPos.AsRegister<CpuRegister>(),
+    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                                ScaleFactor::TIMES_2, data_offset));
   }
 
@@ -893,6 +912,231 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+
+void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
+  Location src_pos = locations->InAt(1);
+  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+  CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  NearLabel ok;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (!optimizations.GetDestinationIsSource()) {
+    if (!src_pos.IsConstant() || !dest_pos.IsConstant()) {
+      __ cmpl(src, dest);
+    }
+  }
+
+  // If source and destination are the same, we go to the slow path if we need
+  // to do forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ j(kNotEqual, &ok);
+      }
+      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
+      __ j(kGreater, slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ j(kNotEqual, &ok);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
+      __ j(kLess, slow_path->GetEntryLabel());
+    }
+  }
+
+  __ Bind(&ok);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ testl(src, src);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ testl(dest, dest);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+    __ movl(temp1, Address(dest, class_offset));
+    __ movl(temp2, Address(src, class_offset));
+    bool did_unpoison = false;
+    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+        !optimizations.GetSourceIsNonPrimitiveArray()) {
+      // One or two of the references need to be unpoisoned. Unpoison them
+      // both to make the identity check valid.
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ MaybeUnpoisonHeapReference(temp2);
+      did_unpoison = true;
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // Bail out if the destination is not a non-primitive array.
+      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // Bail out if the source is not a non-primitive array.
+      __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    __ cmpl(temp1, temp2);
+
+    if (optimizations.GetDestinationIsTypedObjectArray()) {
+      NearLabel do_copy;
+      __ j(kEqual, &do_copy);
+      if (!did_unpoison) {
+        __ MaybeUnpoisonHeapReference(temp1);
+      }
+      __ movl(temp1, Address(temp1, component_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ movl(temp1, Address(temp1, super_offset));
+      // No need to unpoison the result, we're comparing against null.
+      __ testl(temp1, temp1);
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(&do_copy);
+    } else {
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non-primitive array.
+    __ movl(temp1, Address(src, class_offset));
+    __ MaybeUnpoisonHeapReference(temp1);
+    __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+    __ testl(CpuRegister(TMP), CpuRegister(TMP));
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Compute base source address, base destination address, and end source address.
+
+  uint32_t element_size = sizeof(int32_t);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp1, Address(src, element_size * constant + offset));
+  } else {
+    __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (dest_pos.IsConstant()) {
+    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp2, Address(dest, element_size * constant + offset));
+  } else {
+    __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (length.IsConstant()) {
+    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp3, Address(temp1, element_size * constant));
+  } else {
+    __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
+  }
+
+  // Iterate over the arrays and do a raw copy of the objects. We don't need to
+  // poison/unpoison, nor do we need a read barrier, as the next uses of the
+  // destination array will do it.
+  NearLabel loop, done;
+  __ cmpl(temp1, temp3);
+  __ j(kEqual, &done);
+  __ Bind(&loop);
+  __ movl(CpuRegister(TMP), Address(temp1, 0));
+  __ movl(Address(temp2, 0), CpuRegister(TMP));
+  __ addl(temp1, Immediate(element_size));
+  __ addl(temp2, Immediate(element_size));
+  __ cmpl(temp1, temp3);
+  __ j(kNotEqual, &loop);
+  __ Bind(&done);
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       CpuRegister(kNoRegister),
+                       false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
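+
+// The raw copy loop above as a hedged sketch over 32-bit reference slots
+// (element_size == sizeof(int32_t)); cursor names are illustrative:
+//
+//   void RawCopySketch(uint32_t* src_cur, uint32_t* dst_cur, uint32_t* src_end) {
+//     while (src_cur != src_end) {       // cmpl temp1, temp3
+//       *dst_cur++ = *src_cur++;         // movl via TMP, addl on both cursors
+//     }
+//   }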
+
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
@@ -1578,7 +1822,7 @@
 // memory model.
 static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                          CodeGeneratorX86_64* codegen) {
-  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
@@ -1651,7 +1895,7 @@
   locations->SetOut(Location::RequiresRegister());
   if (type == Primitive::kPrimNot) {
     // Need temp registers for card-marking.
-    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -1669,49 +1913,91 @@
 }
 
 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
-  X86_64Assembler* assembler =
-    reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
   CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
+  // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
   DCHECK_EQ(expected.AsRegister(), RAX);
   CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
 
-  if (type == Primitive::kPrimLong) {
-    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
-  } else {
-    // Integer or object.
-    if (type == Primitive::kPrimNot) {
-      // Mark card for object assuming new value is stored.
-      bool value_can_be_null = true;  // TODO: Worth finding out this information?
-      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
-                          locations->GetTemp(1).AsRegister<CpuRegister>(),
-                          base,
-                          value,
-                          value_can_be_null);
+  if (type == Primitive::kPrimNot) {
+    // Mark card for object assuming new value is stored.
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+                        locations->GetTemp(1).AsRegister<CpuRegister>(),
+                        base,
+                        value,
+                        value_can_be_null);
 
-      if (kPoisonHeapReferences) {
-        __ PoisonHeapReference(expected);
-        __ PoisonHeapReference(value);
+    bool base_equals_value = (base.AsRegister() == value.AsRegister());
+    Register value_reg = value.AsRegister();
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // If `base` and `value` are the same register location, move
+        // `value_reg` to a temporary register.  This way, poisoning
+        // `value_reg` won't invalidate `base`.
+        value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
+        __ movl(CpuRegister(value_reg), base);
       }
+
+      // Check that the register allocator did not assign the location
+      // of `expected` (RAX) to `value` nor to `base`, so that heap
+      // poisoning (when enabled) works as intended below.
+      // - If `value` were equal to `expected`, both references would
+      //   be poisoned twice, meaning they would not be poisoned at
+      //   all, as heap poisoning uses address negation.
+      // - If `base` were equal to `expected`, poisoning `expected`
+      //   would invalidate `base`.
+      DCHECK_NE(value_reg, expected.AsRegister());
+      DCHECK_NE(base.AsRegister(), expected.AsRegister());
+
+      __ PoisonHeapReference(expected);
+      __ PoisonHeapReference(CpuRegister(value_reg));
     }
 
-    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
-  }
+    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
 
-  // locked cmpxchg has full barrier semantics, and we don't need scheduling
-  // barriers at this time.
+    // locked cmpxchg has full barrier semantics, and we don't need
+    // scheduling barriers at this time.
 
-  // Convert ZF into the boolean result.
-  __ setcc(kZero, out);
-  __ movzxb(out, out);
+    // Convert ZF into the boolean result.
+    __ setcc(kZero, out);
+    __ movzxb(out, out);
 
-  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    __ UnpoisonHeapReference(value);
-    __ UnpoisonHeapReference(expected);
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // `value_reg` has been moved to a temporary register, no need
+        // to unpoison it.
+      } else {
+        // Ensure `value` is different from `out`, so that unpoisoning
+        // the former does not invalidate the latter.
+        DCHECK_NE(value_reg, out.AsRegister());
+        __ UnpoisonHeapReference(CpuRegister(value_reg));
+      }
+      // Ensure `expected` is different from `out`, so that unpoisoning
+      // the former does not invalidate the latter.
+      DCHECK_NE(expected.AsRegister(), out.AsRegister());
+      __ UnpoisonHeapReference(expected);
+    }
+  } else {
+    if (type == Primitive::kPrimInt) {
+      __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+    } else if (type == Primitive::kPrimLong) {
+      __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
+    } else {
+      LOG(FATAL) << "Unexpected CAS type " << type;
+    }
+
+    // locked cmpxchg has full barrier semantics, and we don't need
+    // scheduling barriers at this time.
+
+    // Convert ZF into the boolean result.
+    __ setcc(kZero, out);
+    __ movzxb(out, out);
   }
 }
 
@@ -1749,8 +2035,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
-  X86_64Assembler* assembler =
-    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
@@ -1794,8 +2079,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
-  X86_64Assembler* assembler =
-    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 558892d..47457de 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -61,7 +61,7 @@
     loop_body_->AddSuccessor(loop_header_);
 
     // Provide boiler-plate instructions.
-    parameter_ = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
+    parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
     entry_->AddInstruction(parameter_);
     constant_ = graph_->GetIntConstant(42);
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
@@ -104,13 +104,19 @@
 
   // Populate the loop with instructions: set/get field with different types.
   NullHandle<mirror::DexCache> dex_cache;
-  HInstruction* get_field = new (&allocator_) HInstanceFieldGet(
-      parameter_, Primitive::kPrimLong, MemberOffset(10),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
+  HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
+                                                                Primitive::kPrimLong,
+                                                                MemberOffset(10),
+                                                                false,
+                                                                kUnknownFieldIndex,
+                                                                kUnknownClassDefIndex,
+                                                                graph_->GetDexFile(),
+                                                                dex_cache,
+                                                                0);
   loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction());
   HInstruction* set_field = new (&allocator_) HInstanceFieldSet(
       parameter_, constant_, Primitive::kPrimInt, MemberOffset(20),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
+      false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), dex_cache, 0);
   loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_field->GetBlock(), loop_body_);
@@ -125,13 +131,26 @@
 
   // Populate the loop with instructions: set/get field with same types.
   NullHandle<mirror::DexCache> dex_cache;
-  HInstruction* get_field = new (&allocator_) HInstanceFieldGet(
-      parameter_, Primitive::kPrimLong, MemberOffset(10),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
+  HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
+                                                                Primitive::kPrimLong,
+                                                                MemberOffset(10),
+                                                                false,
+                                                                kUnknownFieldIndex,
+                                                                kUnknownClassDefIndex,
+                                                                graph_->GetDexFile(),
+                                                                dex_cache,
+                                                                0);
   loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction());
-  HInstruction* set_field = new (&allocator_) HInstanceFieldSet(
-      parameter_, get_field, Primitive::kPrimLong, MemberOffset(10),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
+  HInstruction* set_field = new (&allocator_) HInstanceFieldSet(parameter_,
+                                                                get_field,
+                                                                Primitive::kPrimLong,
+                                                                MemberOffset(10),
+                                                                false,
+                                                                kUnknownFieldIndex,
+                                                                kUnknownClassDefIndex,
+                                                                graph_->GetDexFile(),
+                                                                dex_cache,
+                                                                0);
   loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_field->GetBlock(), loop_body_);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index b9ab290..7f67560 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -77,7 +77,7 @@
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
   ASSERT_EQ(8u, range->GetEnd());
-  HBasicBlock* block = graph->GetBlock(1);
+  HBasicBlock* block = graph->GetBlocks()[1];
   ASSERT_TRUE(block->GetLastInstruction()->IsReturn());
   ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition());
   ASSERT_TRUE(range->GetNext() == nullptr);
@@ -125,7 +125,7 @@
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
   ASSERT_EQ(22u, range->GetEnd());
-  HBasicBlock* block = graph->GetBlock(3);
+  HBasicBlock* block = graph->GetBlocks()[3];
   ASSERT_TRUE(block->GetLastInstruction()->IsReturn());
   ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition());
   ASSERT_TRUE(range->GetNext() == nullptr);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
new file mode 100644
index 0000000..6fbb682
--- /dev/null
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -0,0 +1,913 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "load_store_elimination.h"
+#include "side_effects_analysis.h"
+
+#include <iostream>
+
+namespace art {
+
+class ReferenceInfo;
+
+// A cap for the number of heap locations to prevent pathological time/space consumption.
+// The number of heap locations for most of the methods stays below this threshold.
+constexpr size_t kMaxNumberOfHeapLocations = 32;
+
+// A ReferenceInfo contains additional info about a reference such as
+// whether it's a singleton, returned, etc.
+class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
+ public:
+  ReferenceInfo(HInstruction* reference, size_t pos) : reference_(reference), position_(pos) {
+    is_singleton_ = true;
+    is_singleton_and_not_returned_ = true;
+    if (!reference_->IsNewInstance() && !reference_->IsNewArray()) {
+      // For references not allocated in the method, don't assume anything.
+      is_singleton_ = false;
+      is_singleton_and_not_returned_ = false;
+      return;
+    }
+
+    // Visit all uses to determine if this reference can spread into the heap,
+    // a method call, etc.
+    for (HUseIterator<HInstruction*> use_it(reference_->GetUses());
+         !use_it.Done();
+         use_it.Advance()) {
+      HInstruction* use = use_it.Current()->GetUser();
+      DCHECK(!use->IsNullCheck()) << "NullCheck should have been eliminated";
+      if (use->IsBoundType()) {
+        // BoundType shouldn't normally be necessary for a NewInstance.
+        // Just be conservative for the uncommon cases.
+        is_singleton_ = false;
+        is_singleton_and_not_returned_ = false;
+        return;
+      }
+      if (use->IsPhi() || use->IsInvoke() ||
+          (use->IsInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
+          (use->IsUnresolvedInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
+          (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) ||
+          (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(0))) ||
+          (use->IsArraySet() && (reference_ == use->InputAt(2)))) {
+        // reference_ is merged to a phi, passed to a callee, or stored to heap.
+        // reference_ isn't the only name that can refer to its value anymore.
+        is_singleton_ = false;
+        is_singleton_and_not_returned_ = false;
+        return;
+      }
+      if (use->IsReturn()) {
+        is_singleton_and_not_returned_ = false;
+      }
+    }
+  }
+
+  HInstruction* GetReference() const {
+    return reference_;
+  }
+
+  size_t GetPosition() const {
+    return position_;
+  }
+
+  // Returns true if reference_ is the only name that can refer to its value during
+  // the lifetime of the method. So it's guaranteed to not have any alias in
+  // the method (including its callees).
+  bool IsSingleton() const {
+    return is_singleton_;
+  }
+
+  // Returns true if reference_ is a singleton and not returned to the caller.
+  // The allocation and stores into reference_ may be eliminated for such cases.
+  bool IsSingletonAndNotReturned() const {
+    return is_singleton_and_not_returned_;
+  }
+
+ private:
+  HInstruction* const reference_;
+  const size_t position_;     // position in HeapLocationCollector's ref_info_array_.
+  bool is_singleton_;         // can only be referred to by a single name in the method.
+  bool is_singleton_and_not_returned_;  // reference_ is singleton and not returned to caller.
+
+  DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
+};
+
+// A heap location is a reference-offset/index pair that a value can be loaded from
+// or stored to.
+class HeapLocation : public ArenaObject<kArenaAllocMisc> {
+ public:
+  static constexpr size_t kInvalidFieldOffset = -1;
+
+  // TODO: more fine-grained array types.
+  static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
+
+  HeapLocation(ReferenceInfo* ref_info,
+               size_t offset,
+               HInstruction* index,
+               int16_t declaring_class_def_index)
+      : ref_info_(ref_info),
+        offset_(offset),
+        index_(index),
+        declaring_class_def_index_(declaring_class_def_index),
+        may_become_unknown_(true) {
+    DCHECK(ref_info != nullptr);
+    DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
+           (offset != kInvalidFieldOffset && index == nullptr));
+
+    if (ref_info->IsSingletonAndNotReturned()) {
+      // We try to track stores to singletons that aren't returned so that those stores
+      // can be eliminated, since values in a singleton's fields cannot be killed due to
+      // aliasing. Those values can still be killed by merging, since we don't build phis
+      // for merging heap values. SetMayBecomeUnknown(true) may be called later once such
+      // a merge becomes possible.
+      may_become_unknown_ = false;
+    }
+  }
+
+  ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
+  size_t GetOffset() const { return offset_; }
+  HInstruction* GetIndex() const { return index_; }
+
+  // Returns the dex def index of the declaring class.
+  // It's kDeclaringClassDefIndexForArrays for an array element.
+  int16_t GetDeclaringClassDefIndex() const {
+    return declaring_class_def_index_;
+  }
+
+  bool IsArrayElement() const {
+    return index_ != nullptr;
+  }
+
+  // Returns true if this heap location's value may become unknown after it's
+  // set to a value, either due to merging of values or due to aliasing.
+  bool MayBecomeUnknown() const {
+    return may_become_unknown_;
+  }
+  void SetMayBecomeUnknown(bool val) {
+    may_become_unknown_ = val;
+  }
+
+ private:
+  ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
+  const size_t offset_;                // offset of static/instance field.
+  HInstruction* const index_;          // index of an array element.
+  const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
+  bool may_become_unknown_;            // value may become kUnknownHeapValue.
+
+  DISALLOW_COPY_AND_ASSIGN(HeapLocation);
+};
+
+static HInstruction* HuntForOriginalReference(HInstruction* ref) {
+  DCHECK(ref != nullptr);
+  while (ref->IsNullCheck() || ref->IsBoundType()) {
+    ref = ref->InputAt(0);
+  }
+  return ref;
+}
+
+// A HeapLocationCollector collects all relevant heap locations and keeps
+// an aliasing matrix for all locations.
+class HeapLocationCollector : public HGraphVisitor {
+ public:
+  static constexpr size_t kHeapLocationNotFound = -1;
+  // Start with a single uint32_t word. That's enough bits for the pair-wise
+  // aliasing matrix of 8 heap locations (8 * 7 / 2 = 28 bits).
+  static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
+
+  explicit HeapLocationCollector(HGraph* graph)
+      : HGraphVisitor(graph),
+        ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        aliasing_matrix_(graph->GetArena(), kInitialAliasingMatrixBitVectorSize, true),
+        has_heap_stores_(false),
+        has_volatile_(false),
+        has_monitor_operations_(false),
+        may_deoptimize_(false) {}
+
+  size_t GetNumberOfHeapLocations() const {
+    return heap_locations_.size();
+  }
+
+  HeapLocation* GetHeapLocation(size_t index) const {
+    return heap_locations_[index];
+  }
+
+  ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const {
+    for (size_t i = 0; i < ref_info_array_.size(); i++) {
+      ReferenceInfo* ref_info = ref_info_array_[i];
+      if (ref_info->GetReference() == ref) {
+        DCHECK_EQ(i, ref_info->GetPosition());
+        return ref_info;
+      }
+    }
+    return nullptr;
+  }
+
+  bool HasHeapStores() const {
+    return has_heap_stores_;
+  }
+
+  bool HasVolatile() const {
+    return has_volatile_;
+  }
+
+  bool HasMonitorOps() const {
+    return has_monitor_operations_;
+  }
+
+  // Returns whether this method may be deoptimized.
+  // Currently we don't have metadata support for deoptimizing
+  // a method in which allocations/stores have been eliminated.
+  bool MayDeoptimize() const {
+    return may_deoptimize_;
+  }
+
+  // Find and return the heap location index in heap_locations_.
+  size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
+                               size_t offset,
+                               HInstruction* index,
+                               int16_t declaring_class_def_index) const {
+    for (size_t i = 0; i < heap_locations_.size(); i++) {
+      HeapLocation* loc = heap_locations_[i];
+      if (loc->GetReferenceInfo() == ref_info &&
+          loc->GetOffset() == offset &&
+          loc->GetIndex() == index &&
+          loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
+        return i;
+      }
+    }
+    return kHeapLocationNotFound;
+  }
+
+  // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
+  bool MayAlias(size_t index1, size_t index2) const {
+    if (index1 < index2) {
+      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2));
+    } else if (index1 > index2) {
+      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1));
+    } else {
+      DCHECK(false) << "index1 and index2 are expected to be different";
+      return true;
+    }
+  }
+
+  void BuildAliasingMatrix() {
+    const size_t number_of_locations = heap_locations_.size();
+    if (number_of_locations == 0) {
+      return;
+    }
+    size_t pos = 0;
+    // Compute aliasing info between every pair of different heap locations.
+    // Save the result in a matrix represented as a BitVector.
+    for (size_t i = 0; i < number_of_locations - 1; i++) {
+      for (size_t j = i + 1; j < number_of_locations; j++) {
+        if (ComputeMayAlias(i, j)) {
+          aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos));
+        }
+        pos++;
+      }
+    }
+  }
+
+ private:
+  // An allocation cannot alias with a name which already exists at the point
+  // of the allocation, such as a parameter or a load happening before the allocation.
+  bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+    if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) {
+      // Any reference that can alias with the allocation must appear after it, either
+      // later in the same block or in the block's successors. In reverse post order,
+      // those instructions will be visited after the allocation.
+      return ref_info2->GetPosition() >= ref_info1->GetPosition();
+    }
+    return true;
+  }
+
+  bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+    if (ref_info1 == ref_info2) {
+      return true;
+    } else if (ref_info1->IsSingleton()) {
+      return false;
+    } else if (ref_info2->IsSingleton()) {
+      return false;
+    } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) ||
+        !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) {
+      return false;
+    }
+    return true;
+  }
+
+  // `index1` and `index2` are indices in the array of collected heap locations.
+  // Returns the position in the bit vector that tracks whether the two heap
+  // locations may alias.
+  size_t AliasingMatrixPosition(size_t index1, size_t index2) const {
+    DCHECK(index2 > index1);
+    const size_t number_of_locations = heap_locations_.size();
+    // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1).
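+    // E.g., with 4 locations the pairs map to bits
+    //   (0,1)->0, (0,2)->1, (0,3)->2, (1,2)->3, (1,3)->4, (2,3)->5,
+    // so AliasingMatrixPosition(1, 3) == 4*1 - 2*1/2 + (3 - 1 - 1) == 4.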
+    return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1));
+  }
+
+  // An additional position is passed in to make sure the calculated position is correct.
+  size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) {
+    size_t calculated_position = AliasingMatrixPosition(index1, index2);
+    DCHECK_EQ(calculated_position, position);
+    return calculated_position;
+  }
+
+  // Compute if two locations may alias to each other.
+  bool ComputeMayAlias(size_t index1, size_t index2) const {
+    HeapLocation* loc1 = heap_locations_[index1];
+    HeapLocation* loc2 = heap_locations_[index2];
+    if (loc1->GetOffset() != loc2->GetOffset()) {
+      // Either two different instance fields, or one is an instance
+      // field and the other is an array element.
+      return false;
+    }
+    if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) {
+      // Different types.
+      return false;
+    }
+    if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) {
+      return false;
+    }
+    if (loc1->IsArrayElement() && loc2->IsArrayElement()) {
+      HInstruction* array_index1 = loc1->GetIndex();
+      HInstruction* array_index2 = loc2->GetIndex();
+      DCHECK(array_index1 != nullptr);
+      DCHECK(array_index2 != nullptr);
+      if (array_index1->IsIntConstant() &&
+          array_index2->IsIntConstant() &&
+          array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) {
+        // Different constant indices do not alias.
+        return false;
+      }
+    }
+    return true;
+  }
+
+  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* ref) {
+    ReferenceInfo* ref_info = FindReferenceInfoOf(ref);
+    if (ref_info == nullptr) {
+      size_t pos = ref_info_array_.size();
+      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(ref, pos);
+      ref_info_array_.push_back(ref_info);
+    }
+    return ref_info;
+  }
+
+  HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
+                                        size_t offset,
+                                        HInstruction* index,
+                                        int16_t declaring_class_def_index) {
+    HInstruction* original_ref = HuntForOriginalReference(ref);
+    ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
+    size_t heap_location_idx = FindHeapLocationIndex(
+        ref_info, offset, index, declaring_class_def_index);
+    if (heap_location_idx == kHeapLocationNotFound) {
+      HeapLocation* heap_loc = new (GetGraph()->GetArena())
+          HeapLocation(ref_info, offset, index, declaring_class_def_index);
+      heap_locations_.push_back(heap_loc);
+      return heap_loc;
+    }
+    return heap_locations_[heap_location_idx];
+  }
+
+  void VisitFieldAccess(HInstruction* field_access,
+                        HInstruction* ref,
+                        const FieldInfo& field_info,
+                        bool is_store) {
+    if (field_info.IsVolatile()) {
+      has_volatile_ = true;
+    }
+    const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
+    const size_t offset = field_info.GetFieldOffset().SizeValue();
+    HeapLocation* location = GetOrCreateHeapLocation(
+        ref, offset, nullptr, declaring_class_def_index);
+    // A store may be eliminated if all future loads of that value can be eliminated.
+    // A value stored into a singleton field will not be killed due to aliasing. However,
+    // if the store is in a block that doesn't post-dominate the value's definition, the
+    // value may still be killed by a later merge. Until we have post-dominance info, we
+    // conservatively check that the store is in the same block as the definition.
+    if (is_store &&
+        location->GetReferenceInfo()->IsSingletonAndNotReturned() &&
+        field_access->GetBlock() != ref->GetBlock()) {
+      location->SetMayBecomeUnknown(true);
+    }
+  }
+
+  void VisitArrayAccess(HInstruction* array, HInstruction* index) {
+    GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset,
+        index, HeapLocation::kDeclaringClassDefIndexForArrays);
+  }
+
+  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+  }
+
+  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    has_heap_stores_ = true;
+  }
+
+  void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+  }
+
+  void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    has_heap_stores_ = true;
+  }
+
+  // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
+  // since we cannot accurately track the fields.
+
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
+    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+  }
+
+  void VisitArraySet(HArraySet* instruction) OVERRIDE {
+    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+    has_heap_stores_ = true;
+  }
+
+  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
+    // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
+    GetOrCreateReferenceInfo(new_instance);
+  }
+
+  void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
+    may_deoptimize_ = true;
+  }
+
+  void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
+    has_monitor_operations_ = true;
+  }
+
+  ArenaVector<ReferenceInfo*> ref_info_array_;   // All references used for heap accesses.
+  ArenaVector<HeapLocation*> heap_locations_;    // All heap locations.
+  ArenaBitVector aliasing_matrix_;    // aliasing info between each pair of locations.
+  bool has_heap_stores_;    // If there are no heap stores, LSE acts as GVN with better
+                            // alias analysis and won't be as effective.
+  bool has_volatile_;       // If there are volatile field accesses.
+  bool has_monitor_operations_;    // If there are monitor operations.
+  bool may_deoptimize_;
+
+  DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
+};
+
+// An unknown heap value. Loads from a heap location holding this value cannot be eliminated.
+static HInstruction* const kUnknownHeapValue =
+    reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1));
+// Default heap value after an allocation.
+static HInstruction* const kDefaultHeapValue =
+    reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2));
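+// These sentinel pointers are never dereferenced; they only mark the state of a
+// heap location in the heap_values_for_ arrays below.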
+
+class LSEVisitor : public HGraphVisitor {
+ public:
+  LSEVisitor(HGraph* graph,
+             const HeapLocationCollector& heap_locations_collector,
+             const SideEffectsAnalysis& side_effects)
+      : HGraphVisitor(graph),
+        heap_location_collector_(heap_locations_collector),
+        side_effects_(side_effects),
+        heap_values_for_(graph->GetBlocks().size(),
+                         ArenaVector<HInstruction*>(heap_locations_collector.
+                                                        GetNumberOfHeapLocations(),
+                                                    kUnknownHeapValue,
+                                                    graph->GetArena()->Adapter(kArenaAllocLSE)),
+                         graph->GetArena()->Adapter(kArenaAllocLSE)),
+        removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
+  }
+
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+    int block_id = block->GetBlockId();
+    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+    // TODO: try to reuse the heap_values array from one predecessor if possible.
+    if (block->IsLoopHeader()) {
+      // We do a single pass in reverse post order. For loops, use the side effects as a hint
+      // to see if the heap values should be killed.
+      if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
+        // Leave all values as kUnknownHeapValue.
+      } else {
+        // Inherit the values from pre-header.
+        HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+        ArenaVector<HInstruction*>& pre_header_heap_values =
+            heap_values_for_[pre_header->GetBlockId()];
+        for (size_t i = 0; i < heap_values.size(); i++) {
+          heap_values[i] = pre_header_heap_values[i];
+        }
+      }
+    } else {
+      MergePredecessorValues(block);
+    }
+    HGraphVisitor::VisitBasicBlock(block);
+  }
+
+  // Remove recorded instructions that should be eliminated.
+  void RemoveInstructions() {
+    size_t size = removed_instructions_.size();
+    DCHECK_EQ(size, substitute_instructions_.size());
+    for (size_t i = 0; i < size; i++) {
+      HInstruction* instruction = removed_instructions_[i];
+      DCHECK(instruction != nullptr);
+      HInstruction* substitute = substitute_instructions_[i];
+      if (substitute != nullptr) {
+        // Keep following the substitute chain until we reach one that's not removed.
+        HInstruction* sub_sub = FindSubstitute(substitute);
+        while (sub_sub != substitute) {
+          substitute = sub_sub;
+          sub_sub = FindSubstitute(substitute);
+        }
+        instruction->ReplaceWith(substitute);
+      }
+      instruction->GetBlock()->RemoveInstruction(instruction);
+    }
+    // TODO: remove unnecessary allocations.
+    // Eliminate instructions in singleton_new_instances_ that:
+    // - don't have uses,
+    // - don't have finalizers,
+    // - are instantiable and accessible,
+    // - have no/separate clinit check.
+  }
+
+ private:
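+  // Merge the recorded heap values from all predecessors of `block`: a value is
+  // kept only if every predecessor recorded the same value for that location;
+  // otherwise it degrades to kUnknownHeapValue (we don't build phis for heap values).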
+  void MergePredecessorValues(HBasicBlock* block) {
+    const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
+    if (predecessors.size() == 0) {
+      return;
+    }
+    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i];
+      if (value != kUnknownHeapValue) {
+        for (size_t j = 1; j < predecessors.size(); j++) {
+          if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) {
+            value = kUnknownHeapValue;
+            break;
+          }
+        }
+      }
+      heap_values[i] = value;
+    }
+  }
+
+  // `instruction` is being removed. Try to see if the null check on it
+  // can also be removed. This can happen when the same value is stored in two
+  // branches but not in a dominating block, for example:
+  //   int[] a = foo();
+  //   if () {
+  //     a[0] = 2;
+  //   } else {
+  //     a[0] = 2;
+  //   }
+  //   // a[0] can now be replaced with constant 2, and the null check on it can be removed.
+  void TryRemovingNullCheck(HInstruction* instruction) {
+    HInstruction* prev = instruction->GetPrevious();
+    if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
+      // Previous instruction is a null check for this instruction. Remove the null check.
+      prev->ReplaceWith(prev->InputAt(0));
+      prev->GetBlock()->RemoveInstruction(prev);
+    }
+  }
+
+  HInstruction* GetDefaultValue(Primitive::Type type) {
+    switch (type) {
+      case Primitive::kPrimNot:
+        return GetGraph()->GetNullConstant();
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+        return GetGraph()->GetIntConstant(0);
+      case Primitive::kPrimLong:
+        return GetGraph()->GetLongConstant(0);
+      case Primitive::kPrimFloat:
+        return GetGraph()->GetFloatConstant(0);
+      case Primitive::kPrimDouble:
+        return GetGraph()->GetDoubleConstant(0);
+      default:
+        UNREACHABLE();
+    }
+  }
+
+  void VisitGetLocation(HInstruction* instruction,
+                        HInstruction* ref,
+                        size_t offset,
+                        HInstruction* index,
+                        int16_t declaring_class_def_index) {
+    HInstruction* original_ref = HuntForOriginalReference(ref);
+    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
+    size_t idx = heap_location_collector_.FindHeapLocationIndex(
+        ref_info, offset, index, declaring_class_def_index);
+    DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
+    ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[instruction->GetBlock()->GetBlockId()];
+    HInstruction* heap_value = heap_values[idx];
+    if (heap_value == kDefaultHeapValue) {
+      HInstruction* constant = GetDefaultValue(instruction->GetType());
+      removed_instructions_.push_back(instruction);
+      substitute_instructions_.push_back(constant);
+      heap_values[idx] = constant;
+      return;
+    }
+    if ((heap_value != kUnknownHeapValue) &&
+        // Keep the load due to possible I/F, J/D array aliasing.
+        // See b/22538329 for details.
+        (heap_value->GetType() == instruction->GetType())) {
+      removed_instructions_.push_back(instruction);
+      substitute_instructions_.push_back(heap_value);
+      TryRemovingNullCheck(instruction);
+      return;
+    }
+
+    if (heap_value == kUnknownHeapValue) {
+      // Put the load as the value into the HeapLocation.
+      // This acts like GVN but with better aliasing analysis.
+      heap_values[idx] = instruction;
+    }
+  }
+
+  bool Equal(HInstruction* heap_value, HInstruction* value) {
+    if (heap_value == value) {
+      return true;
+    }
+    if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) {
+      return true;
+    }
+    return false;
+  }
+
+  void VisitSetLocation(HInstruction* instruction,
+                        HInstruction* ref,
+                        size_t offset,
+                        HInstruction* index,
+                        int16_t declaring_class_def_index,
+                        HInstruction* value) {
+    HInstruction* original_ref = HuntForOriginalReference(ref);
+    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
+    size_t idx = heap_location_collector_.FindHeapLocationIndex(
+        ref_info, offset, index, declaring_class_def_index);
+    DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
+    ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[instruction->GetBlock()->GetBlockId()];
+    HInstruction* heap_value = heap_values[idx];
+    bool redundant_store = false;
+    if (Equal(heap_value, value)) {
+      // Store into the heap location with the same value.
+      redundant_store = true;
+    } else if (index != nullptr) {
+      // For array elements, don't eliminate stores since they can easily be aliased
+      // through non-constant indices.
+    } else if (!heap_location_collector_.MayDeoptimize() &&
+               ref_info->IsSingletonAndNotReturned() &&
+               !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) {
+      // This is a store into a field of a singleton that's not returned, and the value
+      // cannot be killed by a merge. The store is redundant since future loads will get
+      // the value set by this instruction.
+      Primitive::Type type = Primitive::kPrimVoid;
+      if (instruction->IsInstanceFieldSet()) {
+        type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType();
+      } else if (instruction->IsStaticFieldSet()) {
+        type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType();
+      } else {
+        DCHECK(false) << "Must be an instance/static field set instruction.";
+      }
+      if (value->GetType() != type) {
+        // I/F, J/D aliasing should not happen for fields.
+        DCHECK(Primitive::IsIntegralType(value->GetType()));
+        DCHECK(!Primitive::Is64BitType(value->GetType()));
+        DCHECK(Primitive::IsIntegralType(type));
+        DCHECK(!Primitive::Is64BitType(type));
+        // Keep the store since the corresponding load isn't eliminated due to different types.
+        // TODO: handle the different int types so that we can eliminate this store.
+        redundant_store = false;
+      } else {
+        redundant_store = true;
+      }
+      // TODO: eliminate the store if the singleton object is not finalizable.
+      redundant_store = false;
+    }
+    if (redundant_store) {
+      removed_instructions_.push_back(instruction);
+      substitute_instructions_.push_back(nullptr);
+      TryRemovingNullCheck(instruction);
+    }
+
+    heap_values[idx] = value;
+    // This store may kill values in other heap locations due to aliasing.
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      if (heap_values[i] == value) {
+        // Same value should be kept even if aliasing happens.
+        continue;
+      }
+      if (heap_values[i] == kUnknownHeapValue) {
+        // Value is already unknown, no need for aliasing check.
+        continue;
+      }
+      if (heap_location_collector_.MayAlias(i, idx)) {
+        // Kill heap locations that may alias.
+        heap_values[i] = kUnknownHeapValue;
+      }
+    }
+  }
+
+  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
+    HInstruction* obj = instruction->InputAt(0);
+    size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
+    int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
+    VisitGetLocation(instruction, obj, offset, nullptr, declaring_class_def_index);
+  }
+
+  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+    HInstruction* obj = instruction->InputAt(0);
+    size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
+    int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
+    HInstruction* value = instruction->InputAt(1);
+    VisitSetLocation(instruction, obj, offset, nullptr, declaring_class_def_index, value);
+  }
+
+  void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+    HInstruction* cls = instruction->InputAt(0);
+    size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
+    int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
+    VisitGetLocation(instruction, cls, offset, nullptr, declaring_class_def_index);
+  }
+
+  void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+    HInstruction* cls = instruction->InputAt(0);
+    size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
+    int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
+    HInstruction* value = instruction->InputAt(1);
+    VisitSetLocation(instruction, cls, offset, nullptr, declaring_class_def_index, value);
+  }
+
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
+    HInstruction* array = instruction->InputAt(0);
+    HInstruction* index = instruction->InputAt(1);
+    VisitGetLocation(instruction,
+                     array,
+                     HeapLocation::kInvalidFieldOffset,
+                     index,
+                     HeapLocation::kDeclaringClassDefIndexForArrays);
+  }
+
+  void VisitArraySet(HArraySet* instruction) OVERRIDE {
+    HInstruction* array = instruction->InputAt(0);
+    HInstruction* index = instruction->InputAt(1);
+    HInstruction* value = instruction->InputAt(2);
+    VisitSetLocation(instruction,
+                     array,
+                     HeapLocation::kInvalidFieldOffset,
+                     index,
+                     HeapLocation::kDeclaringClassDefIndexForArrays,
+                     value);
+  }
+
+  void HandleInvoke(HInstruction* invoke) {
+    ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[invoke->GetBlock()->GetBlockId()];
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+      if (ref_info->IsSingleton()) {
+        // Singleton references cannot be seen by the callee.
+      } else {
+        heap_values[i] = kUnknownHeapValue;
+      }
+    }
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE {
+    HandleInvoke(clinit);
+  }
+
+  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE {
+    // Conservatively treat it as an invocation.
+    HandleInvoke(instruction);
+  }
+
+  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE {
+    // Conservatively treat it as an invocation.
+    HandleInvoke(instruction);
+  }
+
+  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE {
+    // Conservatively treat it as an invocation.
+    HandleInvoke(instruction);
+  }
+
+  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE {
+    // Conservatively treat it as an invocation.
+    HandleInvoke(instruction);
+  }
+
+  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
+    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
+    if (ref_info == nullptr) {
+      // new_instance isn't used for field accesses. No need to process it.
+      return;
+    }
+    if (!heap_location_collector_.MayDeoptimize() &&
+        ref_info->IsSingletonAndNotReturned()) {
+      // The allocation might be eliminated.
+      singleton_new_instances_.push_back(new_instance);
+    }
+    ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[new_instance->GetBlock()->GetBlockId()];
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      HInstruction* ref =
+          heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->GetReference();
+      size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
+      if (ref == new_instance && offset >= mirror::kObjectHeaderSize) {
+        // Instance fields except the header fields are set to default heap values.
+        heap_values[i] = kDefaultHeapValue;
+      }
+    }
+  }
+
+  // Find an instruction's substitute if it should be removed.
+  // Return the same instruction if it should not be removed.
+  HInstruction* FindSubstitute(HInstruction* instruction) {
+    size_t size = removed_instructions_.size();
+    for (size_t i = 0; i < size; i++) {
+      if (removed_instructions_[i] == instruction) {
+        return substitute_instructions_[i];
+      }
+    }
+    return instruction;
+  }
+
+  const HeapLocationCollector& heap_location_collector_;
+  const SideEffectsAnalysis& side_effects_;
+
+  // One array of heap values for each block.
+  ArenaVector<ArenaVector<HInstruction*>> heap_values_for_;
+
+  // We record the instructions that should be eliminated but may still be
+  // referenced as heap values. They'll be removed at the end.
+  ArenaVector<HInstruction*> removed_instructions_;
+  ArenaVector<HInstruction*> substitute_instructions_;
+  ArenaVector<HInstruction*> singleton_new_instances_;
+
+  DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
+};
+
+void LoadStoreElimination::Run() {
+  if (graph_->IsDebuggable()) {
+    // Debugger may set heap values or trigger deoptimization of callers.
+    // Skip this optimization.
+    return;
+  }
+  HeapLocationCollector heap_location_collector(graph_);
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    heap_location_collector.VisitBasicBlock(it.Current());
+  }
+  if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
+    // Bail out if there are too many heap locations to deal with.
+    return;
+  }
+  if (!heap_location_collector.HasHeapStores()) {
+    // Without heap stores, this pass would act mostly as GVN on heap accesses.
+    return;
+  }
+  if (heap_location_collector.HasVolatile() || heap_location_collector.HasMonitorOps()) {
+    // Don't do load/store elimination if the method has volatile field accesses or
+    // monitor operations, for now.
+    // TODO: do it right.
+    return;
+  }
+  heap_location_collector.BuildAliasingMatrix();
+  LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    lse_visitor.VisitBasicBlock(it.Current());
+  }
+  lse_visitor.RemoveInstructions();
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
new file mode 100644
index 0000000..1d9e5c8
--- /dev/null
+++ b/compiler/optimizing/load_store_elimination.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class SideEffectsAnalysis;
+
+class LoadStoreElimination : public HOptimization {
+ public:
+  LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects)
+      : HOptimization(graph, kLoadStoreEliminationPassName),
+        side_effects_(side_effects) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kLoadStoreEliminationPassName = "load_store_elimination";
+
+ private:
+  const SideEffectsAnalysis& side_effects_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadStoreElimination);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index de4fb7e..d014379 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -481,12 +481,10 @@
                   bool intrinsified = false);
 
   void SetInAt(uint32_t at, Location location) {
-    DCHECK_LT(at, GetInputCount());
     inputs_[at] = location;
   }
 
   Location InAt(uint32_t at) const {
-    DCHECK_LT(at, GetInputCount());
     return inputs_[at];
   }
 
@@ -514,12 +512,10 @@
   }
 
   Location GetTemp(uint32_t at) const {
-    DCHECK_LT(at, GetTempCount());
     return temps_[at];
   }
 
   void SetTempAt(uint32_t at, Location location) {
-    DCHECK_LT(at, GetTempCount());
     DCHECK(temps_[at].IsUnallocated() || temps_[at].IsInvalid());
     temps_[at] = location;
   }
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 989970f..68fb0ac 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -21,6 +21,7 @@
 #include "base/bit_vector-inl.h"
 #include "base/bit_utils.h"
 #include "base/stl_util.h"
+#include "intrinsics.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change.h"
 
@@ -54,7 +55,6 @@
       visiting.ClearBit(current_id);
       worklist.pop_back();
     } else {
-      DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size());
       HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
       uint32_t successor_id = successor->GetBlockId();
       if (visiting.IsBitSet(successor_id)) {
@@ -88,7 +88,7 @@
 void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
-      HBasicBlock* block = GetBlock(i);
+      HBasicBlock* block = blocks_[i];
       DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
       for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
         RemoveAsUser(it.Current());
@@ -100,7 +100,7 @@
 void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
-      HBasicBlock* block = GetBlock(i);
+      HBasicBlock* block = blocks_[i];
       // We only need to update the successor, which might be live.
       for (HBasicBlock* successor : block->GetSuccessors()) {
         successor->RemovePredecessor(block);
@@ -174,7 +174,6 @@
     if (successors_visited[current_id] == current->GetSuccessors().size()) {
       worklist.pop_back();
     } else {
-      DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size());
       HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
 
       if (successor->GetDominator() == nullptr) {
@@ -185,7 +184,6 @@
 
       // Once all the forward edges have been visited, we know the immediate
       // dominator of the block. We can then start visiting its successors.
-      DCHECK_LT(successor->GetBlockId(), visits.size());
       if (++visits[successor->GetBlockId()] ==
           successor->GetPredecessors().size() - successor->NumberOfBackEdges()) {
         successor->GetDominator()->AddDominatedBlock(successor);
@@ -257,7 +255,7 @@
     pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
 
     for (size_t pred = 0; pred < header->GetPredecessors().size(); ++pred) {
-      HBasicBlock* predecessor = header->GetPredecessor(pred);
+      HBasicBlock* predecessor = header->GetPredecessors()[pred];
       if (!info->IsBackEdge(*predecessor)) {
         predecessor->ReplaceSuccessor(header, pre_header);
         pred--;
@@ -267,10 +265,10 @@
   }
 
   // Make sure the first predecessor of a loop header is the incoming block.
-  if (info->IsBackEdge(*header->GetPredecessor(0))) {
-    HBasicBlock* to_swap = header->GetPredecessor(0);
+  if (info->IsBackEdge(*header->GetPredecessors()[0])) {
+    HBasicBlock* to_swap = header->GetPredecessors()[0];
     for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) {
-      HBasicBlock* predecessor = header->GetPredecessor(pred);
+      HBasicBlock* predecessor = header->GetPredecessors()[pred];
       if (!info->IsBackEdge(*predecessor)) {
         header->predecessors_[pred] = to_swap;
         header->predecessors_[0] = predecessor;
@@ -293,7 +291,7 @@
 }
 
 static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) {
-  HBasicBlock* predecessor = block.GetPredecessor(pred_idx);
+  HBasicBlock* predecessor = block.GetPredecessors()[pred_idx];
   if (!predecessor->EndsWithTryBoundary()) {
     // Only edges from HTryBoundary can be exceptional.
     return false;
@@ -343,7 +341,7 @@
       HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
       for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
         if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-          catch_block->GetPredecessor(j)->ReplaceSuccessor(catch_block, normal_block);
+          catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
           --j;
         }
       }
@@ -365,7 +363,7 @@
     // Infer try membership from the first predecessor. Having simplified loops,
     // the first predecessor can never be a back edge and therefore it must have
     // been visited already and had its try membership set.
-    HBasicBlock* first_predecessor = block->GetPredecessor(0);
+    HBasicBlock* first_predecessor = block->GetPredecessors()[0];
     DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor));
     const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
     if (try_entry != nullptr) {
@@ -385,7 +383,7 @@
     if (block == nullptr) continue;
     if (block->NumberOfNormalSuccessors() > 1) {
       for (size_t j = 0; j < block->GetSuccessors().size(); ++j) {
-        HBasicBlock* successor = block->GetSuccessor(j);
+        HBasicBlock* successor = block->GetSuccessors()[j];
         DCHECK(!successor->IsCatchBlock());
         if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
@@ -534,7 +532,7 @@
 void HLoopInformation::Update() {
   HGraph* graph = header_->GetGraph();
   for (uint32_t id : blocks_.Indexes()) {
-    HBasicBlock* block = graph->GetBlock(id);
+    HBasicBlock* block = graph->GetBlocks()[id];
     // Reset loop information of non-header blocks inside the loop, except
     // members of inner nested loops because those should already have been
     // updated by their own LoopInformation.
@@ -576,6 +574,17 @@
   return other.blocks_.IsBitSet(header_->GetBlockId());
 }
 
+bool HLoopInformation::IsLoopInvariant(HInstruction* instruction, bool must_dominate) const {
+  HLoopInformation* other_loop = instruction->GetBlock()->GetLoopInformation();
+  if (other_loop != this && (other_loop == nullptr || !other_loop->IsIn(*this))) {
+    if (must_dominate) {
+      return instruction->GetBlock()->Dominates(GetHeader());
+    }
+    return true;
+  }
+  return false;
+}
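
The check above reduces to two questions: is the defining block outside this loop's block set (IsIn covers nesting), and, when must_dominate is requested, does that block also dominate the loop header. A minimal standalone sketch of the same decision, using hypothetical SimpleLoop/SimpleBlock stand-ins rather than ART's HLoopInformation/HBasicBlock:

    #include <set>

    // Hypothetical stand-ins; names are illustrative, not ART declarations.
    struct SimpleBlock;

    struct SimpleLoop {
      SimpleBlock* header;
      std::set<SimpleBlock*> blocks;  // All blocks in the loop, nested loops included.
      bool Contains(SimpleBlock* block) const { return blocks.count(block) != 0; }
    };

    struct SimpleBlock {
      std::set<SimpleBlock*> dominated;  // Precomputed: blocks this block dominates.
      bool Dominates(SimpleBlock* other) const { return dominated.count(other) != 0; }
    };

    // Mirrors the intent of HLoopInformation::IsLoopInvariant: invariant if defined
    // outside the loop; with must_dominate, the definition must dominate the header.
    bool IsLoopInvariant(const SimpleLoop& loop, SimpleBlock* def_block, bool must_dominate) {
      if (loop.Contains(def_block)) {
        return false;
      }
      return !must_dominate || def_block->Dominates(loop.header);
    }
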
+
 size_t HLoopInformation::GetLifetimeEnd() const {
   size_t last_position = 0;
   for (HBasicBlock* back_edge : GetBackEdges()) {
@@ -608,8 +617,23 @@
 void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                                   HInstruction* replacement) {
   DCHECK(initial->GetBlock() == this);
-  InsertInstructionBefore(replacement, initial);
-  initial->ReplaceWith(replacement);
+  if (initial->IsControlFlow()) {
+    // We can only replace a control flow instruction with another control flow instruction.
+    DCHECK(replacement->IsControlFlow());
+    DCHECK_EQ(replacement->GetId(), -1);
+    DCHECK_EQ(replacement->GetType(), Primitive::kPrimVoid);
+    DCHECK_EQ(initial->GetBlock(), this);
+    DCHECK_EQ(initial->GetType(), Primitive::kPrimVoid);
+    DCHECK(initial->GetUses().IsEmpty());
+    DCHECK(initial->GetEnvUses().IsEmpty());
+    replacement->SetBlock(this);
+    replacement->SetId(GetGraph()->GetNextInstructionId());
+    instructions_.InsertInstructionBefore(replacement, initial);
+    UpdateInputsUsers(replacement);
+  } else {
+    InsertInstructionBefore(replacement, initial);
+    initial->ReplaceWith(replacement);
+  }
   RemoveInstruction(initial);
 }
 
@@ -743,7 +767,6 @@
 }
 
 void HEnvironment::RemoveAsUserOfInput(size_t index) const {
-  DCHECK_LT(index, Size());
   const HUserRecord<HEnvironment*>& user_record = vregs_[index];
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
 }
@@ -1435,7 +1458,7 @@
   // Update links to the successors of `other`.
   successors_.clear();
   while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessor(0);
+    HBasicBlock* successor = other->GetSuccessors()[0];
     successor->ReplacePredecessor(other, this);
   }
 
@@ -1472,7 +1495,7 @@
   // Update links to the successors of `other`.
   successors_.clear();
   while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessor(0);
+    HBasicBlock* successor = other->GetSuccessors()[0];
     successor->ReplacePredecessor(other, this);
   }
 
@@ -1488,11 +1511,11 @@
 
 void HBasicBlock::ReplaceWith(HBasicBlock* other) {
   while (!GetPredecessors().empty()) {
-    HBasicBlock* predecessor = GetPredecessor(0);
+    HBasicBlock* predecessor = GetPredecessors()[0];
     predecessor->ReplaceSuccessor(this, other);
   }
   while (!GetSuccessors().empty()) {
-    HBasicBlock* successor = GetSuccessor(0);
+    HBasicBlock* successor = GetSuccessors()[0];
     successor->ReplacePredecessor(this, other);
   }
   for (HBasicBlock* dominated : GetDominatedBlocks()) {
@@ -1567,9 +1590,9 @@
   if (GetBlocks().size() == 3) {
     // Simple case of an entry block, a body block, and an exit block.
     // Put the body block's instruction into `invoke`'s block.
-    HBasicBlock* body = GetBlock(1);
-    DCHECK(GetBlock(0)->IsEntryBlock());
-    DCHECK(GetBlock(2)->IsExitBlock());
+    HBasicBlock* body = GetBlocks()[1];
+    DCHECK(GetBlocks()[0]->IsEntryBlock());
+    DCHECK(GetBlocks()[2]->IsExitBlock());
     DCHECK(!body->IsExitBlock());
     HInstruction* last = body->GetLastInstruction();
 
@@ -1579,7 +1602,6 @@
     // Replace the invoke with the return value of the inlined graph.
     if (last->IsReturn()) {
       return_value = last->InputAt(0);
-      invoke->ReplaceWith(return_value);
     } else {
       DCHECK(last->IsReturnVoid());
     }
@@ -1594,16 +1616,16 @@
     HBasicBlock* at = invoke->GetBlock();
     HBasicBlock* to = at->SplitAfter(invoke);
 
-    HBasicBlock* first = entry_block_->GetSuccessor(0);
+    HBasicBlock* first = entry_block_->GetSuccessors()[0];
     DCHECK(!first->IsInLoop());
     at->MergeWithInlined(first);
     exit_block_->ReplaceWith(to);
 
     // Update all predecessors of the exit block (now the `to` block)
     // to not `HReturn` but `HGoto` instead.
-    bool returns_void = to->GetPredecessor(0)->GetLastInstruction()->IsReturnVoid();
+    bool returns_void = to->GetPredecessors()[0]->GetLastInstruction()->IsReturnVoid();
     if (to->GetPredecessors().size() == 1) {
-      HBasicBlock* predecessor = to->GetPredecessor(0);
+      HBasicBlock* predecessor = to->GetPredecessors()[0];
       HInstruction* last = predecessor->GetLastInstruction();
       if (!returns_void) {
         return_value = last->InputAt(0);
@@ -1627,14 +1649,11 @@
       }
     }
 
-    if (return_value != nullptr) {
-      invoke->ReplaceWith(return_value);
-    }
-
     // Update the meta information surrounding blocks:
     // (1) the graph they are now in,
     // (2) the reverse post order of that graph,
-    // (3) the potential loop information they are now in.
+    // (3) the potential loop information they are now in,
+    // (4) try block membership.
 
     // We don't add the entry block, the exit block, and the first block, which
     // has been merged with `at`.
@@ -1650,41 +1669,47 @@
     size_t index_of_at = IndexOfElement(outer_graph->reverse_post_order_, at);
     MakeRoomFor(&outer_graph->reverse_post_order_, blocks_added, index_of_at);
 
-    // Do a reverse post order of the blocks in the callee and do (1), (2),
-    // and (3) to the blocks that apply.
-    HLoopInformation* info = at->GetLoopInformation();
+    HLoopInformation* loop_info = at->GetLoopInformation();
+    // Copy TryCatchInformation if `at` is a try block, not if it is a catch block.
+    TryCatchInformation* try_catch_info = at->IsTryBlock() ? at->GetTryCatchInformation() : nullptr;
+
+    // Do a reverse post order of the blocks in the callee and do (1), (2), (3)
+    // and (4) to the blocks that apply.
     for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
       HBasicBlock* current = it.Current();
       if (current != exit_block_ && current != entry_block_ && current != first) {
         DCHECK(!current->IsInLoop());
+        DCHECK(current->GetTryCatchInformation() == nullptr);
         DCHECK(current->GetGraph() == this);
         current->SetGraph(outer_graph);
         outer_graph->AddBlock(current);
         outer_graph->reverse_post_order_[++index_of_at] = current;
-        if (info != nullptr) {
-          current->SetLoopInformation(info);
+        if (loop_info != nullptr) {
+          current->SetLoopInformation(loop_info);
           for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
             loop_it.Current()->Add(current);
           }
         }
+        current->SetTryCatchInformation(try_catch_info);
       }
     }
 
-    // Do (1), (2), and (3) to `to`.
+    // Do (1), (2), (3) and (4) to `to`.
     to->SetGraph(outer_graph);
     outer_graph->AddBlock(to);
     outer_graph->reverse_post_order_[++index_of_at] = to;
-    if (info != nullptr) {
-      to->SetLoopInformation(info);
+    if (loop_info != nullptr) {
+      to->SetLoopInformation(loop_info);
       for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
         loop_it.Current()->Add(to);
       }
-      if (info->IsBackEdge(*at)) {
+      if (loop_info->IsBackEdge(*at)) {
         // Only `to` can become a back edge, as the inlined blocks
         // are predecessors of `to`.
-        info->ReplaceBackEdge(at, to);
+        loop_info->ReplaceBackEdge(at, to);
       }
     }
+    to->SetTryCatchInformation(try_catch_info);
   }
 
   // Update the next instruction id of the outer graph, so that instructions
@@ -1700,20 +1725,21 @@
   size_t parameter_index = 0;
   for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* current = it.Current();
+    HInstruction* replacement = nullptr;
     if (current->IsNullConstant()) {
-      current->ReplaceWith(outer_graph->GetNullConstant(current->GetDexPc()));
+      replacement = outer_graph->GetNullConstant(current->GetDexPc());
     } else if (current->IsIntConstant()) {
-      current->ReplaceWith(outer_graph->GetIntConstant(
-          current->AsIntConstant()->GetValue(), current->GetDexPc()));
+      replacement = outer_graph->GetIntConstant(
+          current->AsIntConstant()->GetValue(), current->GetDexPc());
     } else if (current->IsLongConstant()) {
-      current->ReplaceWith(outer_graph->GetLongConstant(
-          current->AsLongConstant()->GetValue(), current->GetDexPc()));
+      replacement = outer_graph->GetLongConstant(
+          current->AsLongConstant()->GetValue(), current->GetDexPc());
     } else if (current->IsFloatConstant()) {
-      current->ReplaceWith(outer_graph->GetFloatConstant(
-          current->AsFloatConstant()->GetValue(), current->GetDexPc()));
+      replacement = outer_graph->GetFloatConstant(
+          current->AsFloatConstant()->GetValue(), current->GetDexPc());
     } else if (current->IsDoubleConstant()) {
-      current->ReplaceWith(outer_graph->GetDoubleConstant(
-          current->AsDoubleConstant()->GetValue(), current->GetDexPc()));
+      replacement = outer_graph->GetDoubleConstant(
+          current->AsDoubleConstant()->GetValue(), current->GetDexPc());
     } else if (current->IsParameterValue()) {
       if (kIsDebugBuild
           && invoke->IsInvokeStaticOrDirect()
@@ -1723,13 +1749,25 @@
         size_t last_input_index = invoke->InputCount() - 1;
         DCHECK(parameter_index != last_input_index);
       }
-      current->ReplaceWith(invoke->InputAt(parameter_index++));
+      replacement = invoke->InputAt(parameter_index++);
     } else if (current->IsCurrentMethod()) {
-      current->ReplaceWith(outer_graph->GetCurrentMethod());
+      replacement = outer_graph->GetCurrentMethod();
     } else {
       DCHECK(current->IsGoto() || current->IsSuspendCheck());
       entry_block_->RemoveInstruction(current);
     }
+    if (replacement != nullptr) {
+      current->ReplaceWith(replacement);
+      // If `current` is the return value, update the latter to the replacement as well.
+      if (current == return_value) {
+        DCHECK_EQ(entry_block_, return_value->GetBlock());
+        return_value = replacement;
+      }
+    }
+  }
+
+  if (return_value != nullptr) {
+    invoke->ReplaceWith(return_value);
   }
 
   // Finally remove the invoke from the caller.
@@ -1873,6 +1911,35 @@
   return false;
 }
 
+void HInvoke::SetIntrinsic(Intrinsics intrinsic,
+                           IntrinsicNeedsEnvironmentOrCache needs_env_or_cache) {
+  intrinsic_ = intrinsic;
+  IntrinsicOptimizations opt(this);
+  if (needs_env_or_cache == kNoEnvironmentOrCache) {
+    opt.SetDoesNotNeedDexCache();
+    opt.SetDoesNotNeedEnvironment();
+  }
+}
+
+bool HInvoke::NeedsEnvironment() const {
+  if (!IsIntrinsic()) {
+    return true;
+  }
+  IntrinsicOptimizations opt(*this);
+  return !opt.GetDoesNotNeedEnvironment();
+}
+
+bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const {
+  if (GetMethodLoadKind() != MethodLoadKind::kDexCacheViaMethod) {
+    return false;
+  }
+  if (!IsIntrinsic()) {
+    return true;
+  }
+  IntrinsicOptimizations opt(*this);
+  return !opt.GetDoesNotNeedDexCache();
+}
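
The IntrinsicOptimizations helper used above is declared in intrinsics.h and is, conceptually, a typed view over the uint32_t word each HInvoke now carries. A standalone sketch of that flag-word pattern, with accessor names chosen to match the calls above but not copied from ART's actual declarations:

    #include <cstdint>

    // Simplified model of a per-invoke optimization word.
    class InvokeFlagWord {
     public:
      explicit InvokeFlagWord(uint32_t* storage) : storage_(storage) {}

      void SetDoesNotNeedDexCache() { *storage_ |= kDoesNotNeedDexCache; }
      bool GetDoesNotNeedDexCache() const { return (*storage_ & kDoesNotNeedDexCache) != 0u; }

      void SetDoesNotNeedEnvironment() { *storage_ |= kDoesNotNeedEnvironment; }
      bool GetDoesNotNeedEnvironment() const { return (*storage_ & kDoesNotNeedEnvironment) != 0u; }

     private:
      static constexpr uint32_t kDoesNotNeedDexCache = 1u << 0;
      static constexpr uint32_t kDoesNotNeedEnvironment = 1u << 1;
      uint32_t* const storage_;
    };

With this layout, NeedsEnvironment() reads as: every invoke needs an environment unless it is a recognized intrinsic that has explicitly been marked otherwise.
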
+
 void HInstruction::RemoveEnvironmentUsers() {
   for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
     HUseListNode<HEnvironment*>* user_node = use_it.Current();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index dbf46ce..0f2c1cf 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -21,6 +21,7 @@
 #include <array>
 #include <type_traits>
 
+#include "base/arena_bit_vector.h"
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/stl_util.h"
@@ -34,7 +35,6 @@
 #include "mirror/class.h"
 #include "offsets.h"
 #include "primitive.h"
-#include "utils/arena_bit_vector.h"
 
 namespace art {
 
@@ -75,18 +75,26 @@
 static constexpr uint64_t kMaxLongShiftValue = 0x3f;
 
 static constexpr uint32_t kUnknownFieldIndex = static_cast<uint32_t>(-1);
+static constexpr uint16_t kUnknownClassDefIndex = static_cast<uint16_t>(-1);
 
 static constexpr InvokeType kInvalidInvokeType = static_cast<InvokeType>(-1);
 
 static constexpr uint32_t kNoDexPc = -1;
 
 enum IfCondition {
-  kCondEQ,
-  kCondNE,
-  kCondLT,
-  kCondLE,
-  kCondGT,
-  kCondGE,
+  // All types.
+  kCondEQ,  // ==
+  kCondNE,  // !=
+  // Signed integers and floating-point numbers.
+  kCondLT,  // <
+  kCondLE,  // <=
+  kCondGT,  // >
+  kCondGE,  // >=
+  // Unsigned integers.
+  kCondB,   // <
+  kCondBE,  // <=
+  kCondA,   // >
+  kCondAE,  // >=
 };
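
The four new values give the IR an explicit encoding of unsigned comparisons; the distinction matters because the same bit pattern orders differently under signed and unsigned interpretation, which is also why the Evaluate() overloads of the new conditions later in this change cast their operands to uint32_t/uint64_t before comparing. A small standalone illustration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t x = -1;  // Bit pattern 0xFFFFFFFF.
      int32_t y = 1;

      bool lt = x < y;                                                // kCondLT: true.
      bool b  = static_cast<uint32_t>(x) < static_cast<uint32_t>(y);  // kCondB:  false.

      std::printf("signed less-than: %d, unsigned below: %d\n", lt, b);
      return 0;
    }
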
 
 class HInstructionList : public ValueObject {
@@ -177,11 +185,6 @@
   ArenaAllocator* GetArena() const { return arena_; }
   const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; }
 
-  HBasicBlock* GetBlock(size_t id) const {
-    DCHECK_LT(id, blocks_.size());
-    return blocks_[id];
-  }
-
   bool IsInSsaForm() const { return in_ssa_form_; }
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
@@ -548,6 +551,12 @@
   // Note that `other` *must* be populated before entering this function.
   bool IsIn(const HLoopInformation& other) const;
 
+  // Returns true if instruction is not defined within this loop or any loop nested inside
+  // this loop. If must_dominate is set, only definitions that actually dominate the loop
+  // header can be invariant. Otherwise, any definition outside the loop, including
+  // definitions that appear after the loop, is invariant.
+  bool IsLoopInvariant(HInstruction* instruction, bool must_dominate) const;
+
   const ArenaBitVector& GetBlocks() const { return blocks_; }
 
   void Add(HBasicBlock* block);
@@ -648,20 +657,10 @@
     return predecessors_;
   }
 
-  HBasicBlock* GetPredecessor(size_t pred_idx) const {
-    DCHECK_LT(pred_idx, predecessors_.size());
-    return predecessors_[pred_idx];
-  }
-
   const ArenaVector<HBasicBlock*>& GetSuccessors() const {
     return successors_;
   }
 
-  HBasicBlock* GetSuccessor(size_t succ_idx) const {
-    DCHECK_LT(succ_idx, successors_.size());
-    return successors_[succ_idx];
-  }
-
   bool HasSuccessor(const HBasicBlock* block, size_t start_from = 0u) {
     return ContainsElement(successors_, block, start_from);
   }
@@ -797,18 +796,18 @@
 
   HBasicBlock* GetSinglePredecessor() const {
     DCHECK_EQ(GetPredecessors().size(), 1u);
-    return GetPredecessor(0);
+    return GetPredecessors()[0];
   }
 
   HBasicBlock* GetSingleSuccessor() const {
     DCHECK_EQ(GetSuccessors().size(), 1u);
-    return GetSuccessor(0);
+    return GetSuccessors()[0];
   }
 
   // Returns whether the first occurrence of `predecessor` in the list of
   // predecessors is at index `idx`.
   bool IsFirstIndexOfPredecessor(HBasicBlock* predecessor, size_t idx) const {
-    DCHECK_EQ(GetPredecessor(idx), predecessor);
+    DCHECK_EQ(GetPredecessors()[idx], predecessor);
     return GetPredecessorIndexOf(predecessor) == idx;
   }
 
@@ -886,7 +885,7 @@
 
   bool IsLoopPreHeaderFirstPredecessor() const {
     DCHECK(IsLoopHeader());
-    return GetPredecessor(0) == GetLoopInformation()->GetPreHeader();
+    return GetPredecessors()[0] == GetLoopInformation()->GetPreHeader();
   }
 
   HLoopInformation* GetLoopInformation() const {
@@ -1003,11 +1002,15 @@
 };
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                         \
+  M(Above, Condition)                                                   \
+  M(AboveOrEqual, Condition)                                            \
   M(Add, BinaryOperation)                                               \
   M(And, BinaryOperation)                                               \
   M(ArrayGet, Instruction)                                              \
   M(ArrayLength, Instruction)                                           \
   M(ArraySet, Instruction)                                              \
+  M(Below, Condition)                                                   \
+  M(BelowOrEqual, Condition)                                            \
   M(BooleanNot, UnaryOperation)                                         \
   M(BoundsCheck, Instruction)                                           \
   M(BoundType, Instruction)                                             \
@@ -1083,13 +1086,25 @@
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
 
+#ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
+  M(Arm64IntermediateAddress, Instruction)
+#endif
+
+#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)
 
+#ifndef ART_ENABLE_CODEGEN_x86
+#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M)
+#else
 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                            \
   M(X86ComputeBaseMethodAddress, Instruction)                           \
-  M(X86LoadFromConstantTable, Instruction)
+  M(X86LoadFromConstantTable, Instruction)                              \
+  M(X86PackedSwitch, Instruction)
+#endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
 
@@ -1097,6 +1112,7 @@
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                               \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                                \
+  FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                                 \
   FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)                               \
   FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
@@ -1373,6 +1389,10 @@
     return SideEffects(flags_ & ~other.flags_);
   }
 
+  void Add(SideEffects other) {
+    flags_ |= other.flags_;
+  }
+
   bool Includes(SideEffects other) const {
     return (other.flags_ & flags_) == other.flags_;
   }
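
Add() complements the existing Union(): it widens a side-effect set in place, which is what the new HInstruction::AddSideEffects() below (and the now non-const side_effects_ field) rely on. A standalone sketch of the flag-set semantics, assuming a plain bitmask where the real SideEffects packs typed read/write and GC bits:

    #include <cassert>
    #include <cstdint>

    struct Effects {
      uint64_t flags_;

      Effects Union(Effects other) const { return Effects{flags_ | other.flags_}; }
      void Add(Effects other) { flags_ |= other.flags_; }  // In-place widening.
      bool Includes(Effects other) const { return (other.flags_ & flags_) == other.flags_; }
    };

    int main() {
      Effects array_read{1u << 3};
      Effects depends_on_gc{1u << 7};

      Effects combined = array_read.Union(depends_on_gc);  // Functional combination.
      assert(combined.Includes(array_read) && combined.Includes(depends_on_gc));

      Effects widened = array_read;
      widened.Add(depends_on_gc);                          // Mutating combination.
      assert(widened.Includes(depends_on_gc));
      return 0;
    }
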
@@ -1559,12 +1579,10 @@
   void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header);
 
   void SetRawEnvAt(size_t index, HInstruction* instruction) {
-    DCHECK_LT(index, Size());
     vregs_[index] = HUserRecord<HEnvironment*>(instruction);
   }
 
   HInstruction* GetInstructionAt(size_t index) const {
-    DCHECK_LT(index, Size());
     return vregs_[index].GetInstruction();
   }
 
@@ -1575,12 +1593,10 @@
   HEnvironment* GetParent() const { return parent_; }
 
   void SetLocationAt(size_t index, Location location) {
-    DCHECK_LT(index, Size());
     locations_[index] = location;
   }
 
   Location GetLocationAt(size_t index) const {
-    DCHECK_LT(index, Size());
     return locations_[index];
   }
 
@@ -1610,7 +1626,6 @@
   void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) {
     DCHECK(env_use->GetUser() == this);
     size_t index = env_use->GetIndex();
-    DCHECK_LT(index, Size());
     vregs_[index] = HUserRecord<HEnvironment*>(vregs_[index], env_use);
   }
 
@@ -1656,6 +1671,11 @@
     return GetTypeHandle()->IsObjectClass();
   }
 
+  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsStringClass();
+  }
+
   bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(IsValid());
     return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
@@ -1667,15 +1687,36 @@
   }
 
   bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
     return GetTypeHandle()->IsArrayClass();
   }
 
+  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsPrimitiveArray();
+  }
+
+  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+  }
+
   bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
     if (!IsExact()) return false;
     if (!IsArrayClass()) return false;
     return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
   }
 
+  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    if (!rti.IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
+        rti.GetTypeHandle()->GetComponentType());
+  }
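
CanArrayHoldValuesOf() compares component types rather than the array types themselves: an exact Object[] can hold every element drawn from a String[], while the reverse store would still need a type check. A standalone sketch of that component-wise check, using a toy class model in place of mirror::Class:

    #include <cassert>

    // Toy stand-in for mirror::Class; a class is assignable from any of its subclasses.
    struct ToyClass {
      const ToyClass* super = nullptr;
      const ToyClass* component = nullptr;  // Non-null for array classes.

      bool IsAssignableFrom(const ToyClass* other) const {
        for (const ToyClass* c = other; c != nullptr; c = c->super) {
          if (c == this) return true;
        }
        return false;
      }
    };

    int main() {
      ToyClass object, string;
      string.super = &object;

      ToyClass object_array, string_array;
      object_array.component = &object;
      string_array.component = &string;

      // Object[] can hold the values of a String[]; the converse does not hold.
      assert(object_array.component->IsAssignableFrom(string_array.component));
      assert(!string_array.component->IsAssignableFrom(object_array.component));
      return 0;
    }
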
+
   Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
 
   bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1781,8 +1822,7 @@
     return true;
   }
 
-  virtual bool CanDoImplicitNullCheckOn(HInstruction* obj) const {
-    UNUSED(obj);
+  virtual bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const {
     return false;
   }
 
@@ -1899,16 +1939,14 @@
   virtual bool CanBeMoved() const { return false; }
 
   // Returns whether the two instructions are of the same kind.
-  virtual bool InstructionTypeEquals(HInstruction* other) const {
-    UNUSED(other);
+  virtual bool InstructionTypeEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
   // Returns whether any data encoded in the two instructions is equal.
   // This method does not look at the inputs. Both instructions must be
   // of the same type, otherwise the method has undefined behavior.
-  virtual bool InstructionDataEquals(HInstruction* other) const {
-    UNUSED(other);
+  virtual bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
@@ -1928,6 +1966,7 @@
   }
 
   SideEffects GetSideEffects() const { return side_effects_; }
+  void AddSideEffects(SideEffects other) { side_effects_.Add(other); }
 
   size_t GetLifetimePosition() const { return lifetime_position_; }
   void SetLifetimePosition(size_t position) { lifetime_position_ = position; }
@@ -1947,7 +1986,9 @@
     return NeedsEnvironment() || IsLoadClass() || IsLoadString();
   }
 
-  virtual bool NeedsDexCache() const { return false; }
+  // Returns whether the code generation of the instruction will require access
+  // to the dex cache of the current method's declaring class via the current method.
+  virtual bool NeedsDexCacheOfDeclaringClass() const { return false; }
 
   // Does this instruction have any use in an environment before
   // control flow hits 'other'?
@@ -1997,7 +2038,7 @@
   // order of blocks where this instruction's live interval start.
   size_t lifetime_position_;
 
-  const SideEffects side_effects_;
+  SideEffects side_effects_;
 
   // TODO: for primitive types this should be marked as invalid.
   ReferenceTypeInfo reference_type_info_;
@@ -2315,11 +2356,11 @@
   bool IsControlFlow() const OVERRIDE { return true; }
 
   HBasicBlock* IfTrueSuccessor() const {
-    return GetBlock()->GetSuccessor(0);
+    return GetBlock()->GetSuccessors()[0];
   }
 
   HBasicBlock* IfFalseSuccessor() const {
-    return GetBlock()->GetSuccessor(1);
+    return GetBlock()->GetSuccessors()[1];
   }
 
   DECLARE_INSTRUCTION(If);
@@ -2347,7 +2388,7 @@
   bool IsControlFlow() const OVERRIDE { return true; }
 
   // Returns the block's non-exceptional successor (index zero).
-  HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessor(0); }
+  HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; }
 
   // Returns whether `handler` is among its exception handlers (non-zero index
   // successors).
@@ -2384,7 +2425,7 @@
     : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {}
 
   bool Done() const { return index_ == block_.GetSuccessors().size(); }
-  HBasicBlock* Current() const { return block_.GetSuccessor(index_); }
+  HBasicBlock* Current() const { return block_.GetSuccessors()[index_]; }
   size_t CurrentSuccessorIndex() const { return index_; }
   void Advance() { ++index_; }
 
@@ -2449,7 +2490,7 @@
 
   HBasicBlock* GetDefaultBlock() const {
     // Last entry is the default block.
-    return GetBlock()->GetSuccessor(num_entries_);
+    return GetBlock()->GetSuccessors()[num_entries_];
   }
   DECLARE_INSTRUCTION(PackedSwitch);
 
@@ -2471,8 +2512,7 @@
   Primitive::Type GetResultType() const { return GetType(); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -2542,8 +2582,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -2647,8 +2686,6 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> bool Compute(T x, T y) const { return x == y; }
-
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -2669,6 +2706,8 @@
   }
 
  private:
+  template <typename T> bool Compute(T x, T y) const { return x == y; }
+
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
 
@@ -2679,8 +2718,6 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> bool Compute(T x, T y) const { return x != y; }
-
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -2701,6 +2738,8 @@
   }
 
  private:
+  template <typename T> bool Compute(T x, T y) const { return x != y; }
+
   DISALLOW_COPY_AND_ASSIGN(HNotEqual);
 };
 
@@ -2709,8 +2748,6 @@
   HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
-  template <typename T> bool Compute(T x, T y) const { return x < y; }
-
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -2731,6 +2768,8 @@
   }
 
  private:
+  template <typename T> bool Compute(T x, T y) const { return x < y; }
+
   DISALLOW_COPY_AND_ASSIGN(HLessThan);
 };
 
@@ -2739,8 +2778,6 @@
   HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
-  template <typename T> bool Compute(T x, T y) const { return x <= y; }
-
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -2761,6 +2798,8 @@
   }
 
  private:
+  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+
   DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
 };
 
@@ -2769,8 +2808,6 @@
   HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
-  template <typename T> bool Compute(T x, T y) const { return x > y; }
-
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -2791,6 +2828,8 @@
   }
 
  private:
+  template <typename T> bool Compute(T x, T y) const { return x > y; }
+
   DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
 };
 
@@ -2799,8 +2838,6 @@
   HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
 
-  template <typename T> bool Compute(T x, T y) const { return x >= y; }
-
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
@@ -2821,9 +2858,138 @@
   }
 
  private:
+  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+
   DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
 };
 
+class HBelow : public HCondition {
+ public:
+  HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
+      : HCondition(first, second, dex_pc) {}
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint32_t>(x->GetValue()),
+                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint64_t>(x->GetValue()),
+                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(Below);
+
+  IfCondition GetCondition() const OVERRIDE {
+    return kCondB;
+  }
+
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondAE;
+  }
+
+ private:
+  template <typename T> bool Compute(T x, T y) const { return x < y; }
+
+  DISALLOW_COPY_AND_ASSIGN(HBelow);
+};
+
+class HBelowOrEqual : public HCondition {
+ public:
+  HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
+      : HCondition(first, second, dex_pc) {}
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint32_t>(x->GetValue()),
+                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint64_t>(x->GetValue()),
+                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(BelowOrEqual);
+
+  IfCondition GetCondition() const OVERRIDE {
+    return kCondBE;
+  }
+
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondA;
+  }
+
+ private:
+  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+
+  DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual);
+};
+
+class HAbove : public HCondition {
+ public:
+  HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
+      : HCondition(first, second, dex_pc) {}
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint32_t>(x->GetValue()),
+                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint64_t>(x->GetValue()),
+                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(Above);
+
+  IfCondition GetCondition() const OVERRIDE {
+    return kCondA;
+  }
+
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondBE;
+  }
+
+ private:
+  template <typename T> bool Compute(T x, T y) const { return x > y; }
+
+  DISALLOW_COPY_AND_ASSIGN(HAbove);
+};
+
+class HAboveOrEqual : public HCondition {
+ public:
+  HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
+      : HCondition(first, second, dex_pc) {}
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint32_t>(x->GetValue()),
+                static_cast<uint32_t>(y->GetValue())), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(static_cast<uint64_t>(x->GetValue()),
+                static_cast<uint64_t>(y->GetValue())), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(AboveOrEqual);
+
+  IfCondition GetCondition() const OVERRIDE {
+    return kCondAE;
+  }
+
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondB;
+  }
+
+ private:
+  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+
+  DISALLOW_COPY_AND_ASSIGN(HAboveOrEqual);
+};
 
 // Instruction to check how two inputs compare to each other.
 // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
@@ -3034,11 +3200,7 @@
  public:
   size_t InputCount() const OVERRIDE { return inputs_.size(); }
 
-  // Runtime needs to walk the stack, so Dex -> Dex calls need to
-  // know their environment.
-  bool NeedsEnvironment() const OVERRIDE {
-    return needs_environment_or_cache_ == kNeedsEnvironmentOrCache;
-  }
+  bool NeedsEnvironment() const OVERRIDE;
 
   void SetArgumentAt(size_t index, HInstruction* argument) {
     SetRawInputAt(index, argument);
@@ -3062,10 +3224,7 @@
     return intrinsic_;
   }
 
-  void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache) {
-    intrinsic_ = intrinsic;
-    needs_environment_or_cache_ = needs_env_or_cache;
-  }
+  void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
 
   bool IsFromInlinedInvoke() const {
     return GetEnvironment()->GetParent() != nullptr;
@@ -3073,6 +3232,16 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
+  uint32_t* GetIntrinsicOptimizations() {
+    return &intrinsic_optimizations_;
+  }
+
+  const uint32_t* GetIntrinsicOptimizations() const {
+    return &intrinsic_optimizations_;
+  }
+
+  bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
+
   DECLARE_INSTRUCTION(Invoke);
 
  protected:
@@ -3092,16 +3261,14 @@
       dex_method_index_(dex_method_index),
       original_invoke_type_(original_invoke_type),
       intrinsic_(Intrinsics::kNone),
-      needs_environment_or_cache_(kNeedsEnvironmentOrCache) {
+      intrinsic_optimizations_(0) {
   }
 
   const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    DCHECK_LT(index, InputCount());
     return inputs_[index];
   }
 
   void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    DCHECK_LT(index, InputCount());
     inputs_[index] = input;
   }
 
@@ -3111,7 +3278,9 @@
   const uint32_t dex_method_index_;
   const InvokeType original_invoke_type_;
   Intrinsics intrinsic_;
-  IntrinsicNeedsEnvironmentOrCache needs_environment_or_cache_;
+
+  // A magic word holding optimizations for intrinsics. See intrinsics.h.
+  uint32_t intrinsic_optimizations_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
@@ -3207,15 +3376,15 @@
   };
 
   struct DispatchInfo {
-    const MethodLoadKind method_load_kind;
-    const CodePtrLocation code_ptr_location;
+    MethodLoadKind method_load_kind;
+    CodePtrLocation code_ptr_location;
     // The method load data holds
     //   - thread entrypoint offset for kStringInit method if this is a string init invoke.
     //     Note that there are multiple string init methods, each having its own offset.
     //   - the method address for kDirectAddress
     //   - the dex cache arrays offset for kDexCachePcRel.
-    const uint64_t method_load_data;
-    const uint64_t direct_code_ptr;
+    uint64_t method_load_data;
+    uint64_t direct_code_ptr;
   };
 
   HInvokeStaticOrDirect(ArenaAllocator* arena,
@@ -3244,8 +3413,11 @@
         target_method_(target_method),
         dispatch_info_(dispatch_info) {}
 
-  bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    UNUSED(obj);
+  void SetDispatchInfo(const DispatchInfo& dispatch_info) {
+    dispatch_info_ = dispatch_info;
+  }
+
+  bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
     // TODO: for intrinsics we can generate implicit null checks.
     return false;
@@ -3259,14 +3431,13 @@
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
-  bool NeedsDexCache() const OVERRIDE {
-    if (intrinsic_ != Intrinsics::kNone) { return needs_environment_or_cache_; }
-    return !IsRecursive() && !IsStringInit();
-  }
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
   bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
   uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
   bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
-  bool HasPcRelDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; }
+  bool HasPcRelDexCache() const {
+    return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
+  }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
 
@@ -3686,8 +3857,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -3921,24 +4091,31 @@
 // the calling convention.
 class HParameterValue : public HExpression<0> {
  public:
-  HParameterValue(uint8_t index,
+  HParameterValue(const DexFile& dex_file,
+                  uint16_t type_index,
+                  uint8_t index,
                   Primitive::Type parameter_type,
                   bool is_this = false)
       : HExpression(parameter_type, SideEffects::None(), kNoDexPc),
+        dex_file_(dex_file),
+        type_index_(type_index),
         index_(index),
         is_this_(is_this),
         can_be_null_(!is_this) {}
 
+  const DexFile& GetDexFile() const { return dex_file_; }
+  uint16_t GetTypeIndex() const { return type_index_; }
   uint8_t GetIndex() const { return index_; }
+  bool IsThis() const { return is_this_; }
 
   bool CanBeNull() const OVERRIDE { return can_be_null_; }
   void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; }
 
-  bool IsThis() const { return is_this_; }
-
   DECLARE_INSTRUCTION(ParameterValue);
 
  private:
+  const DexFile& dex_file_;
+  const uint16_t type_index_;
   // The index of this parameter in the parameters list. Must be less
   // than HGraph::number_of_in_vregs_.
   const uint8_t index_;
@@ -3957,8 +4134,7 @@
       : HUnaryOperation(result_type, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -3983,8 +4159,7 @@
       : HUnaryOperation(Primitive::Type::kPrimBoolean, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4022,7 +4197,7 @@
   Primitive::Type GetInputType() const { return GetInput()->GetType(); }
   Primitive::Type GetResultType() const { return GetType(); }
 
-  // Required by the x86 and ARM code generators when producing calls
+  // Required by the x86, ARM, MIPS and MIPS64 code generators when producing calls
   // to the runtime.
 
   bool CanBeMoved() const OVERRIDE { return true; }
@@ -4125,12 +4300,10 @@
 
  protected:
   const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    DCHECK_LE(index, InputCount());
     return inputs_[index];
   }
 
   void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    DCHECK_LE(index, InputCount());
     inputs_[index] = input;
   }
 
@@ -4152,8 +4325,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4176,18 +4348,21 @@
             Primitive::Type field_type,
             bool is_volatile,
             uint32_t index,
+            uint16_t declaring_class_def_index,
             const DexFile& dex_file,
             Handle<mirror::DexCache> dex_cache)
       : field_offset_(field_offset),
         field_type_(field_type),
         is_volatile_(is_volatile),
         index_(index),
+        declaring_class_def_index_(declaring_class_def_index),
         dex_file_(dex_file),
         dex_cache_(dex_cache) {}
 
   MemberOffset GetFieldOffset() const { return field_offset_; }
   Primitive::Type GetFieldType() const { return field_type_; }
   uint32_t GetFieldIndex() const { return index_; }
+  uint16_t GetDeclaringClassDefIndex() const { return declaring_class_def_index_; }
   const DexFile& GetDexFile() const { return dex_file_; }
   bool IsVolatile() const { return is_volatile_; }
   Handle<mirror::DexCache> GetDexCache() const { return dex_cache_; }
@@ -4197,6 +4372,7 @@
   const Primitive::Type field_type_;
   const bool is_volatile_;
   const uint32_t index_;
+  const uint16_t declaring_class_def_index_;
   const DexFile& dex_file_;
   const Handle<mirror::DexCache> dex_cache_;
 };
@@ -4208,13 +4384,20 @@
                     MemberOffset field_offset,
                     bool is_volatile,
                     uint32_t field_idx,
+                    uint16_t declaring_class_def_index,
                     const DexFile& dex_file,
                     Handle<mirror::DexCache> dex_cache,
                     uint32_t dex_pc)
-      : HExpression(
-            field_type,
-            SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc),
-        field_info_(field_offset, field_type, is_volatile, field_idx, dex_file, dex_cache) {
+      : HExpression(field_type,
+                    SideEffects::FieldReadOfType(field_type, is_volatile),
+                    dex_pc),
+        field_info_(field_offset,
+                    field_type,
+                    is_volatile,
+                    field_idx,
+                    declaring_class_def_index,
+                    dex_file,
+                    dex_cache) {
     SetRawInputAt(0, value);
   }
 
@@ -4254,12 +4437,19 @@
                     MemberOffset field_offset,
                     bool is_volatile,
                     uint32_t field_idx,
+                    uint16_t declaring_class_def_index,
                     const DexFile& dex_file,
                     Handle<mirror::DexCache> dex_cache,
                     uint32_t dex_pc)
-      : HTemplateInstruction(
-          SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc),
-        field_info_(field_offset, field_type, is_volatile, field_idx, dex_file, dex_cache),
+      : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile),
+                             dex_pc),
+        field_info_(field_offset,
+                    field_type,
+                    is_volatile,
+                    field_idx,
+                    declaring_class_def_index,
+                    dex_file,
+                    dex_cache),
         value_can_be_null_(true) {
     SetRawInputAt(0, object);
     SetRawInputAt(1, value);
@@ -4291,19 +4481,20 @@
   HArrayGet(HInstruction* array,
             HInstruction* index,
             Primitive::Type type,
-            uint32_t dex_pc)
-      : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
+            uint32_t dex_pc,
+            SideEffects additional_side_effects = SideEffects::None())
+      : HExpression(type,
+                    SideEffects::ArrayReadOfType(type).Union(additional_side_effects),
+                    dex_pc) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
-  bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    UNUSED(obj);
+  bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // TODO: We can be smarter here.
     // Currently, the array access is always preceded by an ArrayLength or a NullCheck
     // which generates the implicit null check. There are cases when these can be removed
@@ -4329,10 +4520,13 @@
             HInstruction* index,
             HInstruction* value,
             Primitive::Type expected_component_type,
-            uint32_t dex_pc)
+            uint32_t dex_pc,
+            SideEffects additional_side_effects = SideEffects::None())
       : HTemplateInstruction(
             SideEffects::ArrayWriteOfType(expected_component_type).Union(
-                SideEffectsForArchRuntimeCalls(value->GetType())), dex_pc),
+                SideEffectsForArchRuntimeCalls(value->GetType())).Union(
+                    additional_side_effects),
+            dex_pc),
         expected_component_type_(expected_component_type),
         needs_type_check_(value->GetType() == Primitive::kPrimNot),
         value_can_be_null_(true),
@@ -4351,8 +4545,7 @@
   // Can throw ArrayStoreException.
   bool CanThrow() const OVERRIDE { return needs_type_check_; }
 
-  bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    UNUSED(obj);
+  bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // TODO: Same as for ArrayGet.
     return false;
   }
@@ -4388,6 +4581,10 @@
         : expected_component_type_;
   }
 
+  Primitive::Type GetRawExpectedComponentType() const {
+    return expected_component_type_;
+  }
+
   static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type value_type) {
     return (value_type == Primitive::kPrimNot) ? SideEffects::CanTriggerGC() : SideEffects::None();
   }
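
The new additional_side_effects parameter lets the creator of an array access fold extra dependencies into the node up front instead of patching them in afterwards. A hedged sketch of one such use (the helper name and surrounding pass are assumptions; only the constructor shape comes from this change):

    // Sketch: build an array load that must not be moved across a GC point, e.g.
    // because its base address was pre-computed into a GC-invisible pointer.
    HArrayGet* MakeGcSensitiveArrayGet(ArenaAllocator* arena,
                                       HInstruction* array,
                                       HInstruction* index,
                                       Primitive::Type type,
                                       uint32_t dex_pc) {
      return new (arena) HArrayGet(array, index, type, dex_pc, SideEffects::DependsOnGC());
    }
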
@@ -4407,7 +4604,7 @@
 
 class HArrayLength : public HExpression<1> {
  public:
-  explicit HArrayLength(HInstruction* array, uint32_t dex_pc)
+  HArrayLength(HInstruction* array, uint32_t dex_pc)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
     // Note that arrays do not change length, so the instruction does not
     // depend on any write.
@@ -4415,8 +4612,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -4439,8 +4635,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4448,6 +4643,7 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
+  HInstruction* GetIndex() const { return InputAt(0); }
 
   DECLARE_INSTRUCTION(BoundsCheck);
 
@@ -4522,13 +4718,20 @@
         generate_clinit_check_(false),
         needs_access_check_(needs_access_check),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+    // Referrers class should not need access check. We never inline unverified
+    // methods so we can't possibly end up in this situation.
+    DCHECK(!is_referrers_class_ || !needs_access_check_);
     SetRawInputAt(0, current_method);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return other->AsLoadClass()->type_index_ == type_index_;
+    // Note that we don't need to test for generate_clinit_check_.
+    // Whether or not we need to generate the clinit check is processed in
+    // prepare_for_register_allocator based on existing HInvokes and HClinitChecks.
+    return other->AsLoadClass()->type_index_ == type_index_ &&
+        other->AsLoadClass()->needs_access_check_ == needs_access_check_;
   }
 
   size_t ComputeHashCode() const OVERRIDE { return type_index_; }
@@ -4540,13 +4743,16 @@
   bool NeedsEnvironment() const OVERRIDE {
     // Will call runtime and load the class if the class is not loaded yet.
     // TODO: finer grain decision.
-    return !is_referrers_class_ || needs_access_check_;
+    return !is_referrers_class_;
   }
 
   bool MustGenerateClinitCheck() const {
     return generate_clinit_check_;
   }
   void SetMustGenerateClinitCheck(bool generate_clinit_check) {
+    // The entrypoint the code generator is going to call does not do
+    // clinit of the class.
+    DCHECK(!NeedsAccessCheck());
     generate_clinit_check_ = generate_clinit_check;
   }
 
@@ -4576,7 +4782,7 @@
 
   const DexFile& GetDexFile() { return dex_file_; }
 
-  bool NeedsDexCache() const OVERRIDE { return !is_referrers_class_; }
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return !is_referrers_class_; }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -4618,7 +4824,7 @@
 
   // TODO: Can we deopt or debug when we resolve a string?
   bool NeedsEnvironment() const OVERRIDE { return false; }
-  bool NeedsDexCache() const OVERRIDE { return true; }
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return true; }
   bool CanBeNull() const OVERRIDE { return false; }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
@@ -4647,8 +4853,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4673,13 +4878,20 @@
                   MemberOffset field_offset,
                   bool is_volatile,
                   uint32_t field_idx,
+                  uint16_t declaring_class_def_index,
                   const DexFile& dex_file,
                   Handle<mirror::DexCache> dex_cache,
                   uint32_t dex_pc)
-      : HExpression(
-            field_type,
-            SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc),
-        field_info_(field_offset, field_type, is_volatile, field_idx, dex_file, dex_cache) {
+      : HExpression(field_type,
+                    SideEffects::FieldReadOfType(field_type, is_volatile),
+                    dex_pc),
+        field_info_(field_offset,
+                    field_type,
+                    is_volatile,
+                    field_idx,
+                    declaring_class_def_index,
+                    dex_file,
+                    dex_cache) {
     SetRawInputAt(0, cls);
   }
 
@@ -4716,12 +4928,19 @@
                   MemberOffset field_offset,
                   bool is_volatile,
                   uint32_t field_idx,
+                  uint16_t declaring_class_def_index,
                   const DexFile& dex_file,
                   Handle<mirror::DexCache> dex_cache,
                   uint32_t dex_pc)
-      : HTemplateInstruction(
-          SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc),
-        field_info_(field_offset, field_type, is_volatile, field_idx, dex_file, dex_cache),
+      : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile),
+                             dex_pc),
+        field_info_(field_offset,
+                    field_type,
+                    is_volatile,
+                    field_idx,
+                    declaring_class_def_index,
+                    dex_file,
+                    dex_cache),
         value_can_be_null_(true) {
     SetRawInputAt(0, cls);
     SetRawInputAt(1, value);
@@ -5240,7 +5459,6 @@
   }
 
   MoveOperands* MoveOperandsAt(size_t index) {
-    DCHECK_LT(index, moves_.size());
     return &moves_[index];
   }
 
@@ -5256,6 +5474,9 @@
 
 }  // namespace art
 
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "nodes_arm64.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "nodes_x86.h"
 #endif
@@ -5267,7 +5488,7 @@
   explicit HGraphVisitor(HGraph* graph) : graph_(graph) {}
   virtual ~HGraphVisitor() {}
 
-  virtual void VisitInstruction(HInstruction* instruction) { UNUSED(instruction); }
+  virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {}
   virtual void VisitBasicBlock(HBasicBlock* block);
 
   // Visit the graph following basic block insertion order.
@@ -5314,7 +5535,7 @@
   explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
 
   bool Done() const { return index_ == graph_.GetBlocks().size(); }
-  HBasicBlock* Current() const { return graph_.GetBlock(index_); }
+  HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; }
   void Advance() { ++index_; }
 
  private:
@@ -5440,7 +5661,6 @@
       : blocks_in_loop_(info.GetBlocks()),
         blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()),
         index_(0) {
-    DCHECK(!blocks_.empty());
     if (!blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) {
       Advance();
     }
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
new file mode 100644
index 0000000..885d3a2
--- /dev/null
+++ b/compiler/optimizing/nodes_arm64.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
+
+namespace art {
+
+// This instruction computes an intermediate address pointing in the 'middle' of an object. The
+// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
+// never used across anything that can trigger GC.
+class HArm64IntermediateAddress : public HExpression<2> {
+ public:
+  HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
+      : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+    SetRawInputAt(0, base_address);
+    SetRawInputAt(1, offset);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+
+  HInstruction* GetBaseAddress() const { return InputAt(0); }
+  HInstruction* GetOffset() const { return InputAt(1); }
+
+  DECLARE_INSTRUCTION(Arm64IntermediateAddress);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
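
A hedged sketch of the intended use of this node: an arm64-specific simplification can hoist the constant data offset of an array access into an HArm64IntermediateAddress, so the access itself only has to add the scaled index. Helpers outside this file (notably ReplaceInput) are assumed from the rest of the optimizing framework and are not part of this change:

    // Sketch: insert an intermediate address before `access` (an array get/set) and
    // redirect its base input to it. `data_offset` is the array payload offset.
    void ExtractIntermediateAddress(HGraph* graph, HInstruction* access, int32_t data_offset) {
      HInstruction* array = access->InputAt(0);
      HIntConstant* offset = graph->GetIntConstant(data_offset, access->GetDexPc());
      HArm64IntermediateAddress* address = new (graph->GetArena())
          HArm64IntermediateAddress(array, offset, access->GetDexPc());
      access->GetBlock()->InsertInstructionBefore(address, access);
      access->ReplaceInput(address, 0);  // Assumed helper: rewires input 0 and its use list.
    }

Reporting SideEffects::DependsOnGC() is what keeps code motion from separating the intermediate address from the access it feeds by anything that can trigger GC, matching the class comment above.
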
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 8eeac56..764f5fe 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -34,7 +34,8 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
 
@@ -76,8 +77,10 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
-  HInstruction* parameter2 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter1 = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+  HInstruction* parameter2 = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
   entry->AddInstruction(new (&allocator) HExit());
@@ -102,7 +105,8 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   ASSERT_FALSE(parameter->HasUses());
@@ -122,7 +126,8 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter1 = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0);
   entry->AddInstruction(parameter1);
   entry->AddInstruction(with_environment);
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index f7cc872..556217b 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -62,6 +62,45 @@
   DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable);
 };
 
+// X86 version of HPackedSwitch that holds a pointer to the base method address.
+class HX86PackedSwitch : public HTemplateInstruction<2> {
+ public:
+  HX86PackedSwitch(int32_t start_value,
+                   int32_t num_entries,
+                   HInstruction* input,
+                   HX86ComputeBaseMethodAddress* method_base,
+                   uint32_t dex_pc)
+    : HTemplateInstruction(SideEffects::None(), dex_pc),
+      start_value_(start_value),
+      num_entries_(num_entries) {
+    SetRawInputAt(0, input);
+    SetRawInputAt(1, method_base);
+  }
+
+  bool IsControlFlow() const OVERRIDE { return true; }
+
+  int32_t GetStartValue() const { return start_value_; }
+
+  int32_t GetNumEntries() const { return num_entries_; }
+
+  HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const {
+    return InputAt(1)->AsX86ComputeBaseMethodAddress();
+  }
+
+  HBasicBlock* GetDefaultBlock() const {
+    // Last entry is the default block.
+    return GetBlock()->GetSuccessors()[num_entries_];
+  }
+
+  DECLARE_INSTRUCTION(X86PackedSwitch);
+
+ private:
+  const int32_t start_value_;
+  const int32_t num_entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(HX86PackedSwitch);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_X86_H_
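
The successor selection encoded by HX86PackedSwitch can be summarized with this stand-alone sketch (not ART code): values inside [start_value, start_value + num_entries) pick a case successor, everything else falls through to the last successor, matching GetDefaultBlock() above:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

// Map a switch value to a successor index: cases 0..num_entries-1, default at num_entries,
// mirroring GetDefaultBlock() returning GetSuccessors()[num_entries_].
size_t SelectSuccessor(int32_t value, int32_t start_value, int32_t num_entries) {
  int64_t relative = static_cast<int64_t>(value) - start_value;
  if (relative < 0 || relative >= num_entries) {
    return static_cast<size_t>(num_entries);  // the default successor comes last
  }
  return static_cast<size_t>(relative);
}

int main() {
  std::cout << SelectSuccessor(7, /*start_value=*/5, /*num_entries=*/4) << "\n";   // case 2
  std::cout << SelectSuccessor(42, /*start_value=*/5, /*num_entries=*/4) << "\n";  // default (4)
  return 0;
}
```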
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 12d6b03..8cb2cfc 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -31,6 +31,7 @@
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
 #include "base/dumpable.h"
+#include "base/macros.h"
 #include "base/timing_logger.h"
 #include "boolean_simplifier.h"
 #include "bounds_check_elimination.h"
@@ -57,10 +58,12 @@
 #include "intrinsics.h"
 #include "licm.h"
 #include "jni/quick/jni_compiler.h"
+#include "load_store_elimination.h"
 #include "nodes.h"
 #include "prepare_for_register_allocation.h"
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
+#include "sharpening.h"
 #include "side_effects_analysis.h"
 #include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
@@ -168,13 +171,13 @@
     if (kIsDebugBuild) {
       if (!graph_in_bad_state_) {
         if (graph_->IsInSsaForm()) {
-          SSAChecker checker(graph_->GetArena(), graph_);
+          SSAChecker checker(graph_);
           checker.Run();
           if (!checker.IsValid()) {
             LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<SSAChecker>(checker);
           }
         } else {
-          GraphChecker checker(graph_->GetArena(), graph_);
+          GraphChecker checker(graph_);
           checker.Run();
           if (!checker.IsValid()) {
             LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker);
@@ -357,8 +360,10 @@
 }
 
 static bool IsInstructionSetSupported(InstructionSet instruction_set) {
-  return instruction_set == kArm64
+  return (instruction_set == kArm && !kArm32QuickCodeUseSoftFloat)
+      || instruction_set == kArm64
       || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat)
+      || instruction_set == kMips
       || instruction_set == kMips64
       || instruction_set == kX86
       || instruction_set == kX86_64;
@@ -374,6 +379,7 @@
 }
 
 static void MaybeRunInliner(HGraph* graph,
+                            CodeGenerator* codegen,
                             CompilerDriver* driver,
                             OptimizingCompilerStats* stats,
                             const DexCompilationUnit& dex_compilation_unit,
@@ -388,7 +394,7 @@
 
   ArenaAllocator* arena = graph->GetArena();
   HInliner* inliner = new (arena) HInliner(
-    graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+    graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
   ReferenceTypePropagation* type_propagation =
     new (arena) ReferenceTypePropagation(graph, handles,
         "reference_type_propagation_after_inlining");
@@ -441,6 +447,7 @@
 }
 
 static void RunOptimizations(HGraph* graph,
+                             CodeGenerator* codegen,
                              CompilerDriver* driver,
                              OptimizingCompilerStats* stats,
                              const DexCompilationUnit& dex_compilation_unit,
@@ -458,10 +465,12 @@
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
   ReferenceTypePropagation* type_propagation =
       new (arena) ReferenceTypePropagation(graph, handles);
+  HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_after_types");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
@@ -476,12 +485,15 @@
     fold1,
     simplify1,
     type_propagation,
+    sharpening,
     dce1,
     simplify2
   };
 
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
+  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+
   // TODO: Update passes incompatible with try/catch so we have the same
   //       pipeline for all methods.
   if (graph->HasTryCatch()) {
@@ -497,8 +509,6 @@
 
     RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
   } else {
-    MaybeRunInliner(graph, driver, stats, dex_compilation_unit, pass_observer, handles);
-
     HOptimization* optimizations2[] = {
       // BooleanSimplifier depends on the InstructionSimplifier removing
       // redundant suspend checks to recognize empty blocks.
@@ -510,6 +520,7 @@
       induction,
       bce,
       simplify3,
+      lse,
       dce2,
       // The codegen has a few assumptions that only the instruction simplifier
       // can satisfy. For example, the code generator does not expect to see a
@@ -534,6 +545,7 @@
   return ArrayRef<const uint8_t>(vector);
 }
 
+NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
                               PassObserver* pass_observer) {
@@ -562,9 +574,6 @@
   return linker_patches;
 }
 
-// TODO: The function below uses too much stack space. Bug: 24698147
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wframe-larger-than="
 CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
                                                      CodeGenerator* codegen,
                                                      CompilerDriver* compiler_driver,
@@ -573,8 +582,13 @@
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScopeCollection handles(soa.Self());
   soa.Self()->TransitionFromRunnableToSuspended(kNative);
-  RunOptimizations(graph, compiler_driver, compilation_stats_.get(),
-                   dex_compilation_unit, pass_observer, &handles);
+  RunOptimizations(graph,
+                   codegen,
+                   compiler_driver,
+                   compilation_stats_.get(),
+                   dex_compilation_unit,
+                   pass_observer,
+                   &handles);
 
   AllocateRegisters(graph, codegen, pass_observer);
 
@@ -603,7 +617,7 @@
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      &src_mapping_table,
+      ArrayRef<const SrcMapElem>(src_mapping_table),
       ArrayRef<const uint8_t>(),  // mapping_table.
       ArrayRef<const uint8_t>(stack_map),
       ArrayRef<const uint8_t>(),  // native_gc_map.
@@ -614,7 +628,6 @@
   soa.Self()->TransitionFromSuspendedToRunnable();
   return compiled_method;
 }
-#pragma GCC diagnostic pop
 
 CompiledMethod* OptimizingCompiler::CompileBaseline(
     CodeGenerator* codegen,
@@ -649,7 +662,7 @@
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      &src_mapping_table,
+      ArrayRef<const SrcMapElem>(src_mapping_table),
       AlignVectorSize(mapping_table),
       AlignVectorSize(vmap_table),
       AlignVectorSize(gc_map),
@@ -667,11 +680,11 @@
                                                jobject class_loader,
                                                const DexFile& dex_file,
                                                Handle<mirror::DexCache> dex_cache) const {
-  UNUSED(invoke_type);
   std::string method_name = PrettyMethod(method_idx, dex_file);
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
+
   // Always use the thumb2 assembler: some runtime functionality (like implicit stack
   // overflow checks) assume thumb2.
   if (instruction_set == kArm) {
@@ -713,9 +726,6 @@
       &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
       kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
 
-  // For testing purposes, we put a special marker on method names that should be compiled
-  // with this compiler. This makes sure we're not regressing.
-  bool shouldCompile = method_name.find("$opt$") != std::string::npos;
   bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
 
   std::unique_ptr<CodeGenerator> codegen(
@@ -724,7 +734,6 @@
                             *compiler_driver->GetInstructionSetFeatures(),
                             compiler_driver->GetCompilerOptions()));
   if (codegen.get() == nullptr) {
-    CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
@@ -765,8 +774,6 @@
   {
     PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
     if (!builder.BuildGraph(*code_item)) {
-      DCHECK(!(IsCompilingWithCoreImage() && shouldCompile))
-          << "Could not build graph in optimizing compiler";
       pass_observer.SetGraphInBadState();
       return nullptr;
     }
@@ -845,18 +852,36 @@
                                             Handle<mirror::DexCache> dex_cache) const {
   CompilerDriver* compiler_driver = GetCompilerDriver();
   CompiledMethod* method = nullptr;
-  const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
-  DCHECK(!verified_method->HasRuntimeThrow());
-  if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-      || CanHandleVerificationFailure(verified_method)) {
-     method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                         method_idx, jclass_loader, dex_file, dex_cache);
-  } else {
-    if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+  if (Runtime::Current()->IsAotCompiler()) {
+    const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
+    DCHECK(!verified_method->HasRuntimeThrow());
+    if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
+        || CanHandleVerificationFailure(verified_method)) {
+       method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
+                           method_idx, jclass_loader, dex_file, dex_cache);
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+      if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+        MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+      } else {
+        MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+      }
     }
+  } else {
+    // This is for the JIT compiler, which has already ensured the class is verified.
+    // We can go straight to compiling.
+    DCHECK(Runtime::Current()->UseJit());
+    method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
+                        method_idx, jclass_loader, dex_file, dex_cache);
+  }
+
+  if (kIsDebugBuild &&
+      IsCompilingWithCoreImage() &&
+      IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
+    // For testing purposes, we put a special marker on method names that should be compiled
+    // with this compiler. This makes sure we're not regressing.
+    std::string method_name = PrettyMethod(method_idx, dex_file);
+    bool shouldCompile = method_name.find("$opt$") != std::string::npos;
+    DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
   }
 
   return method;
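
The pipeline edits above (adding `sharpening` and `lse`, and moving MaybeRunInliner out of the non-try/catch branch) all operate on plain arrays of pass pointers executed in order. A minimal stand-alone sketch of that pattern, with made-up pass names (not ART code):

```cpp
#include <iostream>
#include <vector>

// A "pass" with a name and a Run() hook, executed strictly in list order.
struct Pass {
  const char* name;
  void Run() const { std::cout << "running " << name << "\n"; }
};

void RunPasses(const std::vector<const Pass*>& passes) {
  for (const Pass* pass : passes) {
    pass->Run();  // position in the array determines when the pass runs
  }
}

int main() {
  Pass simplify{"instruction_simplifier"};
  Pass sharpen{"sharpening"};  // inserted between existing passes, as in the hunks above
  Pass dce{"dead_code_elimination"};
  RunPasses({&simplify, &sharpen, &dce});
  return 0;
}
```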
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index fce7769..30bcf19 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -125,7 +125,6 @@
   // which means that a call to PerformMove could change any source operand
   // in the move graph.
 
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   if (move->IsRedundant()) {
@@ -406,7 +405,6 @@
   // we will update source operand in the move graph to reduce dependencies in
   // the graph.
 
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   DCHECK(!move->IsEliminated());
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index da91cb8..46e6f3e 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -56,7 +56,6 @@
       : ParallelMoveResolverWithSwap(allocator) {}
 
   void EmitMove(size_t index) OVERRIDE {
-    DCHECK_LT(index, moves_.size());
     MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
@@ -69,7 +68,6 @@
   }
 
   void EmitSwap(size_t index) OVERRIDE {
-    DCHECK_LT(index, moves_.size());
     MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
@@ -129,7 +127,6 @@
   void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {}
 
   void EmitMove(size_t index) OVERRIDE {
-    DCHECK_LT(index, moves_.size());
     MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 34850a5..429e6e3 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -131,7 +131,7 @@
     PrintString("  ");
     PrintInt(gota->GetId());
     PrintString(": Goto ");
-    PrintInt(current_block_->GetSuccessor(0)->GetBlockId());
+    PrintInt(current_block_->GetSuccessors()[0]->GetBlockId());
     PrintNewLine();
   }
 
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index f7a7e42..659da06 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -373,12 +373,18 @@
   if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) {
     // Calls to String.<init> are replaced with a StringFactory.
     if (kIsDebugBuild) {
-      ScopedObjectAccess soa(Thread::Current());
+      HInvoke* invoke = instr->AsInvoke();
       ClassLinker* cl = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = cl->FindDexCache(
-          soa.Self(), instr->AsInvoke()->GetDexFile(), false);
-      ArtMethod* method = dex_cache->GetResolvedMethod(
-          instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize());
+      ScopedObjectAccess soa(Thread::Current());
+      StackHandleScope<2> hs(soa.Self());
+      Handle<mirror::DexCache> dex_cache(
+          hs.NewHandle(cl->FindDexCache(soa.Self(), invoke->GetDexFile(), false)));
+      // Use a null loader. We should probably use the compiling method's class loader,
+      // but then we would need to pass it to RTPVisitor just for this debug check. Since
+      // the method is from the String class, the null loader is good enough.
+      Handle<mirror::ClassLoader> loader;
+      ArtMethod* method = cl->ResolveMethod(
+          invoke->GetDexFile(), invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
       DCHECK(method != nullptr);
       mirror::Class* declaring_class = method->GetDeclaringClass();
       DCHECK(declaring_class != nullptr);
@@ -428,12 +434,21 @@
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
+static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::DexCache* dex_cache =
+      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false);
+  // Get type from dex cache assuming it was populated by the verifier.
+  return dex_cache->GetResolvedType(type_idx);
+}
+
 void RTPVisitor::VisitParameterValue(HParameterValue* instr) {
   ScopedObjectAccess soa(Thread::Current());
   // We check if the existing type is valid: the inliner may have set it.
   if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
-    // TODO: parse the signature and add precise types for the parameters.
-    SetClassAsTypeInfo(instr, nullptr, /* is_exact */ false);
+    mirror::Class* resolved_class =
+        GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
+    SetClassAsTypeInfo(instr, resolved_class, /* is_exact */ false);
   }
 }
 
@@ -479,11 +494,9 @@
 
 void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(soa.Self(), instr->GetDexFile(), false);
   // Get type from dex cache assuming it was populated by the verifier.
-  mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
-  // TODO: investigating why we are still getting unresolved classes: b/22821472.
+  mirror::Class* resolved_class =
+      GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
   if (resolved_class != nullptr) {
     instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
         handles_->NewHandle(resolved_class), /* is_exact */ true));
@@ -756,7 +769,9 @@
   while (!worklist_.empty()) {
     HInstruction* instruction = worklist_.back();
     worklist_.pop_back();
-    if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) {
+    bool updated_nullability = UpdateNullability(instruction);
+    bool updated_reference_type = UpdateReferenceTypeInfo(instruction);
+    if (updated_nullability || updated_reference_type) {
       AddDependentInstructionsToWorklist(instruction);
     }
   }
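
The reason the two Update calls above were hoisted out of the `||` expression is short-circuit evaluation: the second call would otherwise be skipped whenever the first one reports a change. A tiny stand-alone demonstration (not ART code):

```cpp
#include <iostream>

static int b_calls = 0;

bool UpdateA() { return true; }              // reports "something changed"
bool UpdateB() { ++b_calls; return false; }  // has a side effect that must always happen

int main() {
  if (UpdateA() || UpdateB()) { /* ... */ }  // short-circuit: UpdateB() never runs here
  std::cout << "UpdateB calls after '||' form: " << b_calls << "\n";     // 0

  bool updated_a = UpdateA();
  bool updated_b = UpdateB();                // hoisted form: both always evaluated
  if (updated_a || updated_b) { /* ... */ }
  std::cout << "UpdateB calls after hoisted form: " << b_calls << "\n";  // 1
  return 0;
}
```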
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 9cdb89b..ef22c81 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -85,12 +85,13 @@
 
 bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
                                                 InstructionSet instruction_set) {
-  return instruction_set == kArm64
-      || instruction_set == kX86_64
+  return instruction_set == kArm
+      || instruction_set == kArm64
+      || instruction_set == kMips
       || instruction_set == kMips64
-      || instruction_set == kArm
+      || instruction_set == kThumb2
       || instruction_set == kX86
-      || instruction_set == kThumb2;
+      || instruction_set == kX86_64;
 }
 
 static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
@@ -617,42 +618,40 @@
     // (2) Remove currently active intervals that are dead at this position.
     //     Move active intervals that have a lifetime hole at this position
     //     to inactive.
-    // Note: Copy elements we keep to the beginning, just like
-    //     v.erase(std::remove(v.begin(), v.end(), value), v.end());
-    auto active_kept_end = active_.begin();
-    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-      LiveInterval* interval = *it;
-      if (interval->IsDeadAt(position)) {
-        handled_.push_back(interval);
-      } else if (!interval->Covers(position)) {
-        inactive_.push_back(interval);
-      } else {
-        *active_kept_end++ = interval;  // Keep this interval.
-      }
-    }
-    // We have copied what we want to keep to [active_.begin(), active_kept_end),
-    // the rest of the data in active_ is junk - drop it.
+    auto active_kept_end = std::remove_if(
+        active_.begin(),
+        active_.end(),
+        [this, position](LiveInterval* interval) {
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (!interval->Covers(position)) {
+            inactive_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
     active_.erase(active_kept_end, active_.end());
 
     // (3) Remove currently inactive intervals that are dead at this position.
     //     Move inactive intervals that cover this position to active.
-    // Note: Copy elements we keep to the beginning, just like
-    //     v.erase(std::remove(v.begin(), v.begin() + num, value), v.begin() + num);
-    auto inactive_kept_end = inactive_.begin();
     auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
-    for (auto it = inactive_.begin(); it != inactive_to_handle_end; ++it) {
-      LiveInterval* interval = *it;
-      DCHECK(interval->GetStart() < position || interval->IsFixed());
-      if (interval->IsDeadAt(position)) {
-        handled_.push_back(interval);
-      } else if (interval->Covers(position)) {
-        active_.push_back(interval);
-      } else {
-        *inactive_kept_end++ = interval;  // Keep this interval.
-      }
-    }
-    // We have copied what we want to keep to [inactive_.begin(), inactive_kept_end),
-    // the rest of the data in the processed interval is junk - drop it.
+    auto inactive_kept_end = std::remove_if(
+        inactive_.begin(),
+        inactive_to_handle_end,
+        [this, position](LiveInterval* interval) {
+          DCHECK(interval->GetStart() < position || interval->IsFixed());
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (interval->Covers(position)) {
+            active_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
     inactive_.erase(inactive_kept_end, inactive_to_handle_end);
 
     if (current->IsSlowPathSafepoint()) {
@@ -1894,7 +1893,7 @@
       for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
         HInstruction* phi = inst_it.Current();
         for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
-          HBasicBlock* predecessor = current->GetPredecessor(i);
+          HBasicBlock* predecessor = current->GetPredecessors()[i];
           DCHECK_EQ(predecessor->NumberOfNormalSuccessors(), 1u);
           HInstruction* input = phi->InputAt(i);
           Location source = input->GetLiveInterval()->GetLocationAt(
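
The register-allocator change above replaces hand-rolled keep-copy loops with std::remove_if plus erase, where the predicate also hands removed intervals off to other worklists. A stand-alone sketch of that idiom using plain ints (not ART code):

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> active = {1, 2, 3, 4, 5, 6};  // stand-in for active_ intervals
  std::vector<int> handled;                      // stand-in for handled_ / inactive_

  // The predicate both classifies and moves: "dead" elements go to handled and are removed,
  // everything else stays. remove_if compacts the kept elements to the front...
  auto kept_end = std::remove_if(active.begin(), active.end(), [&handled](int interval) {
    if (interval % 2 == 0) {  // pretend even values are dead at the current position
      handled.push_back(interval);
      return true;
    }
    return false;             // keep this interval in active
  });
  // ...and erase() drops the leftover tail, as in active_.erase(active_kept_end, active_.end()).
  active.erase(kept_end, active.end());

  std::cout << "active: " << active.size() << ", handled: " << handled.size() << "\n";  // 3, 3
  return 0;
}
```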
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 21b36cb..080f970 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -312,7 +312,7 @@
   register_allocator.AllocateRegisters();
   ASSERT_TRUE(register_allocator.Validate(false));
 
-  HBasicBlock* loop_header = graph->GetBlock(2);
+  HBasicBlock* loop_header = graph->GetBlocks()[2];
   HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
 
   LiveInterval* phi_interval = phi->GetLiveInterval();
@@ -321,7 +321,7 @@
   ASSERT_TRUE(loop_update->HasRegister());
   ASSERT_NE(phi_interval->GetRegister(), loop_update->GetRegister());
 
-  HBasicBlock* return_block = graph->GetBlock(3);
+  HBasicBlock* return_block = graph->GetBlocks()[3];
   HReturn* ret = return_block->GetLastInstruction()->AsReturn();
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
@@ -343,8 +343,8 @@
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
 
-  HXor* first_xor = graph->GetBlock(1)->GetFirstInstruction()->AsXor();
-  HXor* last_xor = graph->GetBlock(1)->GetLastInstruction()->GetPrevious()->AsXor();
+  HXor* first_xor = graph->GetBlocks()[1]->GetFirstInstruction()->AsXor();
+  HXor* last_xor = graph->GetBlocks()[1]->GetLastInstruction()->GetPrevious()->AsXor();
   ASSERT_EQ(last_xor->InputAt(0), first_xor);
   LiveInterval* interval = first_xor->GetLiveInterval();
   ASSERT_EQ(interval->GetEnd(), last_xor->GetLifetimePosition());
@@ -475,7 +475,8 @@
   NullHandle<mirror::DexCache> dex_cache;
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (allocator) HBasicBlock(graph);
@@ -487,6 +488,7 @@
                                                          MemberOffset(22),
                                                          false,
                                                          kUnknownFieldIndex,
+                                                         kUnknownClassDefIndex,
                                                          graph->GetDexFile(),
                                                          dex_cache,
                                                          0);
@@ -513,6 +515,7 @@
                                               MemberOffset(42),
                                               false,
                                               kUnknownFieldIndex,
+                                              kUnknownClassDefIndex,
                                               graph->GetDexFile(),
                                               dex_cache,
                                               0);
@@ -521,6 +524,7 @@
                                             MemberOffset(42),
                                             false,
                                             kUnknownFieldIndex,
+                                            kUnknownClassDefIndex,
                                             graph->GetDexFile(),
                                             dex_cache,
                                             0);
@@ -624,7 +628,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (allocator) HBasicBlock(graph);
@@ -636,6 +641,7 @@
                                              MemberOffset(42),
                                              false,
                                              kUnknownFieldIndex,
+                                             kUnknownClassDefIndex,
                                              graph->GetDexFile(),
                                              dex_cache,
                                              0);
@@ -698,7 +704,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry->AddInstruction(parameter);
 
   HInstruction* constant1 = graph->GetIntConstant(1);
@@ -768,8 +775,10 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* first = new (allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* second = new (allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* first = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* second = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry->AddInstruction(first);
   entry->AddInstruction(second);
 
@@ -820,10 +829,14 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* one = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* two = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* three = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* four = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* one = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* two = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* three = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* four = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry->AddInstruction(one);
   entry->AddInstruction(two);
   entry->AddInstruction(three);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
new file mode 100644
index 0000000..a128079
--- /dev/null
+++ b/compiler/optimizing/sharpening.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sharpening.h"
+
+#include "code_generator.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+#include "driver/compiler_driver.h"
+#include "nodes.h"
+#include "runtime.h"
+
+namespace art {
+
+void HSharpening::Run() {
+  // We don't care about the order of the blocks here.
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInvokeStaticOrDirect()) {
+        ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+      }
+      // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
+      //       here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
+      //       because we know the type better when inlining.
+      // TODO: HLoadClass, HLoadString - select PC relative dex cache array access if
+      //       available.
+    }
+  }
+}
+
+void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  if (invoke->IsStringInit()) {
+    // Not using the dex cache arrays. But we could still try to use a better dispatch...
+    // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
+    return;
+  }
+
+  // TODO: Avoid CompilerDriver.
+  InvokeType invoke_type = invoke->GetOriginalInvokeType();
+  MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
+  int vtable_idx;
+  uintptr_t direct_code, direct_method;
+  bool success = compiler_driver_->ComputeInvokeInfo(
+      &compilation_unit_,
+      invoke->GetDexPc(),
+      false /* update_stats: already updated in builder */,
+      true /* enable_devirtualization */,
+      &invoke_type,
+      &target_method,
+      &vtable_idx,
+      &direct_code,
+      &direct_method);
+  DCHECK(success);
+  DCHECK_EQ(invoke_type, invoke->GetInvokeType());
+  DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file);
+  DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index);
+
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
+  uint64_t method_load_data = 0u;
+  uint64_t direct_code_ptr = 0u;
+
+  HGraph* outer_graph = codegen_->GetGraph();
+  if (target_method.dex_file == &outer_graph->GetDexFile() &&
+      target_method.dex_method_index == outer_graph->GetMethodIdx()) {
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
+  } else {
+    bool use_pc_relative_instructions =
+        ((direct_method == 0u || direct_code == static_cast<uintptr_t>(-1))) &&
+        ContainsElement(compiler_driver_->GetDexFilesForOatFile(), target_method.dex_file);
+    if (direct_method != 0u) {  // Should we use a direct pointer to the method?
+      // Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while
+      // kDirectAddress would be fine for image methods, we don't support it at the moment.
+      DCHECK(!Runtime::Current()->UseJit());
+      if (direct_method != static_cast<uintptr_t>(-1)) {  // Is the method pointer known now?
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
+        method_load_data = direct_method;
+      } else {  // The direct pointer will be known at link time.
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup;
+      }
+    } else {  // Use dex cache.
+      DCHECK_EQ(target_method.dex_file, &graph_->GetDexFile());
+      if (use_pc_relative_instructions) {  // Can we use PC-relative access to the dex cache arrays?
+        DCHECK(!Runtime::Current()->UseJit());
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
+        DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
+                                    &graph_->GetDexFile());
+        method_load_data = layout.MethodOffset(target_method.dex_method_index);
+      } else {  // We must go through the ArtMethod's pointer to resolved methods.
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+      }
+    }
+    if (direct_code != 0u) {  // Should we use a direct pointer to the code?
+      // Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and
+      // while kCallDirect would be fine for image methods, we don't support it at the moment.
+      DCHECK(!Runtime::Current()->UseJit());
+      if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
+        direct_code_ptr = direct_code;
+      } else if (use_pc_relative_instructions) {
+        // Use PC-relative calls for invokes within a multi-dex oat file.
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
+      } else {  // The direct pointer will be known at link time.
+        // NOTE: This is used for app->boot calls when compiling an app against
+        // a relocatable but not yet relocated image.
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup;
+      }
+    } else {  // We must use the code pointer from the ArtMethod.
+      code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    }
+  }
+
+  if (graph_->IsDebuggable()) {
+    // For debuggable apps always use the code pointer from ArtMethod
+    // so that we don't circumvent instrumentation stubs if installed.
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+  }
+
+  HInvokeStaticOrDirect::DispatchInfo desired_dispatch_info = {
+      method_load_kind, code_ptr_location, method_load_data, direct_code_ptr
+  };
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info =
+      codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info,
+                                                         invoke->GetTargetMethod());
+  invoke->SetDispatchInfo(dispatch_info);
+}
+
+}  // namespace art
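
Stripped of the recursive-call and JIT special cases, the method-load-kind choice in ProcessInvokeStaticOrDirect boils down to the following stand-alone sketch (not ART code; the enum is a simplified stand-in for HInvokeStaticOrDirect::MethodLoadKind):

```cpp
#include <cstdint>
#include <iostream>

// Simplified stand-in for HInvokeStaticOrDirect::MethodLoadKind.
enum class LoadKind { kDirectAddress, kDirectAddressWithFixup, kDexCachePcRelative, kDexCacheViaMethod };

LoadKind ChooseLoadKind(uintptr_t direct_method, bool pc_relative_ok) {
  constexpr uintptr_t kKnownOnlyAtLinkTime = static_cast<uintptr_t>(-1);
  if (direct_method != 0u) {  // a direct pointer to the method is available
    return (direct_method != kKnownOnlyAtLinkTime) ? LoadKind::kDirectAddress
                                                   : LoadKind::kDirectAddressWithFixup;
  }
  // Otherwise go through the dex cache, PC-relative when the dex file is in the oat file being built.
  return pc_relative_ok ? LoadKind::kDexCachePcRelative : LoadKind::kDexCacheViaMethod;
}

int main() {
  std::cout << static_cast<int>(ChooseLoadKind(0u, /*pc_relative_ok=*/true)) << "\n";       // 2
  std::cout << static_cast<int>(ChooseLoadKind(0x1000u, /*pc_relative_ok=*/false)) << "\n"; // 0
  return 0;
}
```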
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
new file mode 100644
index 0000000..adae700
--- /dev/null
+++ b/compiler/optimizing/sharpening.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SHARPENING_H_
+#define ART_COMPILER_OPTIMIZING_SHARPENING_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+class CompilerDriver;
+class DexCompilationUnit;
+class HInvokeStaticOrDirect;
+
+// Optimization that tries to improve the way we dispatch methods and access types,
+// fields, etc. Besides actual method sharpening based on receiver type (for example
+// virtual->direct), this includes selecting the best available dispatch for
+// invoke-static/-direct based on code generator support.
+class HSharpening : public HOptimization {
+ public:
+  HSharpening(HGraph* graph,
+              CodeGenerator* codegen,
+              const DexCompilationUnit& compilation_unit,
+              CompilerDriver* compiler_driver)
+      : HOptimization(graph, kSharpeningPassName),
+        codegen_(codegen),
+        compilation_unit_(compilation_unit),
+        compiler_driver_(compiler_driver) { }
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kSharpeningPassName = "sharpening";
+
+ private:
+  void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
+
+  CodeGenerator* codegen_;
+  const DexCompilationUnit& compilation_unit_;
+  CompilerDriver* compiler_driver_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SHARPENING_H_
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index 338a3aa..1dc6986 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -76,18 +76,15 @@
 
 SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const {
   DCHECK(block->IsLoopHeader());
-  DCHECK_LT(block->GetBlockId(), loop_effects_.size());
   return loop_effects_[block->GetBlockId()];
 }
 
 SideEffects SideEffectsAnalysis::GetBlockEffects(HBasicBlock* block) const {
-  DCHECK_LT(block->GetBlockId(), block_effects_.size());
   return block_effects_[block->GetBlockId()];
 }
 
 void SideEffectsAnalysis::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) {
   uint32_t id = info->GetHeader()->GetBlockId();
-  DCHECK_LT(id, loop_effects_.size());
   loop_effects_[id] = loop_effects_[id].Union(effects);
 }
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 40c75af..4565590 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -389,7 +389,6 @@
 }
 
 ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
-  DCHECK_LT(block->GetBlockId(), locals_for_.size());
   ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
   const size_t vregs = GetGraph()->GetNumberOfVRegs();
   if (locals->empty() && vregs != 0u) {
@@ -417,7 +416,6 @@
 
 HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
   ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
-  DCHECK_LT(local, locals->size());
   return (*locals)[local];
 }
 
@@ -467,7 +465,7 @@
     for (size_t local = 0; local < current_locals_->size(); ++local) {
       bool one_predecessor_has_no_value = false;
       bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessor(0), local);
+      HInstruction* value = ValueOfLocal(block->GetPredecessors()[0], local);
 
       for (HBasicBlock* predecessor : block->GetPredecessors()) {
         HInstruction* current = ValueOfLocal(predecessor, local);
@@ -489,7 +487,7 @@
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
         for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
-          HInstruction* pred_value = ValueOfLocal(block->GetPredecessor(i), local);
+          HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
           phi->SetRawInputAt(i, pred_value);
         }
         block->AddPhi(phi);
@@ -626,7 +624,6 @@
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  DCHECK_LT(load->GetLocal()->GetRegNumber(), current_locals_->size());
   HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
   // If the operation requests a specific type, we make sure its input is of that type.
   if (load->GetType() != value->GetType()) {
@@ -641,7 +638,6 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  DCHECK_LT(store->GetLocal()->GetRegNumber(), current_locals_->size());
   (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1);
   store->GetBlock()->RemoveInstruction(store);
 }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b869d57..b9d8731 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -159,7 +159,6 @@
 void SsaLivenessAnalysis::ComputeLiveness() {
   for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    DCHECK_LT(block->GetBlockId(), block_infos_.size());
     block_infos_[block->GetBlockId()] =
         new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_);
   }
@@ -388,14 +387,14 @@
         }
         // If the instruction dies at the phi assignment, we can try having the
         // same register.
-        if (end == user->GetBlock()->GetPredecessor(input_index)->GetLifetimeEnd()) {
+        if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) {
           for (size_t i = 0, e = user->InputCount(); i < e; ++i) {
             if (i == input_index) {
               continue;
             }
             HInstruction* input = user->InputAt(i);
             Location location = input->GetLiveInterval()->GetLocationAt(
-                user->GetBlock()->GetPredecessor(i)->GetLifetimeEnd() - 1);
+                user->GetBlock()->GetPredecessors()[i]->GetLifetimeEnd() - 1);
             if (location.IsRegisterKind()) {
               int reg = RegisterOrLowRegister(location);
               if (free_until[reg] >= use_position) {
@@ -432,7 +431,6 @@
     const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
     for (size_t i = 0, e = defined_by_->InputCount(); i < e; ++i) {
       HInstruction* input = defined_by_->InputAt(i);
-      DCHECK_LT(i, predecessors.size());
       size_t end = predecessors[i]->GetLifetimeEnd();
       LiveInterval* input_interval = input->GetLiveInterval()->GetSiblingAt(end - 1);
       if (input_interval->GetEnd() == end) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index e4b0999..572a7b6 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -1117,27 +1117,22 @@
   void Analyze();
 
   BitVector* GetLiveInSet(const HBasicBlock& block) const {
-    DCHECK_LT(block.GetBlockId(), block_infos_.size());
     return &block_infos_[block.GetBlockId()]->live_in_;
   }
 
   BitVector* GetLiveOutSet(const HBasicBlock& block) const {
-    DCHECK_LT(block.GetBlockId(), block_infos_.size());
     return &block_infos_[block.GetBlockId()]->live_out_;
   }
 
   BitVector* GetKillSet(const HBasicBlock& block) const {
-    DCHECK_LT(block.GetBlockId(), block_infos_.size());
     return &block_infos_[block.GetBlockId()]->kill_;
   }
 
   HInstruction* GetInstructionFromSsaIndex(size_t index) const {
-    DCHECK_LT(index, instructions_from_ssa_index_.size());
     return instructions_from_ssa_index_[index];
   }
 
   HInstruction* GetInstructionFromPosition(size_t index) const {
-    DCHECK_LT(index, instructions_from_lifetime_position_.size());
     return instructions_from_lifetime_position_[index];
   }
 
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index a095809..c60a4ea 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -210,7 +210,6 @@
       // Entries with the same dex map will have the same offset.
     }
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
-      DCHECK_LT(inline_info_index, inline_infos_.size());
       InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
                                         inline_entry.live_dex_registers_mask);
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 4783e28..fc27a2b 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -63,6 +63,7 @@
       : allocator_(allocator),
         stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)),
         location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
+        location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
@@ -136,12 +137,10 @@
   }
 
   const StackMapEntry& GetStackMap(size_t i) const {
-    DCHECK_LT(i, stack_maps_.size());
     return stack_maps_[i];
   }
 
   void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
-    DCHECK_LT(i, stack_maps_.size());
     stack_maps_[i].native_pc_offset = native_pc_offset;
   }
 
@@ -175,8 +174,10 @@
   ArenaVector<DexRegisterLocation> location_catalog_entries_;
   // Map from Dex register location catalog entries to their indices in the
   // location catalog.
-  typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn,
-                  DexRegisterLocationHashFn> LocationCatalogEntriesIndices;
+  using LocationCatalogEntriesIndices = ArenaHashMap<DexRegisterLocation,
+                                                     size_t,
+                                                     LocationCatalogEntriesIndicesEmptyFn,
+                                                     DexRegisterLocationHashFn>;
   LocationCatalogEntriesIndices location_catalog_entries_indices_;
 
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
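
The switch from a plain HashMap typedef to ArenaHashMap is also why the constructor hunk earlier in this file now passes allocator->Adapter(...) for location_catalog_entries_indices_: an arena-backed container must receive its allocator at construction. A rough stand-alone analogy using std::pmr (not ART code):

```cpp
#include <cstddef>
#include <iostream>
#include <memory_resource>
#include <unordered_map>

int main() {
  std::pmr::monotonic_buffer_resource arena;              // rough stand-in for an ArenaAllocator
  std::pmr::unordered_map<int, size_t> indices(&arena);   // the allocator is supplied at construction
  indices.emplace(42, size_t{0});                         // later insertions draw from the arena
  std::cout << indices.size() << "\n";  // 1
  return 0;
}
```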
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index c4a3b28..560502f 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -15,8 +15,9 @@
  */
 
 #include "stack_map.h"
+
+#include "base/arena_bit_vector.h"
 #include "stack_map_stream.h"
-#include "utils/arena_bit_vector.h"
 
 #include "gtest/gtest.h"
 
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index e745d94..b6c704c 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -36,7 +36,7 @@
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
 
-  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessor(0);
+  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessors()[0];
   HInstruction* first_instruction = first_block->GetFirstInstruction();
   // Account for some tests having a store local as first instruction.
   ASSERT_TRUE(first_instruction->IsSuspendCheck()
diff --git a/compiler/utils/arena_allocator_test.cc b/compiler/utils/arena_allocator_test.cc
index 7065527..7f67ef1 100644
--- a/compiler/utils/arena_allocator_test.cc
+++ b/compiler/utils/arena_allocator_test.cc
@@ -15,8 +15,8 @@
  */
 
 #include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
 #include "gtest/gtest.h"
-#include "utils/arena_bit_vector.h"
 
 namespace art {
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 807beda..68e3956 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -16,6 +16,8 @@
 
 #include "assembler_arm.h"
 
+#include <algorithm>
+
 #include "base/bit_utils.h"
 #include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -922,5 +924,24 @@
   return value | i << 26 | imm3 << 12 | a << 7;
 }
 
+void ArmAssembler::FinalizeTrackedLabels() {
+  if (!tracked_labels_.empty()) {
+    // This array should be sorted, as assembly is generated in linearized order. Sorting isn't
+    // strictly required, but GetAdjustedPosition(), used by AdjustLabelPosition(), can take
+    // advantage of it, so verify that it actually holds.
+    DCHECK(std::is_sorted(
+        tracked_labels_.begin(),
+        tracked_labels_.end(),
+        [](const Label* lhs, const Label* rhs) { return lhs->Position() < rhs->Position(); }));
+
+    Label* last_label = nullptr;  // Track duplicates, we must not adjust twice.
+    for (Label* label : tracked_labels_) {
+      DCHECK_NE(label, last_label);
+      AdjustLabelPosition(label);
+      last_label = label;
+    }
+  }
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 967b191..4a6e6d7 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -77,6 +77,45 @@
   DISALLOW_COPY_AND_ASSIGN(Literal);
 };
 
+// Jump table: table of labels emitted after the literals. Similar to literals.
+class JumpTable {
+ public:
+  explicit JumpTable(std::vector<Label*>&& labels)
+      : label_(), anchor_label_(), labels_(std::move(labels)) {
+  }
+
+  uint32_t GetSize() const {
+    return static_cast<uint32_t>(labels_.size()) * sizeof(uint32_t);
+  }
+
+  const std::vector<Label*>& GetData() const {
+    return labels_;
+  }
+
+  Label* GetLabel() {
+    return &label_;
+  }
+
+  const Label* GetLabel() const {
+    return &label_;
+  }
+
+  Label* GetAnchorLabel() {
+    return &anchor_label_;
+  }
+
+  const Label* GetAnchorLabel() const {
+    return &anchor_label_;
+  }
+
+ private:
+  Label label_;
+  Label anchor_label_;
+  std::vector<Label*> labels_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTable);
+};
+
 class ShifterOperand {
  public:
   ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
@@ -470,6 +509,13 @@
     orr(rd, rn, so, cond, kCcSet);
   }
 
+  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
+
+  virtual void orns(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) {
+    orn(rd, rn, so, cond, kCcSet);
+  }
+
   virtual void mov(Register rd, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) = 0;
 
@@ -678,6 +724,8 @@
     AddConstant(rd, rd, value, cond, set_cc);
   }
 
+  virtual void CmpConstant(Register rn, int32_t value, Condition cond = AL) = 0;
+
   // Load and Store. May clobber IP.
   virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
   void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {
@@ -832,6 +880,8 @@
                                      uint32_t immediate,
                                      ShifterOperand* shifter_op) = 0;
 
+  virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0;
+
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
   virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
@@ -987,11 +1037,43 @@
     b(label);
   }
 
+  // Jump table support. This is split into three functions:
+  //
+  // * CreateJumpTable creates the internal metadata to track the jump targets, and emits code to
+  // load the base address of the jump table.
+  //
+  // * EmitJumpTableDispatch emits the code to actually jump, assuming that the right table value
+  // has been loaded into a register already.
+  //
+  // * FinalizeTables emits the jump table into the literal pool. This can only be called after the
+  // labels for the jump targets have been finalized.
+
+  // Create a jump table for the given labels that will be emitted when finalizing. Create a load
+  // sequence (or placeholder) that stores the base address into the given register. When the table
+  // is emitted, offsets will be relative to the location EmitJumpTableDispatch was called on (the
+  // anchor).
+  virtual JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) = 0;
+
+  // Emit the jump-table jump, assuming that the right value was loaded into displacement_reg.
+  virtual void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) = 0;
+
+  // Bind a Label that needs to be updated by the assembler in FinalizeCode() if its position
+  // changes due to branch/literal fixup.
+  void BindTrackedLabel(Label* label) {
+    Bind(label);
+    tracked_labels_.push_back(label);
+  }
+
  protected:
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
     return *reg1 - *reg2;
   }
+
+  void FinalizeTrackedLabels();
+
+  // Tracked labels. Use a vector, as we need to sort before adjusting.
+  std::vector<Label*> tracked_labels_;
 };
 
 // Slowpath entered when Thread::Current()->_exception is non-null
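To make the three-step split described in the new ArmAssembler comments concrete, here is a condensed sketch of the intended call sequence, written in the style of the Thumb2 assembler tests added later in this change. The table-entry load and the index computation are placeholders (marked as elided), not part of the API contract.

  Label case0, case1, case2;
  std::vector<Label*> targets({ &case0, &case1, &case2 });

  // Jump targets must be bound with BindTrackedLabel so their positions get
  // adjusted when branches and literals are resized in FinalizeCode().
  __ BindTrackedLabel(&case0);
  // ... code for case 0 ...
  __ BindTrackedLabel(&case1);
  // ... code for case 1 ...

  // 1) Create the table and emit the base-address load (ADR or a longer sequence).
  arm::JumpTable* table = __ CreateJumpTable(std::move(targets), arm::R1);
  // 2) Load the selected 32-bit table entry into a register (index computation elided).
  // 3) Dispatch; the anchor that the table offsets are relative to is bound here.
  __ EmitJumpTableDispatch(table, arm::R2);

  __ BindTrackedLabel(&case2);
  // ... code for case 2 ...
  // FinalizeCode() later emits the table itself after the code and literals.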
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index f7772ae..a7dbacd 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -48,6 +48,11 @@
   return false;
 }
 
+bool Arm32Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
+  ShifterOperand shifter_op;
+  return ShifterOperandCanHoldArm32(immediate, &shifter_op);
+}
+
 bool Arm32Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
                                            Register rn ATTRIBUTE_UNUSED,
                                            Opcode opcode ATTRIBUTE_UNUSED,
@@ -130,6 +135,15 @@
 }
 
 
+void Arm32Assembler::orn(Register rd ATTRIBUTE_UNUSED,
+                         Register rn ATTRIBUTE_UNUSED,
+                         const ShifterOperand& so ATTRIBUTE_UNUSED,
+                         Condition cond ATTRIBUTE_UNUSED,
+                         SetCc set_cc ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "orn is not supported on ARM32";
+}
+
+
 void Arm32Assembler::mov(Register rd, const ShifterOperand& so,
                          Condition cond, SetCc set_cc) {
   EmitType01(cond, so.type(), MOV, set_cc, R0, rd, so);
@@ -1371,6 +1385,21 @@
   }
 }
 
+void Arm32Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHoldArm32(~value, &shifter_op)) {
+    cmn(rn, shifter_op, cond);
+  } else {
+    movw(IP, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(IP, value_high, cond);
+    }
+    cmp(rn, ShifterOperand(IP), cond);
+  }
+}
 
 void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
@@ -1570,6 +1599,23 @@
   b(label, NE);
 }
 
+JumpTable* Arm32Assembler::CreateJumpTable(std::vector<Label*>&& labels ATTRIBUTE_UNUSED,
+                                           Register base_reg ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "CreateJumpTable is not supported on ARM32";
+  UNREACHABLE();
+}
+
+void Arm32Assembler::EmitJumpTableDispatch(JumpTable* jump_table ATTRIBUTE_UNUSED,
+                                           Register displacement_reg ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "EmitJumpTableDispatch is not supported on ARM32";
+  UNREACHABLE();
+}
+
+void Arm32Assembler::FinalizeCode() {
+  ArmAssembler::FinalizeCode();
+  // Currently the arm32 assembler does not support fixups, and thus no tracking. We must not call
+  // FinalizeTrackedLabels(), which would lead to an abort.
+}
 
 }  // namespace arm
 }  // namespace art
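For context on the immediate selection above (ShifterOperandCanAlwaysHold and the cmp/cmn/movw fallback chain in CmpConstant): an A32 operand-2 immediate is an 8-bit value rotated right by an even amount. The standalone check below only illustrates that rule and is not ART's implementation; when neither the value nor its bitwise complement encodes, the new CmpConstant materializes the constant in IP via movw (plus movt if the high half is non-zero) and compares against that.

#include <cstdint>

// Illustration only: true if 'value' can be encoded as an 8-bit constant rotated
// right by an even amount (0, 2, ..., 30), i.e. as an A32 operand-2 immediate.
bool CanEncodeAsArm32Immediate(uint32_t value) {
  for (uint32_t rot = 0; rot < 32; rot += 2) {
    // Rotating left by 'rot' undoes a rotate-right-by-'rot' encoding.
    uint32_t rotated = (rot == 0u) ? value : ((value << rot) | (value >> (32u - rot)));
    if (rotated <= 0xFFu) {
      return true;
    }
  }
  return false;
}

// CanEncodeAsArm32Immediate(0x000000FF)  -> true   (no rotation needed)
// CanEncodeAsArm32Immediate(0xFF000000)  -> true   (0xFF rotated right by 8)
// CanEncodeAsArm32Immediate(0x00012345)  -> false  (falls back to movw/movt + cmp)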
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 3407369..5233dcb 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -74,6 +74,9 @@
   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+
   virtual void mov(Register rd, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
@@ -258,6 +261,8 @@
   void AddConstant(Register rd, Register rn, int32_t value,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
@@ -294,6 +299,7 @@
                              uint32_t immediate,
                              ShifterOperand* shifter_op) OVERRIDE;
 
+  bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
@@ -304,6 +310,11 @@
 
   void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
 
+  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
+  void FinalizeCode() OVERRIDE;
+
  private:
   void EmitType01(Condition cond,
                   int type,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 0f6c4f5..fb3aa1e 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -92,7 +92,7 @@
   label->BindTo(bound_pc);
 }
 
-void Thumb2Assembler::BindLiterals() {
+uint32_t Thumb2Assembler::BindLiterals() {
   // We don't add the padding here, that's done only after adjusting the Fixup sizes.
   uint32_t code_size = buffer_.Size();
   for (Literal& lit : literals_) {
@@ -100,6 +100,15 @@
     BindLabel(label, code_size);
     code_size += lit.GetSize();
   }
+  return code_size;
+}
+
+void Thumb2Assembler::BindJumpTables(uint32_t code_size) {
+  for (JumpTable& table : jump_tables_) {
+    Label* label = table.GetLabel();
+    BindLabel(label, code_size);
+    code_size += table.GetSize();
+  }
 }
 
 void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
@@ -144,7 +153,7 @@
       AdjustFixupIfNeeded(fixup, &current_code_size, &fixups_to_recalculate);
     } while (!fixups_to_recalculate.empty());
 
-    if ((current_code_size & 2) != 0 && !literals_.empty()) {
+    if ((current_code_size & 2) != 0 && (!literals_.empty() || !jump_tables_.empty())) {
       // If we need to add padding before literals, this may just push some out of range,
       // so recalculate all load literals. This makes up for the fact that we don't mark
       // load literal as a dependency of all previous Fixups even though it actually is.
@@ -173,6 +182,13 @@
       label->Reinitialize();
       label->BindTo(old_position + literals_adjustment);
     }
+    for (JumpTable& table : jump_tables_) {
+      Label* label = table.GetLabel();
+      DCHECK(label->IsBound());
+      int old_position = label->Position();
+      label->Reinitialize();
+      label->BindTo(old_position + literals_adjustment);
+    }
   }
 
   return current_code_size;
@@ -229,6 +245,43 @@
   }
 }
 
+void Thumb2Assembler::EmitJumpTables() {
+  if (!jump_tables_.empty()) {
+    // Jump tables require 4 byte alignment. (We don't support byte and half-word jump tables.)
+    uint32_t code_size = buffer_.Size();
+    DCHECK_ALIGNED(code_size, 2);
+    if ((code_size & 2u) != 0u) {
+      Emit16(0);
+    }
+    for (JumpTable& table : jump_tables_) {
+      // Bulk ensure capacity, as this may be large.
+      size_t orig_size = buffer_.Size();
+      buffer_.ExtendCapacity(orig_size + table.GetSize());
+#ifndef NDEBUG
+      buffer_.has_ensured_capacity_ = true;
+#endif
+
+      DCHECK_EQ(static_cast<size_t>(table.GetLabel()->Position()), buffer_.Size());
+      int32_t anchor_position = table.GetAnchorLabel()->Position() + 4;
+
+      for (Label* target : table.GetData()) {
+        // Ensure that the label was tracked, so that it will have the right position.
+        DCHECK(std::find(tracked_labels_.begin(), tracked_labels_.end(), target) !=
+                   tracked_labels_.end());
+
+        int32_t offset = target->Position() - anchor_position;
+        buffer_.Emit<int32_t>(offset);
+      }
+
+#ifndef NDEBUG
+      buffer_.has_ensured_capacity_ = false;
+#endif
+      size_t new_size = buffer_.Size();
+      DCHECK_LE(new_size - orig_size, table.GetSize());
+    }
+  }
+}
+
 inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
   DCHECK_ALIGNED(offset, 2);
   int16_t encoding = B15 | B14;
@@ -382,12 +435,38 @@
   return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset;
 }
 
+inline int16_t Thumb2Assembler::AdrEncoding16(Register rd, int32_t offset) {
+  DCHECK(IsUint<10>(offset));
+  DCHECK(IsAligned<4>(offset));
+  DCHECK(!IsHighRegister(rd));
+  return B15 | B13 | (rd << 8) | (offset >> 2);
+}
+
+inline int32_t Thumb2Assembler::AdrEncoding32(Register rd, int32_t offset) {
+  DCHECK(IsUint<12>(offset));
+  // Bit     26: offset[11]
+  // Bits 14-12: offset[10-8]
+  // Bits   7-0: offset[7-0]
+  int32_t immediate_mask =
+      ((offset & (1 << 11)) << (26 - 11)) |
+      ((offset & (7 << 8)) << (12 - 8)) |
+      (offset & 0xFF);
+  return B31 | B30 | B29 | B28 | B25 | B19 | B18 | B17 | B16 | (rd << 8) | immediate_mask;
+}
+
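As a quick standalone check of the immediate placement used by AdrEncoding32 above: the 12-bit offset is split into i:imm3:imm8 and scattered into bits 26, 14-12 and 7-0. Only the immediate scatter is reproduced below; the fixed opcode bits are left out.

#include <cassert>
#include <cstdint>

// Mirrors the immediate_mask computation in AdrEncoding32 (illustration only).
uint32_t ScatterAdrImmediate(uint32_t offset) {  // 'offset' must fit in 12 bits.
  return ((offset & (1u << 11)) << (26 - 11)) |  // i    -> bit 26
         ((offset & (7u << 8)) << (12 - 8)) |    // imm3 -> bits 14-12
         (offset & 0xFFu);                       // imm8 -> bits 7-0
}

int main() {
  // 0xABC: i = 1, imm3 = 0b010, imm8 = 0xBC.
  assert(ScatterAdrImmediate(0xABC) == ((1u << 26) | (2u << 12) | 0xBCu));
  return 0;
}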
 void Thumb2Assembler::FinalizeCode() {
   ArmAssembler::FinalizeCode();
-  BindLiterals();
+  uint32_t size_after_literals = BindLiterals();
+  BindJumpTables(size_after_literals);
   uint32_t adjusted_code_size = AdjustFixups();
   EmitFixups(adjusted_code_size);
   EmitLiterals();
+  FinalizeTrackedLabels();
+  EmitJumpTables();
+}
+
+bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
+  return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
 }
 
 bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
@@ -410,6 +489,7 @@
     case MOV:
       // TODO: Support less than or equal to 12bits.
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+
     case MVN:
     default:
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
@@ -492,6 +572,12 @@
 }
 
 
+void Thumb2Assembler::orn(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond, SetCc set_cc) {
+  EmitDataProcessing(cond, ORN, set_cc, rn, rd, so);
+}
+
+
 void Thumb2Assembler::mov(Register rd, const ShifterOperand& so,
                           Condition cond, SetCc set_cc) {
   EmitDataProcessing(cond, MOV, set_cc, R0, rd, so);
@@ -1105,6 +1191,7 @@
       rn_is_valid = false;      // There is no Rn for these instructions.
       break;
     case TEQ:
+    case ORN:
       return true;
     case ADD:
     case SUB:
@@ -1222,6 +1309,7 @@
     case MOV: thumb_opcode =  2U /* 0b0010 */; rn = PC; break;
     case BIC: thumb_opcode =  1U /* 0b0001 */; break;
     case MVN: thumb_opcode =  3U /* 0b0011 */; rn = PC; break;
+    case ORN: thumb_opcode =  3U /* 0b0011 */; break;
     default:
       break;
   }
@@ -1757,6 +1845,15 @@
     case kLiteralFar:
       return 14u;
 
+    case kLiteralAddr1KiB:
+      return 2u;
+    case kLiteralAddr4KiB:
+      return 4u;
+    case kLiteralAddr64KiB:
+      return 6u;
+    case kLiteralAddrFar:
+      return 10u;
+
     case kLongOrFPLiteral1KiB:
       return 4u;
     case kLongOrFPLiteral256KiB:
@@ -1818,6 +1915,8 @@
     case kLiteral1KiB:
     case kLiteral4KiB:
     case kLongOrFPLiteral1KiB:
+    case kLiteralAddr1KiB:
+    case kLiteralAddr4KiB:
       DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2));
       diff += LiteralPoolPaddingSize(current_code_size);
       // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC
@@ -1830,12 +1929,14 @@
     case kLiteral1MiB:
     case kLiteral64KiB:
     case kLongOrFPLiteral256KiB:
+    case kLiteralAddr64KiB:
       DCHECK_GE(diff, 4);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 4;        // One extra 32-bit MOV.
       diff += LiteralPoolPaddingSize(current_code_size);
       break;
     case kLiteralFar:
     case kLongOrFPLiteralFar:
+    case kLiteralAddrFar:
       DCHECK_GE(diff, 8);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 8;        // Extra MOVW+MOVT; both 32-bit.
       diff += LiteralPoolPaddingSize(current_code_size);
@@ -1916,6 +2017,29 @@
       // This encoding can reach any target.
       break;
 
+    case kLiteralAddr1KiB:
+      DCHECK(!IsHighRegister(rn_));
+      if (IsUint<10>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddr4KiB);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddr4KiB:
+      if (IsUint<12>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddr64KiB);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddr64KiB:
+      if (IsUint<16>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddrFar);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddrFar:
+      // This encoding can reach any target.
+      break;
+
     case kLongOrFPLiteral1KiB:
       if (IsUint<10>(GetOffset(current_code_size))) {
         break;
@@ -2042,6 +2166,42 @@
       break;
     }
 
+    case kLiteralAddr1KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(location_, encoding);
+      break;
+    }
+    case kLiteralAddr4KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(location_, encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      break;
+    }
+    case kLiteralAddr64KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
+      int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+      buffer->Store<int16_t>(location_, mov_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
+      break;
+    }
+    case kLiteralAddrFar: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t offset = GetOffset(code_size);
+      int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
+      int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
+      int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+      buffer->Store<int16_t>(location_, movw_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
+      break;
+    }
+
     case kLongOrFPLiteral1KiB: {
       int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size));  // DCHECKs type_.
       buffer->Store<int16_t>(location_, encoding >> 16);
@@ -3247,6 +3407,25 @@
   }
 }
 
+void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  // We prefer to select the shorter code sequence rather than selecting add for
+  // positive values and sub for negative ones, which would slightly improve
+  // the readability of generated code for some constants.
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) {
+    cmn(rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    movw(IP, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(IP, value_high, cond);
+    }
+    cmp(rn, ShifterOperand(IP), cond);
+  }
+}
 
 void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
@@ -3463,5 +3642,39 @@
     b(label, NE);
   }
 }
+
+JumpTable* Thumb2Assembler::CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) {
+  jump_tables_.emplace_back(std::move(labels));
+  JumpTable* table = &jump_tables_.back();
+  DCHECK(!table->GetLabel()->IsBound());
+
+  bool use32bit = IsForced32Bit() || IsHighRegister(base_reg);
+  uint32_t location = buffer_.Size();
+  Fixup::Size size = use32bit ? Fixup::kLiteralAddr4KiB : Fixup::kLiteralAddr1KiB;
+  FixupId fixup_id = AddFixup(Fixup::LoadLiteralAddress(location, base_reg, size));
+  Emit16(static_cast<uint16_t>(table->GetLabel()->position_));
+  table->GetLabel()->LinkTo(fixup_id);
+  if (use32bit) {
+    Emit16(0);
+  }
+  DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
+
+  return table;
+}
+
+void Thumb2Assembler::EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) {
+  CHECK(!IsForced32Bit()) << "Forced 32-bit dispatch not implemented yet";
+  // 32-bit ADD doesn't support PC as an input, so we need a two-instruction sequence:
+  //   SUB ip, pc, #0
+  //   ADD pc, ip, reg
+  // TODO: Implement.
+
+  // The anchor's position needs to be fixed up before we can compute offsets - so make it a tracked
+  // label.
+  BindTrackedLabel(jump_table->GetAnchorLabel());
+
+  add(PC, PC, ShifterOperand(displacement_reg));
+}
+
 }  // namespace arm
 }  // namespace art
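The jump-table entries emitted by EmitJumpTables above are anchored at the `add pc, rX` dispatch instruction, and Thumb reads PC as that instruction's address plus 4, which is why anchor_position adds 4. The standalone arithmetic below is only a sanity check of that relationship, with made-up positions.

#include <cassert>
#include <cstdint>

// A table entry is target_pos - (dispatch_pos + 4): the "add pc, rX" at
// dispatch_pos sees PC as dispatch_pos + 4 in Thumb state.
int32_t JumpTableEntry(uint32_t dispatch_pos, uint32_t target_pos) {
  const uint32_t anchor_pc = dispatch_pos + 4u;
  return static_cast<int32_t>(target_pos) - static_cast<int32_t>(anchor_pc);
}

int main() {
  assert(JumpTableEntry(0x20, 0x60) == 0x3C);   // Forward target.
  assert(JumpTableEntry(0x40, 0x10) == -0x34);  // Backward target: negative entry.
  return 0;
}

This matches the `.4byte (.Ln - .Lbase - 4)` expectations in the new jump-table tests, where `.Lbase` is the address of the `add pc` instruction.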
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index a1a8927..38fd244 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
 
 #include <deque>
+#include <utility>
 #include <vector>
 
 #include "base/logging.h"
@@ -98,6 +99,9 @@
   virtual void orr(Register rd, Register rn, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
+                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+
   virtual void mov(Register rd, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
@@ -301,6 +305,8 @@
   void AddConstant(Register rd, Register rn, int32_t value,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
@@ -337,6 +343,8 @@
                              uint32_t immediate,
                              ShifterOperand* shifter_op) OVERRIDE;
 
+  bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
+
 
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
@@ -353,6 +361,12 @@
     force_32bit_ = true;
   }
 
+  // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
+  // will generate a fixup.
+  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+  // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
+  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
  private:
   typedef uint16_t FixupId;
 
@@ -394,6 +408,7 @@
       kCompareAndBranchXZero,     // cbz/cbnz.
       kLoadLiteralNarrow,         // Load narrow integer literal.
       kLoadLiteralWide,           // Load wide integer literal.
+      kLoadLiteralAddr,           // Load address of literal (used for jump table).
       kLoadFPLiteralSingle,       // Load FP literal single.
       kLoadFPLiteralDouble,       // Load FP literal double.
     };
@@ -424,6 +439,16 @@
       // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
       kLiteralFar,
 
+      // Load literal base addr.
+      // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
+      kLiteralAddr1KiB,
+      // ADR rX, label; 4KiB offset. 4 bytes.
+      kLiteralAddr4KiB,
+      // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
+      kLiteralAddr64KiB,
+      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
+      kLiteralAddrFar,
+
       // Load long or FP literal variants.
       // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
       kLongOrFPLiteral1KiB,
@@ -452,7 +477,7 @@
     }
 
     // Load narrow literal.
-    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) {
+    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
       DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
              size == kLiteral1MiB || size == kLiteralFar);
       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
@@ -488,6 +513,14 @@
                    AL, kLoadFPLiteralDouble, size, location);
     }
 
+    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
+      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
+             size == kLiteralAddrFar);
+      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
+      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+                   AL, kLoadLiteralAddr, size, location);
+    }
+
     Type GetType() const {
       return type_;
     }
@@ -751,12 +784,14 @@
   }
 
   void BindLabel(Label* label, uint32_t bound_pc);
-  void BindLiterals();
+  uint32_t BindLiterals();
+  void BindJumpTables(uint32_t code_size);
   void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
                            std::deque<FixupId>* fixups_to_recalculate);
   uint32_t AdjustFixups();
   void EmitFixups(uint32_t adjusted_code_size);
   void EmitLiterals();
+  void EmitJumpTables();
 
   static int16_t BEncoding16(int32_t offset, Condition cond);
   static int32_t BEncoding32(int32_t offset, Condition cond);
@@ -773,6 +808,8 @@
   static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
   static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
   static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
+  static int16_t AdrEncoding16(Register rd, int32_t offset);
+  static int32_t AdrEncoding32(Register rd, int32_t offset);
 
   std::vector<Fixup> fixups_;
   std::unique_ptr<FixupId[]> fixup_dependents_;
@@ -781,6 +818,9 @@
   // without invalidating pointers and references to existing elements.
   std::deque<Literal> literals_;
 
+  // Jump table list.
+  std::deque<JumpTable> jump_tables_;
+
   // Data for AdjustedPosition(), see the description there.
   uint32_t last_position_adjustment_;
   uint32_t last_old_position_;
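The four kLiteralAddr fixup sizes above grow as the table ends up further from the ADR site. The sketch below mirrors the IsUint<10>/<12>/<16> thresholds used in the size-adjustment switch; the real fixup pass additionally accounts for literal-pool padding, and the 2-byte form is only usable for a low register with a 4-byte-aligned offset.

#include <cstdint>

// Sketch only: byte size of the address-load sequence for a given forward offset.
//  2: 16-bit ADR               (low register, offset < 1 KiB, 4-byte aligned)
//  4: 32-bit ADR               (offset < 4 KiB)
//  6: MOVW + ADD rX, pc        (offset < 64 KiB)
// 10: MOVW + MOVT + ADD rX, pc (any offset)
uint32_t AddrLoadSizeInBytes(uint32_t offset, bool low_register) {
  if (low_register && offset < (1u << 10) && (offset & 3u) == 0u) {
    return 2u;
  }
  if (offset < (1u << 12)) {
    return 4u;
  }
  if (offset < (1u << 16)) {
    return 6u;
  }
  return 10u;
}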
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 9c08ce0..cb4b20b 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -17,6 +17,7 @@
 #include "assembler_thumb2.h"
 
 #include "base/stl_util.h"
+#include "base/stringprintf.h"
 #include "utils/assembler_test.h"
 
 namespace art {
@@ -1011,6 +1012,315 @@
             __ GetAdjustedPosition(label.Position()));
 }
 
+TEST_F(AssemblerThumb2Test, BindTrackedLabel) {
+  Label non_tracked, tracked, branch_target;
+
+  // A few dummy loads on entry.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // A branch that will need to be fixed up.
+  __ cbz(arm::R0, &branch_target);
+
+  // Some more dummy loads.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Now insert tracked and untracked label.
+  __ Bind(&non_tracked);
+  __ BindTrackedLabel(&tracked);
+
+  // A lot of dummy loads, to ensure the branch needs resizing.
+  constexpr size_t kLdrR0R0CountLong = 60;
+  for (size_t i = 0; i != kLdrR0R0CountLong; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Bind the branch target.
+  __ Bind(&branch_target);
+
+  // One more load.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "cmp r0, #0\n"                                                       // cbz r0, 1f
+      "beq.n 1f\n" +
+      RepeatInsn(kLdrR0R0Count + kLdrR0R0CountLong, "ldr r0, [r0]\n") +
+      "1:\n"
+      "ldr r0, [r0]\n";
+  DriverStr(expected, "BindTrackedLabel");
+
+  // Expectation is that the tracked label should have moved.
+  EXPECT_LT(non_tracked.Position(), tracked.Position());
+}
+
+TEST_F(AssemblerThumb2Test, JumpTable) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {          // Note: odd so there's no alignment
+    __ ldr(arm::R0, arm::Address(arm::R0));              //       necessary, as gcc as emits nops,
+  }                                                      //       whereas we emit 0 != nop.
+
+  static_assert((kLdrR0R0Count + 3) * 2 < 1 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "adr r1, .Ljump_table\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable");
+}
+
+// Test for >1K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable4K) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 600;               // Note: even so there's no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       necessary, as gcc as emits nops,
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       whereas we emit 0 != nop.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 1 * KB, "Not enough offset");
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 4 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "adr r1, .Ljump_table\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable4K");
+}
+
+// Test for >4K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable64K) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 2601;              // Note: odd so there's no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       necessary, as gcc as emits nops,
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       whereas we emit 0 != nop.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 4 * KB, "Not enough offset");
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 64 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      // ~ adr r1, .Ljump_table; gcc's as can't seem to fix up a large offset itself.
+      // (Note: we have to use constants, as labels aren't accepted.)
+      "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) & 0xFFFF)\n"
+      "add r1, pc\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable64K");
+}
+
+// Test for >64K fixup.
+TEST_F(AssemblerThumb2Test, JumpTableFar) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 70001;             // Note: odd so there's no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       necessary, as gcc as emits nops,
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       whereas we emit 0 != nop.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 64 * KB, "Not enough offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      // ~ adr r1, .Ljump_table; gcc's as can't seem to fix up a large offset itself.
+      // (Note: we have to use constants, as labels aren't accepted.)
+      "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) & 0xFFFF)\n"
+      "movt r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) >> 16)\n"
+      ".Lhelp:"
+      "add r1, pc\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTableFar");
+}
+
 TEST_F(AssemblerThumb2Test, Clz) {
   __ clz(arm::R0, arm::R1);
 
diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h
index 6b4daed..2060064 100644
--- a/compiler/utils/arm/constants_arm.h
+++ b/compiler/utils/arm/constants_arm.h
@@ -148,7 +148,8 @@
   MOV = 13,  // Move
   BIC = 14,  // Bit Clear
   MVN = 15,  // Move Not
-  kMaxOperand = 16
+  ORN = 16,  // Logical OR NOT.
+  kMaxOperand = 17
 };
 std::ostream& operator<<(std::ostream& os, const Opcode& rhs);
 
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index 48f0328..5c33639 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -77,15 +77,19 @@
       : array_(array_in), size_(size_in) {
   }
 
-  template <typename Alloc>
-  explicit ArrayRef(std::vector<T, Alloc>& v)
+  template <typename Vector,
+            typename = typename std::enable_if<
+                std::is_same<typename Vector::value_type, value_type>::value>::type>
+  explicit ArrayRef(Vector& v)
       : array_(v.data()), size_(v.size()) {
   }
 
-  template <typename U, typename Alloc>
-  explicit ArrayRef(const std::vector<U, Alloc>& v,
-                    typename std::enable_if<std::is_same<T, const U>::value, tag>::type
-                        t ATTRIBUTE_UNUSED = tag())
+  template <typename Vector,
+            typename = typename std::enable_if<
+                std::is_same<
+                    typename std::add_const<typename Vector::value_type>::type,
+                    value_type>::value>::type>
+  explicit ArrayRef(const Vector& v)
       : array_(v.data()), size_(v.size()) {
   }
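A minimal standalone illustration of the constraint used by the rewritten constructors: ArrayRef now accepts any vector-like container whose value_type matches exactly (for example ART's arena-allocated vectors), rather than only std::vector with a particular allocator. The SpanLike name below is made up for the sketch.

#include <cstddef>
#include <type_traits>
#include <vector>

template <typename T>
class SpanLike {
 public:
  using value_type = T;

  // Accept any container exposing data()/size() whose value_type is exactly T.
  template <typename Vector,
            typename = typename std::enable_if<
                std::is_same<typename Vector::value_type, value_type>::value>::type>
  explicit SpanLike(Vector& v) : data_(v.data()), size_(v.size()) {}

  std::size_t size() const { return size_; }

 private:
  T* data_;
  std::size_t size_;
};

int main() {
  std::vector<int> ints = {1, 2, 3};
  SpanLike<int> span(ints);        // OK: value_type matches.
  // SpanLike<long> bad(ints);     // Would not compile: value_type mismatch.
  return span.size() == 3 ? 0 : 1;
}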
 
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 496ca95..b01b0fe 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -122,7 +122,8 @@
   this->AdvancePC(assembler_->CodeSize());
 }
 
-Assembler* Assembler::Create(InstructionSet instruction_set) {
+Assembler* Assembler::Create(InstructionSet instruction_set,
+                             const InstructionSetFeatures* instruction_set_features) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
@@ -136,7 +137,9 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsAssembler();
+      return new mips::MipsAssembler(instruction_set_features != nullptr
+                                         ? instruction_set_features->AsMipsInstructionSetFeatures()
+                                         : nullptr);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 1088cb1..dfe6bab 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "arm/constants_arm.h"
@@ -226,6 +227,8 @@
   // Returns the position in the instruction stream.
   int GetPosition() { return  cursor_ - contents_; }
 
+  void ExtendCapacity(size_t min_capacity = 0u);
+
  private:
   // The limit is set to kMinimumGap bytes before the end of the data area.
   // This leaves enough space for the longest possible instruction and allows
@@ -260,8 +263,6 @@
     return data + capacity - kMinimumGap;
   }
 
-  void ExtendCapacity(size_t min_capacity = 0u);
-
   friend class AssemblerFixup;
 };
 
@@ -284,7 +285,8 @@
 
 class Assembler {
  public:
-  static Assembler* Create(InstructionSet instruction_set);
+  static Assembler* Create(InstructionSet instruction_set,
+                           const InstructionSetFeatures* instruction_set_features = nullptr);
 
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
@@ -299,7 +301,7 @@
   }
 
   // TODO: Implement with disassembler.
-  virtual void Comment(const char* format, ...) { UNUSED(format); }
+  virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
 
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index bd994f4..f1233ca 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -83,6 +83,15 @@
         fmt);
   }
 
+  std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), std::string fmt) {
+    return RepeatTemplatedRegistersNoDupes<Reg, Reg>(f,
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
   std::string Repeatrr(void (Ass::*f)(Reg, Reg), std::string fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
@@ -129,13 +138,14 @@
     return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
   }
 
-  template <typename Reg1Type, typename Reg2Type, typename ImmType,
-            RegisterView Reg1View, RegisterView Reg2View>
-  std::string RepeatRegRegImmBits(void (Ass::*f)(Reg1Type, Reg2Type, ImmType),
-                                  int imm_bits,
-                                  std::string fmt) {
-    const std::vector<Reg1Type*> reg1_registers = GetRegisters();
-    const std::vector<Reg2Type*> reg2_registers = GetRegisters();
+  template <typename Reg1, typename Reg2, typename ImmType>
+  std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, ImmType),
+                                              int imm_bits,
+                                              const std::vector<Reg1*> reg1_registers,
+                                              const std::vector<Reg2*> reg2_registers,
+                                              std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                              std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                              std::string fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
 
@@ -146,13 +156,13 @@
           (assembler_.get()->*f)(*reg1, *reg2, new_imm);
           std::string base = fmt;
 
-          std::string reg1_string = GetRegName<Reg1View>(*reg1);
+          std::string reg1_string = (this->*GetName1)(*reg1);
           size_t reg1_index;
           while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
             base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
           }
 
-          std::string reg2_string = GetRegName<Reg2View>(*reg2);
+          std::string reg2_string = (this->*GetName2)(*reg2);
           size_t reg2_index;
           while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
             base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
@@ -178,15 +188,75 @@
     return str;
   }
 
-  template <typename Reg1Type, typename Reg2Type, typename ImmType>
-  std::string RepeatRRIb(void (Ass::*f)(Reg1Type, Reg2Type, ImmType),
-                         int imm_bits,
-                         std::string fmt) {
-    return RepeatRegRegImmBits<Reg1Type,
-                               Reg2Type,
-                               ImmType,
-                               RegisterView::kUsePrimaryName,
-                               RegisterView::kUsePrimaryName>(f, imm_bits, fmt);
+  template <typename RegType, typename ImmType>
+  std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
+                                              int imm_bits,
+                                              const std::vector<Reg*> registers,
+                                              std::string (AssemblerTest::*GetName)(const RegType&),
+                                              std::string fmt) {
+    std::string str;
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+
+    for (auto reg : registers) {
+      for (int64_t imm : imms) {
+        ImmType new_imm = CreateImmediate(imm);
+        (assembler_.get()->*f)(*reg, new_imm);
+        std::string base = fmt;
+
+        std::string reg_string = (this->*GetName)(*reg);
+        size_t reg_index;
+        while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
+          base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
+        }
+
+        size_t imm_index = base.find(IMM_TOKEN);
+        if (imm_index != std::string::npos) {
+          std::ostringstream sreg;
+          sreg << imm;
+          std::string imm_string = sreg.str();
+          base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+        }
+
+        if (str.size() > 0) {
+          str += "\n";
+        }
+        str += base;
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  template <typename ImmType>
+  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegistersImmBits<Reg, Reg, ImmType>(f,
+        imm_bits,
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
+  template <typename ImmType>
+  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegisterImmBits<Reg, ImmType>(f,
+        imm_bits,
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
+  template <typename ImmType>
+  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegistersImmBits<FPReg, Reg, ImmType>(f,
+        imm_bits,
+        GetFPRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetFPRegName,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
   }
 
   std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) {
@@ -547,6 +617,45 @@
     return str;
   }
 
+  template <typename Reg1, typename Reg2>
+  std::string RepeatTemplatedRegistersNoDupes(void (Ass::*f)(Reg1, Reg2),
+                                              const std::vector<Reg1*> reg1_registers,
+                                              const std::vector<Reg2*> reg2_registers,
+                                              std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                              std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                              std::string fmt) {
+    WarnOnCombinations(reg1_registers.size() * reg2_registers.size());
+
+    std::string str;
+    for (auto reg1 : reg1_registers) {
+      for (auto reg2 : reg2_registers) {
+        if (reg1 == reg2) continue;
+        (assembler_.get()->*f)(*reg1, *reg2);
+        std::string base = fmt;
+
+        std::string reg1_string = (this->*GetName1)(*reg1);
+        size_t reg1_index;
+        while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+          base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+        }
+
+        std::string reg2_string = (this->*GetName2)(*reg2);
+        size_t reg2_index;
+        while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+          base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+        }
+
+        if (str.size() > 0) {
+          str += "\n";
+        }
+        str += base;
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
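A hedged usage sketch of the new repeat helpers: the instruction names below are placeholders, and the {reg}/{reg1}/{reg2}/{imm} placeholders are assumed to be the REG_TOKEN/REG1_TOKEN/REG2_TOKEN/IMM_TOKEN strings already used by the other Repeat helpers in this file.

  // All register pairs except reg1 == reg2, for forms where identical operands
  // are invalid or uninteresting:
  DriverStr(RepeatRRNoDupes(&Ass::SomeMove, "mov {reg1}, {reg2}"), "mov_no_dupes");

  // One register combined with immediates covering an imm_bits-wide range:
  DriverStr(RepeatRIb(&Ass::SomeAddImm, 16, "addi {reg}, {imm}"), "addi");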
   template <typename Reg1, typename Reg2, typename Reg3>
   std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3),
                                        const std::vector<Reg1*> reg1_registers,
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 43c9d94..8c71292 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -520,18 +520,19 @@
     return line;
   }
 
-  // For debug purposes.
-  void FindToolDump(std::string tool_name) {
-    // Find the current tool. Wild-card pattern is "arch-string*tool-name".
+  // Helper for below. If name_predicate is empty, search for all files, otherwise use it for the
+  // "-name" option.
+  static void FindToolDumpPrintout(std::string name_predicate, std::string tmp_file) {
     std::string gcc_path = GetRootPath() + GetGCCRootPath();
     std::vector<std::string> args;
     args.push_back("find");
     args.push_back(gcc_path);
-    args.push_back("-name");
-    args.push_back(architecture_string_ + "*" + tool_name);
+    if (!name_predicate.empty()) {
+      args.push_back("-name");
+      args.push_back(name_predicate);
+    }
     args.push_back("|");
     args.push_back("sort");
-    std::string tmp_file = GetTmpnam();
     args.push_back(">");
     args.push_back(tmp_file);
     std::string sh_args = Join(args, ' ');
@@ -547,10 +548,24 @@
       UNREACHABLE();
     }
 
+    LOG(ERROR) << "FindToolDump: gcc_path=" << gcc_path
+               << " cmd=" << sh_args;
     std::ifstream in(tmp_file.c_str());
     if (in) {
-      LOG(ERROR) << in.rdbuf();
+      std::string line;
+      while (std::getline(in, line)) {
+        LOG(ERROR) << line;
+      }
     }
+    in.close();
+    std::remove(tmp_file.c_str());
+  }
+
+  // For debug purposes.
+  void FindToolDump(std::string tool_name) {
+    // Check with the tool name.
+    FindToolDumpPrintout(architecture_string_ + "*" + tool_name, GetTmpnam());
+    FindToolDumpPrintout("", GetTmpnam());
   }
 
   // Use a consistent tmpnam, so store it.
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index b2a354b..2ae8841 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -238,6 +238,7 @@
   __ sub(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ and_(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ orr(R0, R1, ShifterOperand(R2), AL, kCcKeep);
+  __ orn(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ eor(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ bic(R0, R1, ShifterOperand(R2), AL, kCcKeep);
   __ adc(R0, R1, ShifterOperand(R2), AL, kCcKeep);
@@ -371,6 +372,7 @@
   __ sub(R0, R1, ShifterOperand(0x55));
   __ and_(R0, R1, ShifterOperand(0x55));
   __ orr(R0, R1, ShifterOperand(0x55));
+  __ orn(R0, R1, ShifterOperand(0x55));
   __ eor(R0, R1, ShifterOperand(0x55));
   __ bic(R0, R1, ShifterOperand(0x55));
   __ adc(R0, R1, ShifterOperand(0x55));
@@ -403,6 +405,7 @@
   __ sub(R0, R1, ShifterOperand(0x550055));
   __ and_(R0, R1, ShifterOperand(0x550055));
   __ orr(R0, R1, ShifterOperand(0x550055));
+  __ orn(R0, R1, ShifterOperand(0x550055));
   __ eor(R0, R1, ShifterOperand(0x550055));
   __ bic(R0, R1, ShifterOperand(0x550055));
   __ adc(R0, R1, ShifterOperand(0x550055));
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 82ad642..b79c2e4 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -23,109 +23,110 @@
   "   8:	eba1 0002 	sub.w	r0, r1, r2\n",
   "   c:	ea01 0002 	and.w	r0, r1, r2\n",
   "  10:	ea41 0002 	orr.w	r0, r1, r2\n",
-  "  14:	ea81 0002 	eor.w	r0, r1, r2\n",
-  "  18:	ea21 0002 	bic.w	r0, r1, r2\n",
-  "  1c:	eb41 0002 	adc.w	r0, r1, r2\n",
-  "  20:	eb61 0002 	sbc.w	r0, r1, r2\n",
-  "  24:	ebc1 0002 	rsb	r0, r1, r2\n",
-  "  28:	ea90 0f01 	teq	r0, r1\n",
-  "  2c:	0008      	movs	r0, r1\n",
-  "  2e:	4608      	mov	r0, r1\n",
-  "  30:	43c8      	mvns	r0, r1\n",
-  "  32:	4408      	add	r0, r1\n",
-  "  34:	1888      	adds	r0, r1, r2\n",
-  "  36:	1a88      	subs	r0, r1, r2\n",
-  "  38:	4148      	adcs	r0, r1\n",
-  "  3a:	4188      	sbcs	r0, r1\n",
-  "  3c:	4008      	ands	r0, r1\n",
-  "  3e:	4308      	orrs	r0, r1\n",
-  "  40:	4048      	eors	r0, r1\n",
-  "  42:	4388      	bics	r0, r1\n",
-  "  44:	4208      	tst	r0, r1\n",
-  "  46:	4288      	cmp	r0, r1\n",
-  "  48:	42c8      	cmn	r0, r1\n",
-  "  4a:	4641		mov	r1, r8\n",
-  "  4c:	4681		mov	r9, r0\n",
-  "  4e:	46c8		mov	r8, r9\n",
-  "  50:	4441		add	r1, r8\n",
-  "  52:	4481		add	r9, r0\n",
-  "  54:	44c8		add	r8, r9\n",
-  "  56:	4548		cmp	r0, r9\n",
-  "  58:	4588		cmp	r8, r1\n",
-  "  5a:	45c1		cmp	r9, r8\n",
-  "  5c:	4248   	   	negs	r0, r1\n",
-  "  5e:	4240   	   	negs	r0, r0\n",
-  "  60:	ea5f 0008  	movs.w	r0, r8\n",
-  "  64:	ea7f 0008  	mvns.w	r0, r8\n",
-  "  68:	eb01 0008 	add.w	r0, r1, r8\n",
-  "  6c:	eb11 0008 	adds.w	r0, r1, r8\n",
-  "  70:	ebb1 0008 	subs.w	r0, r1, r8\n",
-  "  74:	eb50 0008 	adcs.w	r0, r0, r8\n",
-  "  78:	eb70 0008 	sbcs.w	r0, r0, r8\n",
-  "  7c:	ea10 0008 	ands.w	r0, r0, r8\n",
-  "  80:	ea50 0008 	orrs.w	r0, r0, r8\n",
-  "  84:	ea90 0008 	eors.w	r0, r0, r8\n",
-  "  88:	ea30 0008 	bics.w	r0, r0, r8\n",
-  "  8c:	ea10 0f08 	tst.w	r0, r8\n",
-  "  90:	eb10 0f08 	cmn.w	r0, r8\n",
-  "  94:	f1d8 0000 	rsbs	r0, r8, #0\n",
-  "  98:	f1d8 0800 	rsbs	r8, r8, #0\n",
-  "  9c:	bf08       	it	eq\n",
-  "  9e:	ea7f 0001  	mvnseq.w	r0, r1\n",
-  "  a2:	bf08       	it	eq\n",
-  "  a4:	eb11 0002 	addseq.w	r0, r1, r2\n",
-  "  a8:	bf08       	it	eq\n",
-  "  aa:	ebb1 0002 	subseq.w	r0, r1, r2\n",
-  "  ae:	bf08       	it	eq\n",
-  "  b0:	eb50 0001 	adcseq.w	r0, r0, r1\n",
-  "  b4:	bf08       	it	eq\n",
-  "  b6:	eb70 0001 	sbcseq.w	r0, r0, r1\n",
-  "  ba:	bf08       	it	eq\n",
-  "  bc:	ea10 0001 	andseq.w	r0, r0, r1\n",
-  "  c0:	bf08       	it	eq\n",
-  "  c2:	ea50 0001 	orrseq.w	r0, r0, r1\n",
-  "  c6:	bf08       	it	eq\n",
-  "  c8:	ea90 0001 	eorseq.w	r0, r0, r1\n",
-  "  cc:	bf08       	it	eq\n",
-  "  ce:	ea30 0001 	bicseq.w	r0, r0, r1\n",
-  "  d2:	bf08       	it	eq\n",
-  "  d4:	43c8      	mvneq	r0, r1\n",
+  "  14:	ea61 0002 	orn	r0, r1, r2\n",
+  "  18:	ea81 0002 	eor.w	r0, r1, r2\n",
+  "  1c:	ea21 0002 	bic.w	r0, r1, r2\n",
+  "  20:	eb41 0002 	adc.w	r0, r1, r2\n",
+  "  24:	eb61 0002 	sbc.w	r0, r1, r2\n",
+  "  28:	ebc1 0002 	rsb	r0, r1, r2\n",
+  "  2c:	ea90 0f01 	teq	r0, r1\n",
+  "  30:	0008      	movs	r0, r1\n",
+  "  32:	4608      	mov	r0, r1\n",
+  "  34:	43c8      	mvns	r0, r1\n",
+  "  36:	4408      	add	r0, r1\n",
+  "  38:	1888      	adds	r0, r1, r2\n",
+  "  3a:	1a88      	subs	r0, r1, r2\n",
+  "  3c:	4148      	adcs	r0, r1\n",
+  "  3e:	4188      	sbcs	r0, r1\n",
+  "  40:	4008      	ands	r0, r1\n",
+  "  42:	4308      	orrs	r0, r1\n",
+  "  44:	4048      	eors	r0, r1\n",
+  "  46:	4388      	bics	r0, r1\n",
+  "  48:	4208      	tst	r0, r1\n",
+  "  4a:	4288      	cmp	r0, r1\n",
+  "  4c:	42c8      	cmn	r0, r1\n",
+  "  4e:	4641		mov	r1, r8\n",
+  "  50:	4681		mov	r9, r0\n",
+  "  52:	46c8		mov	r8, r9\n",
+  "  54:	4441		add	r1, r8\n",
+  "  56:	4481		add	r9, r0\n",
+  "  58:	44c8		add	r8, r9\n",
+  "  5a:	4548		cmp	r0, r9\n",
+  "  5c:	4588		cmp	r8, r1\n",
+  "  5e:	45c1		cmp	r9, r8\n",
+  "  60:	4248   	   	negs	r0, r1\n",
+  "  62:	4240   	   	negs	r0, r0\n",
+  "  64:	ea5f 0008  	movs.w	r0, r8\n",
+  "  68:	ea7f 0008  	mvns.w	r0, r8\n",
+  "  6c:	eb01 0008 	add.w	r0, r1, r8\n",
+  "  70:	eb11 0008 	adds.w	r0, r1, r8\n",
+  "  74:	ebb1 0008 	subs.w	r0, r1, r8\n",
+  "  78:	eb50 0008 	adcs.w	r0, r0, r8\n",
+  "  7c:	eb70 0008 	sbcs.w	r0, r0, r8\n",
+  "  80:	ea10 0008 	ands.w	r0, r0, r8\n",
+  "  84:	ea50 0008 	orrs.w	r0, r0, r8\n",
+  "  88:	ea90 0008 	eors.w	r0, r0, r8\n",
+  "  8c:	ea30 0008 	bics.w	r0, r0, r8\n",
+  "  90:	ea10 0f08 	tst.w	r0, r8\n",
+  "  94:	eb10 0f08 	cmn.w	r0, r8\n",
+  "  98:	f1d8 0000 	rsbs	r0, r8, #0\n",
+  "  9c:	f1d8 0800 	rsbs	r8, r8, #0\n",
+  "  a0:	bf08       	it	eq\n",
+  "  a2:	ea7f 0001  	mvnseq.w	r0, r1\n",
+  "  a6:	bf08       	it	eq\n",
+  "  a8:	eb11 0002 	addseq.w	r0, r1, r2\n",
+  "  ac:	bf08       	it	eq\n",
+  "  ae:	ebb1 0002 	subseq.w	r0, r1, r2\n",
+  "  b2:	bf08       	it	eq\n",
+  "  b4:	eb50 0001 	adcseq.w	r0, r0, r1\n",
+  "  b8:	bf08       	it	eq\n",
+  "  ba:	eb70 0001 	sbcseq.w	r0, r0, r1\n",
+  "  be:	bf08       	it	eq\n",
+  "  c0:	ea10 0001 	andseq.w	r0, r0, r1\n",
+  "  c4:	bf08       	it	eq\n",
+  "  c6:	ea50 0001 	orrseq.w	r0, r0, r1\n",
+  "  ca:	bf08       	it	eq\n",
+  "  cc:	ea90 0001 	eorseq.w	r0, r0, r1\n",
+  "  d0:	bf08       	it	eq\n",
+  "  d2:	ea30 0001 	bicseq.w	r0, r0, r1\n",
   "  d6:	bf08       	it	eq\n",
-  "  d8:	1888      	addeq	r0, r1, r2\n",
+  "  d8:	43c8      	mvneq	r0, r1\n",
   "  da:	bf08       	it	eq\n",
-  "  dc:	1a88      	subeq	r0, r1, r2\n",
+  "  dc:	1888      	addeq	r0, r1, r2\n",
   "  de:	bf08       	it	eq\n",
-  "  e0:	4148      	adceq	r0, r1\n",
+  "  e0:	1a88      	subeq	r0, r1, r2\n",
   "  e2:	bf08       	it	eq\n",
-  "  e4:	4188      	sbceq	r0, r1\n",
+  "  e4:	4148      	adceq	r0, r1\n",
   "  e6:	bf08       	it	eq\n",
-  "  e8:	4008      	andeq	r0, r1\n",
+  "  e8:	4188      	sbceq	r0, r1\n",
   "  ea:	bf08       	it	eq\n",
-  "  ec:	4308      	orreq	r0, r1\n",
+  "  ec:	4008      	andeq	r0, r1\n",
   "  ee:	bf08       	it	eq\n",
-  "  f0:	4048      	eoreq	r0, r1\n",
+  "  f0:	4308      	orreq	r0, r1\n",
   "  f2:	bf08       	it	eq\n",
-  "  f4:	4388      	biceq	r0, r1\n",
-  "  f6:	4608      	mov	r0, r1\n",
-  "  f8:	43c8      	mvns	r0, r1\n",
-  "  fa:	4408      	add	r0, r1\n",
-  "  fc:	1888      	adds	r0, r1, r2\n",
-  "  fe:	1a88      	subs	r0, r1, r2\n",
-  " 100:	4148      	adcs	r0, r1\n",
-  " 102:	4188      	sbcs	r0, r1\n",
-  " 104:	4008      	ands	r0, r1\n",
-  " 106:	4308      	orrs	r0, r1\n",
-  " 108:	4048      	eors	r0, r1\n",
-  " 10a:	4388      	bics	r0, r1\n",
-  " 10c:	4641		mov	r1, r8\n",
-  " 10e:	4681		mov	r9, r0\n",
-  " 110:	46c8		mov	r8, r9\n",
-  " 112:	4441		add	r1, r8\n",
-  " 114:	4481		add	r9, r0\n",
-  " 116:	44c8		add	r8, r9\n",
-  " 118:	4248   	   	negs	r0, r1\n",
-  " 11a:	4240   	   	negs	r0, r0\n",
-  " 11c:	eb01 0c00 	add.w	ip, r1, r0\n",
+  "  f4:	4048      	eoreq	r0, r1\n",
+  "  f6:	bf08       	it	eq\n",
+  "  f8:	4388      	biceq	r0, r1\n",
+  "  fa:	4608      	mov	r0, r1\n",
+  "  fc:	43c8      	mvns	r0, r1\n",
+  "  fe:	4408      	add	r0, r1\n",
+  " 100:	1888      	adds	r0, r1, r2\n",
+  " 102:	1a88      	subs	r0, r1, r2\n",
+  " 104:	4148      	adcs	r0, r1\n",
+  " 106:	4188      	sbcs	r0, r1\n",
+  " 108:	4008      	ands	r0, r1\n",
+  " 10a:	4308      	orrs	r0, r1\n",
+  " 10c:	4048      	eors	r0, r1\n",
+  " 10e:	4388      	bics	r0, r1\n",
+  " 110:	4641		mov	r1, r8\n",
+  " 112:	4681		mov	r9, r0\n",
+  " 114:	46c8		mov	r8, r9\n",
+  " 116:	4441		add	r1, r8\n",
+  " 118:	4481		add	r9, r0\n",
+  " 11a:	44c8		add	r8, r9\n",
+  " 11c:	4248   	   	negs	r0, r1\n",
+  " 11e:	4240   	   	negs	r0, r0\n",
+  " 120:	eb01 0c00 	add.w	ip, r1, r0\n",
   nullptr
 };
 const char* DataProcessingImmediateResults[] = {
@@ -135,21 +136,22 @@
   "   a:	f2a1 0055 	subw	r0, r1, #85	; 0x55\n",
   "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
   "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
-  "  16:	f081 0055 	eor.w	r0, r1, #85	; 0x55\n",
-  "  1a:	f021 0055 	bic.w	r0, r1, #85	; 0x55\n",
-  "  1e:	f141 0055 	adc.w	r0, r1, #85	; 0x55\n",
-  "  22:	f161 0055 	sbc.w	r0, r1, #85	; 0x55\n",
-  "  26:	f1c1 0055 	rsb	r0, r1, #85	; 0x55\n",
-  "  2a:	f010 0f55 	tst.w	r0, #85	; 0x55\n",
-  "  2e:	f090 0f55 	teq	r0, #85	; 0x55\n",
-  "  32:	2855      	cmp	r0, #85	; 0x55\n",
-  "  34:	f110 0f55 	cmn.w	r0, #85	; 0x55\n",
-  "  38:	1d48      	adds	r0, r1, #5\n",
-  "  3a:	1f48      	subs	r0, r1, #5\n",
-  "  3c:	2055      	movs	r0, #85	; 0x55\n",
-  "  3e:	f07f 0055 	mvns.w	r0, #85	; 0x55\n",
-  "  42:	1d48      	adds  r0, r1, #5\n",
-  "  44:	1f48      	subs  r0, r1, #5\n",
+  "  16:	f061 0055 	orn	r0, r1, #85	; 0x55\n",
+  "  1a:	f081 0055 	eor.w	r0, r1, #85	; 0x55\n",
+  "  1e:	f021 0055 	bic.w	r0, r1, #85	; 0x55\n",
+  "  22:	f141 0055 	adc.w	r0, r1, #85	; 0x55\n",
+  "  26:	f161 0055 	sbc.w	r0, r1, #85	; 0x55\n",
+  "  2a:	f1c1 0055 	rsb	r0, r1, #85	; 0x55\n",
+  "  2e:	f010 0f55 	tst.w	r0, #85	; 0x55\n",
+  "  32:	f090 0f55 	teq	r0, #85	; 0x55\n",
+  "  36:	2855      	cmp	r0, #85	; 0x55\n",
+  "  38:	f110 0f55 	cmn.w	r0, #85	; 0x55\n",
+  "  3c:	1d48      	adds	r0, r1, #5\n",
+  "  3e:	1f48      	subs	r0, r1, #5\n",
+  "  40:	2055      	movs	r0, #85	; 0x55\n",
+  "  42:	f07f 0055 	mvns.w	r0, #85	; 0x55\n",
+  "  46:	1d48      	adds	r0, r1, #5\n",
+  "  48:	1f48      	subs	r0, r1, #5\n",
   nullptr
 };
 const char* DataProcessingModifiedImmediateResults[] = {
@@ -159,15 +161,16 @@
   "   c:	f1a1 1055 	sub.w	r0, r1, #5570645	; 0x550055\n",
   "  10:	f001 1055 	and.w	r0, r1, #5570645	; 0x550055\n",
   "  14:	f041 1055 	orr.w	r0, r1, #5570645	; 0x550055\n",
-  "  18:	f081 1055 	eor.w	r0, r1, #5570645	; 0x550055\n",
-  "  1c:	f021 1055 	bic.w	r0, r1, #5570645	; 0x550055\n",
-  "  20:	f141 1055 	adc.w	r0, r1, #5570645	; 0x550055\n",
-  "  24:	f161 1055 	sbc.w	r0, r1, #5570645	; 0x550055\n",
-  "  28:	f1c1 1055 	rsb	r0, r1, #5570645	; 0x550055\n",
-  "  2c:	f010 1f55 	tst.w	r0, #5570645	; 0x550055\n",
-  "  30:	f090 1f55 	teq	r0, #5570645	; 0x550055\n",
-  "  34:	f1b0 1f55 	cmp.w	r0, #5570645	; 0x550055\n",
-  "  38:	f110 1f55 	cmn.w	r0, #5570645	; 0x550055\n",
+  "  18:	f061 1055 	orn	r0, r1, #5570645	; 0x550055\n",
+  "  1c:	f081 1055 	eor.w	r0, r1, #5570645	; 0x550055\n",
+  "  20:	f021 1055 	bic.w	r0, r1, #5570645	; 0x550055\n",
+  "  24:	f141 1055 	adc.w	r0, r1, #5570645	; 0x550055\n",
+  "  28:	f161 1055 	sbc.w	r0, r1, #5570645	; 0x550055\n",
+  "  2c:	f1c1 1055 	rsb	r0, r1, #5570645	; 0x550055\n",
+  "  30:	f010 1f55 	tst.w	r0, #5570645	; 0x550055\n",
+  "  34:	f090 1f55 	teq	r0, #5570645	; 0x550055\n",
+  "  38:	f1b0 1f55 	cmp.w	r0, #5570645	; 0x550055\n",
+  "  3c:	f110 1f55 	cmn.w	r0, #5570645	; 0x550055\n",
   nullptr
 };
 const char* DataProcessingModifiedImmediatesResults[] = {
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
new file mode 100644
index 0000000..ac54813
--- /dev/null
+++ b/compiler/utils/dedupe_set-inl.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+#define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+
+#include "dedupe_set.h"
+
+#include <algorithm>
+#include <inttypes.h>
+#include <unordered_map>
+
+#include "base/mutex.h"
+#include "base/hash_set.h"
+#include "base/stl_util.h"
+#include "base/stringprintf.h"
+#include "base/time_utils.h"
+
+namespace art {
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
+  size_t collision_sum = 0u;
+  size_t collision_max = 0u;
+  size_t total_probe_distance = 0u;
+  size_t total_size = 0u;
+};
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
+ public:
+  Shard(const Alloc& alloc, const std::string& lock_name)
+      : alloc_(alloc),
+        lock_name_(lock_name),
+        lock_(lock_name_.c_str()),
+        keys_() {
+  }
+
+  ~Shard() {
+    for (const HashedKey<StoreKey>& key : keys_) {
+      DCHECK(key.Key() != nullptr);
+      alloc_.Destroy(key.Key());
+    }
+  }
+
+  const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
+    MutexLock lock(self, lock_);
+    HashedKey<InKey> hashed_in_key(hash, &in_key);
+    auto it = keys_.Find(hashed_in_key);
+    if (it != keys_.end()) {
+      DCHECK(it->Key() != nullptr);
+      return it->Key();
+    }
+    const StoreKey* store_key = alloc_.Copy(in_key);
+    keys_.Insert(HashedKey<StoreKey> { hash, store_key });
+    return store_key;
+  }
+
+  void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
+    // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
+    // for bookkeeping while collecting the stats.
+    std::unordered_map<HashType, size_t> stats;
+    {
+      MutexLock lock(self, lock_);
+      // Note: The total_probe_distance will be updated with the current state.
+      // It may have been higher before a re-hash.
+      global_stats->total_probe_distance += keys_.TotalProbeDistance();
+      global_stats->total_size += keys_.Size();
+      for (const HashedKey<StoreKey>& key : keys_) {
+        auto it = stats.find(key.Hash());
+        if (it == stats.end()) {
+          stats.insert({key.Hash(), 1u});
+        } else {
+          ++it->second;
+        }
+      }
+    }
+    for (const auto& entry : stats) {
+      size_t number_of_entries = entry.second;
+      if (number_of_entries > 1u) {
+        global_stats->collision_sum += number_of_entries - 1u;
+        global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
+      }
+    }
+  }
+
+ private:
+  template <typename T>
+  class HashedKey {
+   public:
+    HashedKey() : hash_(0u), key_(nullptr) { }
+    HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
+
+    size_t Hash() const {
+      return hash_;
+    }
+
+    const T* Key() const {
+      return key_;
+    }
+
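+    // A null key marks an empty slot in the shard's HashSet (see ShardEmptyFn below).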
+    bool IsEmpty() const {
+      return Key() == nullptr;
+    }
+
+    void MakeEmpty() {
+      key_ = nullptr;
+    }
+
+   private:
+    size_t hash_;
+    const T* key_;
+  };
+
+  class ShardEmptyFn {
+   public:
+    bool IsEmpty(const HashedKey<StoreKey>& key) const {
+      return key.IsEmpty();
+    }
+
+    void MakeEmpty(HashedKey<StoreKey>& key) {
+      key.MakeEmpty();
+    }
+  };
+
+  struct ShardHashFn {
+    template <typename T>
+    size_t operator()(const HashedKey<T>& key) const {
+      return key.Hash();
+    }
+  };
+
+  struct ShardPred {
+    typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
+    operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
+      DCHECK(lhs.Key() != nullptr);
+      DCHECK(rhs.Key() != nullptr);
+      // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
+      return lhs.Key() == rhs.Key();
+    }
+
+    template <typename LeftT, typename RightT>
+    bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
+      DCHECK(lhs.Key() != nullptr);
+      DCHECK(rhs.Key() != nullptr);
+      return lhs.Hash() == rhs.Hash() &&
+          lhs.Key()->size() == rhs.Key()->size() &&
+          std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
+    }
+  };
+
+  Alloc alloc_;
+  const std::string lock_name_;
+  Mutex lock_;
+  HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
+};
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
+    Thread* self, const InKey& key) {
+  uint64_t hash_start;
+  if (kIsDebugBuild) {
+    hash_start = NanoTime();
+  }
+  HashType raw_hash = HashFunc()(key);
+  if (kIsDebugBuild) {
+    uint64_t hash_end = NanoTime();
+    hash_time_ += hash_end - hash_start;
+  }
+  HashType shard_hash = raw_hash / kShard;
+  HashType shard_bin = raw_hash % kShard;
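+  // For example, with kShard == 4 and raw_hash == 0x1234567b, shard_bin == 3 picks the shard
+  // and shard_hash == 0x048d159e is the hash used within that shard's HashSet.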
+  return shards_[shard_bin]->Add(self, shard_hash, key);
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
+                                                                         const Alloc& alloc)
+    : hash_time_(0) {
+  for (HashType i = 0; i < kShard; ++i) {
+    std::ostringstream oss;
+    oss << set_name << " lock " << i;
+    shards_[i].reset(new Shard(alloc, oss.str()));
+  }
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
+  // Everything done by member destructors.
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
+    Thread* self) const {
+  Stats stats;
+  for (HashType shard = 0; shard < kShard; ++shard) {
+    shards_[shard]->UpdateStats(self, &stats);
+  }
+  return StringPrintf("%zu collisions, %zu max hash collisions, "
+                      "%zu/%zu probe distance, %" PRIu64 " ns hash time",
+                      stats.collision_sum,
+                      stats.collision_max,
+                      stats.total_probe_distance,
+                      stats.total_size,
+                      hash_time_);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 2c4a689..b62f216 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -17,151 +17,41 @@
 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_
 #define ART_COMPILER_UTILS_DEDUPE_SET_H_
 
-#include <algorithm>
-#include <inttypes.h>
 #include <memory>
-#include <set>
+#include <stdint.h>
 #include <string>
 
-#include "base/mutex.h"
-#include "base/stl_util.h"
-#include "base/stringprintf.h"
-#include "base/time_utils.h"
-#include "utils/swap_space.h"
+#include "base/macros.h"
 
 namespace art {
 
+class Thread;
+
 // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the
 // Add method. The data structure is thread-safe through the use of internal locks; it also
 // supports the lock being sharded.
-template <typename InKey, typename StoreKey, typename HashType, typename HashFunc,
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
           HashType kShard = 1>
 class DedupeSet {
-  typedef std::pair<HashType, const InKey*> HashedInKey;
-  struct HashedKey {
-    StoreKey* store_ptr;
-    union {
-      HashType store_hash;        // Valid if store_ptr != null.
-      const HashedInKey* in_key;  // Valid if store_ptr == null.
-    };
-  };
-
-  class Comparator {
-   public:
-    bool operator()(const HashedKey& a, const HashedKey& b) const {
-      HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first;
-      HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first;
-      if (a_hash != b_hash) {
-        return a_hash < b_hash;
-      }
-      if (a.store_ptr != nullptr && b.store_ptr != nullptr) {
-        return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
-                                            b.store_ptr->begin(), b.store_ptr->end());
-      } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) {
-        return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
-                                            b.in_key->second->begin(), b.in_key->second->end());
-      } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) {
-        return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
-                                            b.store_ptr->begin(), b.store_ptr->end());
-      } else {
-        return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
-                                            b.in_key->second->begin(), b.in_key->second->end());
-      }
-    }
-  };
-
  public:
-  StoreKey* Add(Thread* self, const InKey& key) {
-    uint64_t hash_start;
-    if (kIsDebugBuild) {
-      hash_start = NanoTime();
-    }
-    HashType raw_hash = HashFunc()(key);
-    if (kIsDebugBuild) {
-      uint64_t hash_end = NanoTime();
-      hash_time_ += hash_end - hash_start;
-    }
-    HashType shard_hash = raw_hash / kShard;
-    HashType shard_bin = raw_hash % kShard;
-    HashedInKey hashed_in_key(shard_hash, &key);
-    HashedKey hashed_key;
-    hashed_key.store_ptr = nullptr;
-    hashed_key.in_key = &hashed_in_key;
-    MutexLock lock(self, *lock_[shard_bin]);
-    auto it = keys_[shard_bin].find(hashed_key);
-    if (it != keys_[shard_bin].end()) {
-      DCHECK(it->store_ptr != nullptr);
-      return it->store_ptr;
-    }
-    hashed_key.store_ptr = CreateStoreKey(key);
-    hashed_key.store_hash = shard_hash;
-    keys_[shard_bin].insert(hashed_key);
-    return hashed_key.store_ptr;
-  }
+  // Add a new key to the dedupe set if not present. Return the equivalent deduplicated stored key.
+  const StoreKey* Add(Thread* self, const InKey& key);
 
-  DedupeSet(const char* set_name, SwapAllocator<void>& alloc)
-      : allocator_(alloc), hash_time_(0) {
-    for (HashType i = 0; i < kShard; ++i) {
-      std::ostringstream oss;
-      oss << set_name << " lock " << i;
-      lock_name_[i] = oss.str();
-      lock_[i].reset(new Mutex(lock_name_[i].c_str()));
-    }
-  }
+  DedupeSet(const char* set_name, const Alloc& alloc);
 
-  ~DedupeSet() {
-    // Have to manually free all pointers.
-    for (auto& shard : keys_) {
-      for (const auto& hashed_key : shard) {
-        DCHECK(hashed_key.store_ptr != nullptr);
-        DeleteStoreKey(hashed_key.store_ptr);
-      }
-    }
-  }
+  ~DedupeSet();
 
-  std::string DumpStats() const {
-    size_t collision_sum = 0;
-    size_t collision_max = 0;
-    for (HashType shard = 0; shard < kShard; ++shard) {
-      HashType last_hash = 0;
-      size_t collision_cur_max = 0;
-      for (const HashedKey& key : keys_[shard]) {
-        DCHECK(key.store_ptr != nullptr);
-        if (key.store_hash == last_hash) {
-          collision_cur_max++;
-          if (collision_cur_max > 1) {
-            collision_sum++;
-            if (collision_cur_max > collision_max) {
-              collision_max = collision_cur_max;
-            }
-          }
-        } else {
-          collision_cur_max = 1;
-          last_hash = key.store_hash;
-        }
-      }
-    }
-    return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time",
-                        collision_sum, collision_max, hash_time_);
-  }
+  std::string DumpStats(Thread* self) const;
 
  private:
-  StoreKey* CreateStoreKey(const InKey& key) {
-    StoreKey* ret = allocator_.allocate(1);
-    allocator_.construct(ret, key.begin(), key.end(), allocator_);
-    return ret;
-  }
+  struct Stats;
+  class Shard;
 
-  void DeleteStoreKey(StoreKey* key) {
-    SwapAllocator<StoreKey> alloc(allocator_);
-    alloc.destroy(key);
-    alloc.deallocate(key, 1);
-  }
-
-  std::string lock_name_[kShard];
-  std::unique_ptr<Mutex> lock_[kShard];
-  std::set<HashedKey, Comparator> keys_[kShard];
-  SwapAllocator<StoreKey> allocator_;
+  std::unique_ptr<Shard> shards_[kShard];
   uint64_t hash_time_;
 
   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 637964e..60a891d 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -18,15 +18,18 @@
 
 #include <algorithm>
 #include <cstdio>
+#include <vector>
 
+#include "dedupe_set-inl.h"
 #include "gtest/gtest.h"
 #include "thread-inl.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class DedupeHashFunc {
+class DedupeSetTestHashFunc {
  public:
-  size_t operator()(const std::vector<uint8_t>& array) const {
+  size_t operator()(const ArrayRef<const uint8_t>& array) const {
     size_t hash = 0;
     for (uint8_t c : array) {
       hash += c;
@@ -36,46 +39,52 @@
     return hash;
   }
 };
+
+class DedupeSetTestAlloc {
+ public:
+  const std::vector<uint8_t>* Copy(const ArrayRef<const uint8_t>& src) {
+    return new std::vector<uint8_t>(src.begin(), src.end());
+  }
+
+  void Destroy(const std::vector<uint8_t>* key) {
+    delete key;
+  }
+};
+
 TEST(DedupeSetTest, Test) {
   Thread* self = Thread::Current();
-  typedef std::vector<uint8_t> ByteArray;
-  SwapAllocator<void> swap(nullptr);
-  DedupeSet<ByteArray, SwapVector<uint8_t>, size_t, DedupeHashFunc> deduplicator("test", swap);
-  SwapVector<uint8_t>* array1;
+  DedupeSetTestAlloc alloc;
+  DedupeSet<ArrayRef<const uint8_t>,
+            std::vector<uint8_t>,
+            DedupeSetTestAlloc,
+            size_t,
+            DedupeSetTestHashFunc> deduplicator("test", alloc);
+  const std::vector<uint8_t>* array1;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(20);
-    test1.push_back(30);
-    test1.push_back(45);
-
+    uint8_t raw_test1[] = { 10u, 20u, 30u, 45u };
+    ArrayRef<const uint8_t> test1(raw_test1);
     array1 = deduplicator.Add(self, test1);
     ASSERT_NE(array1, nullptr);
     ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array1->begin()));
   }
 
-  SwapVector<uint8_t>* array2;
+  const std::vector<uint8_t>* array2;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(20);
-    test1.push_back(30);
-    test1.push_back(45);
-    array2 = deduplicator.Add(self, test1);
+    uint8_t raw_test2[] = { 10u, 20u, 30u, 45u };
+    ArrayRef<const uint8_t> test2(raw_test2);
+    array2 = deduplicator.Add(self, test2);
     ASSERT_EQ(array2, array1);
-    ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array2->begin()));
+    ASSERT_TRUE(std::equal(test2.begin(), test2.end(), array2->begin()));
   }
 
-  SwapVector<uint8_t>* array3;
+  const std::vector<uint8_t>* array3;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(22);
-    test1.push_back(30);
-    test1.push_back(47);
-    array3 = deduplicator.Add(self, test1);
+    uint8_t raw_test3[] = { 10u, 22u, 30u, 47u };
+    ArrayRef<const uint8_t> test3(raw_test3);
+    array3 = deduplicator.Add(self, test3);
     ASSERT_NE(array3, nullptr);
-    ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array3->begin()));
+    ASSERT_NE(array3, array1);
+    ASSERT_TRUE(std::equal(test3.begin(), test3.end(), array3->begin()));
   }
 }
 
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index ff4a1a4..1038f44 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -70,6 +70,13 @@
  public:
   Label() : position_(0) {}
 
+  Label(Label&& src)
+      : position_(src.position_) {
+    // We must unlink/unbind the src label when moving; if not, calling the destructor on
+    // the src label would fail.
+    src.position_ = 0;
+  }
+
   ~Label() {
     // Assert if label is being destroyed with unresolved branches pending.
     CHECK(!IsLinked());
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index c5fae92..6f35e9e 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -19,6 +19,7 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "memory_region.h"
 #include "thread.h"
 
@@ -34,172 +35,193 @@
   return os;
 }
 
-void MipsAssembler::Emit(int32_t value) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<int32_t>(value);
+void MipsAssembler::FinalizeCode() {
+  for (auto& exception_block : exception_blocks_) {
+    EmitExceptionPoll(&exception_block);
+  }
+  PromoteBranches();
+}
+
+void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  EmitBranches();
+  Assembler::FinalizeInstructions(region);
+}
+
+void MipsAssembler::EmitBranches() {
+  CHECK(!overwriting_);
+  // Switch from appending instructions at the end of the buffer to overwriting
+  // existing instructions (branch placeholders) in the buffer.
+  overwriting_ = true;
+  for (auto& branch : branches_) {
+    EmitBranch(&branch);
+  }
+  overwriting_ = false;
+}
+
+void MipsAssembler::Emit(uint32_t value) {
+  if (overwriting_) {
+    // Branches to labels are emitted into their placeholders here.
+    buffer_.Store<uint32_t>(overwrite_location_, value);
+    overwrite_location_ += sizeof(uint32_t);
+  } else {
+    // Other instructions are simply appended at the end here.
+    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+    buffer_.Emit<uint32_t>(value);
+  }
 }
 
 void MipsAssembler::EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct) {
   CHECK_NE(rs, kNoRegister);
   CHECK_NE(rt, kNoRegister);
   CHECK_NE(rd, kNoRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     static_cast<int32_t>(rs) << kRsShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     shamt << kShamtShift |
-                     funct;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      static_cast<uint32_t>(rt) << kRtShift |
+                      static_cast<uint32_t>(rd) << kRdShift |
+                      shamt << kShamtShift |
+                      funct;
   Emit(encoding);
 }
 
 void MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) {
   CHECK_NE(rs, kNoRegister);
   CHECK_NE(rt, kNoRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     static_cast<int32_t>(rs) << kRsShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     imm;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      static_cast<uint32_t>(rt) << kRtShift |
+                      imm;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitJ(int opcode, int address) {
-  int32_t encoding = opcode << kOpcodeShift |
-                     address;
+void MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) {
+  CHECK_NE(rs, kNoRegister);
+  CHECK(IsUint<21>(imm21)) << imm21;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      imm21;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct) {
+void MipsAssembler::EmitI26(int opcode, uint32_t imm26) {
+  CHECK(IsUint<26>(imm26)) << imm26;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26;
+  Emit(encoding);
+}
+
+void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd,
+                           int funct) {
   CHECK_NE(ft, kNoFRegister);
   CHECK_NE(fs, kNoFRegister);
   CHECK_NE(fd, kNoFRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     fmt << kFmtShift |
-                     static_cast<int32_t>(ft) << kFtShift |
-                     static_cast<int32_t>(fs) << kFsShift |
-                     static_cast<int32_t>(fd) << kFdShift |
-                     funct;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      fmt << kFmtShift |
+                      static_cast<uint32_t>(ft) << kFtShift |
+                      static_cast<uint32_t>(fs) << kFsShift |
+                      static_cast<uint32_t>(fd) << kFdShift |
+                      funct;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm) {
-  CHECK_NE(rt, kNoFRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     fmt << kFmtShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     imm;
+void MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) {
+  CHECK_NE(ft, kNoFRegister);
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      fmt << kFmtShift |
+                      static_cast<uint32_t>(ft) << kFtShift |
+                      imm;
   Emit(encoding);
 }
 
-void MipsAssembler::EmitBranch(Register rt, Register rs, Label* label, bool equal) {
-  int offset;
-  if (label->IsBound()) {
-    offset = label->Position() - buffer_.Size();
-  } else {
-    // Use the offset field of the branch instruction for linking the sites.
-    offset = label->position_;
-    label->LinkTo(buffer_.Size());
-  }
-  if (equal) {
-    Beq(rt, rs, (offset >> 2) & kBranchOffsetMask);
-  } else {
-    Bne(rt, rs, (offset >> 2) & kBranchOffsetMask);
-  }
-}
-
-void MipsAssembler::EmitJump(Label* label, bool link) {
-  int offset;
-  if (label->IsBound()) {
-    offset = label->Position() - buffer_.Size();
-  } else {
-    // Use the offset field of the jump instruction for linking the sites.
-    offset = label->position_;
-    label->LinkTo(buffer_.Size());
-  }
-  if (link) {
-    Jal((offset >> 2) & kJumpOffsetMask);
-  } else {
-    J((offset >> 2) & kJumpOffsetMask);
-  }
-}
-
-int32_t MipsAssembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) {
-  CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
-
-  // Properly preserve only the bits supported in the instruction.
-  offset >>= 2;
-  if (is_jump) {
-    offset &= kJumpOffsetMask;
-    return (inst & ~kJumpOffsetMask) | offset;
-  } else {
-    offset &= kBranchOffsetMask;
-    return (inst & ~kBranchOffsetMask) | offset;
-  }
-}
-
-int MipsAssembler::DecodeBranchOffset(int32_t inst, bool is_jump) {
-  // Sign-extend, then left-shift by 2.
-  if (is_jump) {
-    return (((inst & kJumpOffsetMask) << 6) >> 4);
-  } else {
-    return (((inst & kBranchOffsetMask) << 16) >> 14);
-  }
-}
-
-void MipsAssembler::Bind(Label* label, bool is_jump) {
-  CHECK(!label->IsBound());
-  int bound_pc = buffer_.Size();
-  while (label->IsLinked()) {
-    int32_t position = label->Position();
-    int32_t next = buffer_.Load<int32_t>(position);
-    int32_t offset = is_jump ? bound_pc - position : bound_pc - position - 4;
-    int32_t encoded = MipsAssembler::EncodeBranchOffset(offset, next, is_jump);
-    buffer_.Store<int32_t>(position, encoded);
-    label->position_ = MipsAssembler::DecodeBranchOffset(next, is_jump);
-  }
-  label->BindTo(bound_pc);
-}
-
-void MipsAssembler::Add(Register rd, Register rs, Register rt) {
-  EmitR(0, rs, rt, rd, 0, 0x20);
-}
-
 void MipsAssembler::Addu(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
 
-void MipsAssembler::Addi(Register rt, Register rs, uint16_t imm16) {
-  EmitI(0x8, rs, rt, imm16);
-}
-
 void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) {
   EmitI(0x9, rs, rt, imm16);
 }
 
-void MipsAssembler::Sub(Register rd, Register rs, Register rt) {
-  EmitR(0, rs, rt, rd, 0, 0x22);
-}
-
 void MipsAssembler::Subu(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
 
-void MipsAssembler::Mult(Register rs, Register rt) {
+void MipsAssembler::MultR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18);
 }
 
-void MipsAssembler::Multu(Register rs, Register rt) {
+void MipsAssembler::MultuR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19);
 }
 
-void MipsAssembler::Div(Register rs, Register rt) {
+void MipsAssembler::DivR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a);
 }
 
-void MipsAssembler::Divu(Register rs, Register rt) {
+void MipsAssembler::DivuR2(Register rs, Register rt) {
+  CHECK(!IsR6());
   EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b);
 }
 
+void MipsAssembler::MulR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0x1c, rs, rt, rd, 0, 2);
+}
+
+void MipsAssembler::DivR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivR2(rs, rt);
+  Mflo(rd);
+}
+
+void MipsAssembler::ModR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivR2(rs, rt);
+  Mfhi(rd);
+}
+
+void MipsAssembler::DivuR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivuR2(rs, rt);
+  Mflo(rd);
+}
+
+void MipsAssembler::ModuR2(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  DivuR2(rs, rt);
+  Mfhi(rd);
+}
+
+void MipsAssembler::MulR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 2, 0x18);
+}
+
+void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x19);
+}
+
+void MipsAssembler::DivR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 2, 0x1a);
+}
+
+void MipsAssembler::ModR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x1a);
+}
+
+void MipsAssembler::DivuR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 2, 0x1b);
+}
+
+void MipsAssembler::ModuR6(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 3, 0x1b);
+}
+
 void MipsAssembler::And(Register rd, Register rs, Register rt) {
   EmitR(0, rs, rt, rd, 0, 0x24);
 }
@@ -228,27 +250,35 @@
   EmitR(0, rs, rt, rd, 0, 0x27);
 }
 
-void MipsAssembler::Sll(Register rd, Register rs, int shamt) {
-  EmitR(0, rs, static_cast<Register>(0), rd, shamt, 0x00);
+void MipsAssembler::Seb(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20);
 }
 
-void MipsAssembler::Srl(Register rd, Register rs, int shamt) {
-  EmitR(0, rs, static_cast<Register>(0), rd, shamt, 0x02);
+void MipsAssembler::Seh(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
 }
 
-void MipsAssembler::Sra(Register rd, Register rs, int shamt) {
-  EmitR(0, rs, static_cast<Register>(0), rd, shamt, 0x03);
+void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
+  EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
 }
 
-void MipsAssembler::Sllv(Register rd, Register rs, Register rt) {
+void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
+  EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
+}
+
+void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
+  EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
+}
+
+void MipsAssembler::Sllv(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x04);
 }
 
-void MipsAssembler::Srlv(Register rd, Register rs, Register rt) {
+void MipsAssembler::Srlv(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x06);
 }
 
-void MipsAssembler::Srav(Register rd, Register rs, Register rt) {
+void MipsAssembler::Srav(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
 
@@ -276,11 +306,18 @@
   EmitI(0xf, static_cast<Register>(0), rt, imm16);
 }
 
+void MipsAssembler::Sync(uint32_t stype) {
+  EmitR(0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0),
+        stype & 0x1f, 0xf);
+}
+
 void MipsAssembler::Mfhi(Register rd) {
+  CHECK(!IsR6());
   EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x10);
 }
 
 void MipsAssembler::Mflo(Register rd) {
+  CHECK(!IsR6());
   EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x12);
 }
 
@@ -312,34 +349,276 @@
   EmitI(0xb, rs, rt, imm16);
 }
 
-void MipsAssembler::Beq(Register rt, Register rs, uint16_t imm16) {
+void MipsAssembler::B(uint16_t imm16) {
+  EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) {
   EmitI(0x4, rs, rt, imm16);
-  Nop();
 }
 
-void MipsAssembler::Bne(Register rt, Register rs, uint16_t imm16) {
+void MipsAssembler::Bne(Register rs, Register rt, uint16_t imm16) {
   EmitI(0x5, rs, rt, imm16);
-  Nop();
 }
 
-void MipsAssembler::J(uint32_t address) {
-  EmitJ(0x2, address);
-  Nop();
+void MipsAssembler::Beqz(Register rt, uint16_t imm16) {
+  Beq(ZERO, rt, imm16);
 }
 
-void MipsAssembler::Jal(uint32_t address) {
-  EmitJ(0x2, address);
-  Nop();
+void MipsAssembler::Bnez(Register rt, uint16_t imm16) {
+  Bne(ZERO, rt, imm16);
 }
 
-void MipsAssembler::Jr(Register rs) {
-  EmitR(0, rs, static_cast<Register>(0), static_cast<Register>(0), 0, 0x09);  // Jalr zero, rs
-  Nop();
+void MipsAssembler::Bltz(Register rt, uint16_t imm16) {
+  EmitI(0x1, rt, static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::Bgez(Register rt, uint16_t imm16) {
+  EmitI(0x1, rt, static_cast<Register>(0x1), imm16);
+}
+
+void MipsAssembler::Blez(Register rt, uint16_t imm16) {
+  EmitI(0x6, rt, static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::Bgtz(Register rt, uint16_t imm16) {
+  EmitI(0x7, rt, static_cast<Register>(0), imm16);
+}
+
+void MipsAssembler::J(uint32_t addr26) {
+  EmitI26(0x2, addr26);
+}
+
+void MipsAssembler::Jal(uint32_t addr26) {
+  EmitI26(0x3, addr26);
+}
+
+void MipsAssembler::Jalr(Register rd, Register rs) {
+  EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09);
 }
 
 void MipsAssembler::Jalr(Register rs) {
-  EmitR(0, rs, static_cast<Register>(0), RA, 0, 0x09);
-  Nop();
+  Jalr(RA, rs);
+}
+
+void MipsAssembler::Jr(Register rs) {
+  Jalr(ZERO, rs);
+}
+
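+// NAL (BLTZAL with rs == ZERO): never branches, but always stores the return address in RA.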
+void MipsAssembler::Nal() {
+  EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0);
+}
+
+void MipsAssembler::Auipc(Register rs, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16);
+}
+
+void MipsAssembler::Addiupc(Register rs, uint32_t imm19) {
+  CHECK(IsR6());
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, imm19);
+}
+
+void MipsAssembler::Bc(uint32_t imm26) {
+  CHECK(IsR6());
+  EmitI26(0x32, imm26);
+}
+
+void MipsAssembler::Jic(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0x36, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Jialc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitI(0x3E, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Bltc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x17, rs, rt, imm16);
+}
+
+void MipsAssembler::Bltzc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x17, rt, rt, imm16);
+}
+
+void MipsAssembler::Bgtzc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x17, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Bgec(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x16, rs, rt, imm16);
+}
+
+void MipsAssembler::Bgezc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x16, rt, rt, imm16);
+}
+
+void MipsAssembler::Blezc(Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rt, ZERO);
+  EmitI(0x16, static_cast<Register>(0), rt, imm16);
+}
+
+void MipsAssembler::Bltuc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x7, rs, rt, imm16);
+}
+
+void MipsAssembler::Bgeuc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x6, rs, rt, imm16);
+}
+
+void MipsAssembler::Beqc(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16);
+}
+
+void MipsAssembler::Bnec(Register rs, Register rt, uint16_t imm16) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16);
+}
+
+void MipsAssembler::Beqzc(Register rs, uint32_t imm21) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  EmitI21(0x36, rs, imm21);
+}
+
+void MipsAssembler::Bnezc(Register rs, uint32_t imm21) {
+  CHECK(IsR6());
+  CHECK_NE(rs, ZERO);
+  EmitI21(0x3E, rs, imm21);
+}
+
+void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
+  switch (cond) {
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltz(rs, imm16);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgez(rs, imm16);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blez(rs, imm16);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtz(rs, imm16);
+      break;
+    case kCondEQ:
+      Beq(rs, rt, imm16);
+      break;
+    case kCondNE:
+      Bne(rs, rt, imm16);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqz(rs, imm16);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnez(rs, imm16);
+      break;
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+    case kUncond:
+      // We don't support synthetic R2 branches (preceded with slt[u]) at this level
+      // (R2 doesn't have branches to compare 2 registers using <, <=, >=, >).
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
+}
+
+void MipsAssembler::EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
+  switch (cond) {
+    case kCondLT:
+      Bltc(rs, rt, imm16_21);
+      break;
+    case kCondGE:
+      Bgec(rs, rt, imm16_21);
+      break;
+    case kCondLE:
+      Bgec(rt, rs, imm16_21);
+      break;
+    case kCondGT:
+      Bltc(rt, rs, imm16_21);
+      break;
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltzc(rs, imm16_21);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgezc(rs, imm16_21);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blezc(rs, imm16_21);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtzc(rs, imm16_21);
+      break;
+    case kCondEQ:
+      Beqc(rs, rt, imm16_21);
+      break;
+    case kCondNE:
+      Bnec(rs, rt, imm16_21);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqzc(rs, imm16_21);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnezc(rs, imm16_21);
+      break;
+    case kCondLTU:
+      Bltuc(rs, rt, imm16_21);
+      break;
+    case kCondGEU:
+      Bgeuc(rs, rt, imm16_21);
+      break;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
 }
 
 void MipsAssembler::AddS(FRegister fd, FRegister fs, FRegister ft) {
@@ -358,52 +637,84 @@
   EmitFR(0x11, 0x10, ft, fs, fd, 0x3);
 }
 
-void MipsAssembler::AddD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x0);
+void MipsAssembler::AddD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x0);
 }
 
-void MipsAssembler::SubD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x1);
+void MipsAssembler::SubD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x1);
 }
 
-void MipsAssembler::MulD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x2);
+void MipsAssembler::MulD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x2);
 }
 
-void MipsAssembler::DivD(DRegister fd, DRegister fs, DRegister ft) {
-  EmitFR(0x11, 0x11, ConvertDRegToFReg(ft), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x3);
+void MipsAssembler::DivD(FRegister fd, FRegister fs, FRegister ft) {
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x3);
 }
 
 void MipsAssembler::MovS(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6);
 }
 
-void MipsAssembler::MovD(DRegister fd, DRegister fs) {
-  EmitFR(0x11, 0x11, static_cast<FRegister>(0), ConvertDRegToFReg(fs), ConvertDRegToFReg(fd), 0x6);
+void MipsAssembler::MovD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6);
+}
+
+void MipsAssembler::NegS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7);
+}
+
+void MipsAssembler::NegD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7);
+}
+
+void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtdw(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21);
+}
+
+void MipsAssembler::Cvtsd(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtds(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21);
 }
 
 void MipsAssembler::Mfc1(Register rt, FRegister fs) {
-  EmitFR(0x11, 0x00, ConvertRegToFReg(rt), fs, static_cast<FRegister>(0), 0x0);
+  EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
 
-void MipsAssembler::Mtc1(FRegister ft, Register rs) {
-  EmitFR(0x11, 0x04, ft, ConvertRegToFReg(rs), static_cast<FRegister>(0), 0x0);
+void MipsAssembler::Mtc1(Register rt, FRegister fs) {
+  EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+}
+
+void MipsAssembler::Mfhc1(Register rt, FRegister fs) {
+  EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+}
+
+void MipsAssembler::Mthc1(Register rt, FRegister fs) {
+  EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
 
 void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x31, rs, ConvertFRegToReg(ft), imm16);
+  EmitI(0x31, rs, static_cast<Register>(ft), imm16);
 }
 
-void MipsAssembler::Ldc1(DRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x35, rs, ConvertDRegToReg(ft), imm16);
+void MipsAssembler::Ldc1(FRegister ft, Register rs, uint16_t imm16) {
+  EmitI(0x35, rs, static_cast<Register>(ft), imm16);
 }
 
 void MipsAssembler::Swc1(FRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x39, rs, ConvertFRegToReg(ft), imm16);
+  EmitI(0x39, rs, static_cast<Register>(ft), imm16);
 }
 
-void MipsAssembler::Sdc1(DRegister ft, Register rs, uint16_t imm16) {
-  EmitI(0x3d, rs, ConvertDRegToReg(ft), imm16);
+void MipsAssembler::Sdc1(FRegister ft, Register rs, uint16_t imm16) {
+  EmitI(0x3d, rs, static_cast<Register>(ft), imm16);
 }
 
 void MipsAssembler::Break() {
@@ -415,63 +726,881 @@
   EmitR(0x0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0), 0, 0x0);
 }
 
-void MipsAssembler::Move(Register rt, Register rs) {
-  EmitI(0x9, rs, rt, 0);    // Addiu
+void MipsAssembler::Move(Register rd, Register rs) {
+  Or(rd, rs, ZERO);
 }
 
-void MipsAssembler::Clear(Register rt) {
-  EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rt, 0, 0x20);
+void MipsAssembler::Clear(Register rd) {
+  Move(rd, ZERO);
 }
 
-void MipsAssembler::Not(Register rt, Register rs) {
-  EmitR(0, static_cast<Register>(0), rs, rt, 0, 0x27);
+void MipsAssembler::Not(Register rd, Register rs) {
+  Nor(rd, rs, ZERO);
 }
 
-void MipsAssembler::Mul(Register rd, Register rs, Register rt) {
-  Mult(rs, rt);
-  Mflo(rd);
+void MipsAssembler::Push(Register rs) {
+  IncreaseFrameSize(kMipsWordSize);
+  Sw(rs, SP, 0);
 }
 
-void MipsAssembler::Div(Register rd, Register rs, Register rt) {
-  Div(rs, rt);
-  Mflo(rd);
+void MipsAssembler::Pop(Register rd) {
+  Lw(rd, SP, 0);
+  DecreaseFrameSize(kMipsWordSize);
 }
 
-void MipsAssembler::Rem(Register rd, Register rs, Register rt) {
-  Div(rs, rt);
-  Mfhi(rd);
+void MipsAssembler::PopAndReturn(Register rd, Register rt) {
+  Lw(rd, SP, 0);
+  Jr(rt);
+  DecreaseFrameSize(kMipsWordSize);
 }
 
-void MipsAssembler::AddConstant(Register rt, Register rs, int32_t value) {
-  Addiu(rt, rs, value);
-}
-
-void MipsAssembler::LoadImmediate(Register rt, int32_t value) {
-  Addiu(rt, ZERO, value);
-}
-
-void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset,
-                             size_t size) {
-  MipsManagedRegister dst = m_dst.AsMips();
-  if (dst.IsNoRegister()) {
-    CHECK_EQ(0u, size) << dst;
-  } else if (dst.IsCoreRegister()) {
-    CHECK_EQ(4u, size) << dst;
-    LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
-  } else if (dst.IsRegisterPair()) {
-    CHECK_EQ(8u, size) << dst;
-    LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset);
-    LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4);
-  } else if (dst.IsFRegister()) {
-    LoadSFromOffset(dst.AsFRegister(), src_register, src_offset);
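+// Materializes a 32-bit constant in rd using the shortest of these forms, e.g.:
+//   0x00001234: ori   rd, zero, 0x1234
+//   -5:         addiu rd, zero, -5
+//   0x12340000: lui   rd, 0x1234
+//   0x12345678: lui   rd, 0x1234; ori rd, rd, 0x5678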
+void MipsAssembler::LoadConst32(Register rd, int32_t value) {
+  if (IsUint<16>(value)) {
+    // Use OR with (unsigned) immediate to encode 16b unsigned int.
+    Ori(rd, ZERO, value);
+  } else if (IsInt<16>(value)) {
+    // Use ADD with (signed) immediate to encode 16b signed int.
+    Addiu(rd, ZERO, value);
   } else {
-    CHECK(dst.IsDRegister()) << dst;
-    LoadDFromOffset(dst.AsDRegister(), src_register, src_offset);
+    Lui(rd, High16Bits(value));
+    if (value & 0xFFFF)
+      Ori(rd, rd, Low16Bits(value));
+  }
+}
+
+void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) {
+  LoadConst32(reg_lo, Low32Bits(value));
+  LoadConst32(reg_hi, High32Bits(value));
+}
+
+void MipsAssembler::StoreConst32ToOffset(int32_t value,
+                                         Register base,
+                                         int32_t offset,
+                                         Register temp) {
+  if (!IsInt<16>(offset)) {
+    CHECK_NE(temp, AT);  // Must not use AT as temp; it would overwrite the value loaded into AT.
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+  LoadConst32(temp, value);
+  Sw(temp, base, offset);
+}
+
+void MipsAssembler::StoreConst64ToOffset(int64_t value,
+                                         Register base,
+                                         int32_t offset,
+                                         Register temp) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) || !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize))) {
+    CHECK_NE(temp, AT);  // Must not use AT as temp; it would overwrite the value loaded into AT.
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+  LoadConst32(temp, Low32Bits(value));
+  Sw(temp, base, offset);
+  LoadConst32(temp, High32Bits(value));
+  Sw(temp, base, offset + kMipsWordSize);
+}
+
+void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) {
+  LoadConst32(temp, value);
+  Mtc1(temp, r);
+}
+
+void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) {
+  LoadConst32(temp, Low32Bits(value));
+  Mtc1(temp, rd);
+  LoadConst32(temp, High32Bits(value));
+  Mthc1(temp, rd);
+}
+
+void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) {
+  if (IsInt<16>(value)) {
+    Addiu(rt, rs, value);
+  } else {
+    LoadConst32(temp, value);
+    Addu(rt, rs, temp);
+  }
+}
+
+void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits offset_size,
+                                            MipsAssembler::Branch::Type short_type,
+                                            MipsAssembler::Branch::Type long_type) {
+  type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
+}
+
+void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) {
+  OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+  if (is_r6) {
+    // R6
+    if (is_call) {
+      InitShortOrLong(offset_size, kR6Call, kR6LongCall);
+    } else if (condition_ == kUncond) {
+      InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+    } else {
+      if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
+        // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+        type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+      } else {
+        InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+      }
+    }
+  } else {
+    // R2
+    if (is_call) {
+      InitShortOrLong(offset_size, kCall, kLongCall);
+    } else if (condition_ == kUncond) {
+      InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+    } else {
+      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+    }
+  }
+  old_type_ = type_;
+}
+
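+// Returns true for conditions that can never hold for the given registers
+// (e.g. "blt r, r" or "bne r, r"), making the branch a no-op.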
+bool MipsAssembler::Branch::IsNop(BranchCondition condition, Register lhs, Register rhs) {
+  switch (condition) {
+    case kCondLT:
+    case kCondGT:
+    case kCondNE:
+    case kCondLTU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
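+// Returns true when the branch is always taken, either because it is unconditional
+// or because the condition always holds for the given registers (e.g. "bge r, r").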
+bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Register rhs) {
+  switch (condition) {
+    case kUncond:
+      return true;
+    case kCondGE:
+    case kCondLE:
+    case kCondEQ:
+    case kCondGEU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(0),
+      rhs_reg_(0),
+      condition_(kUncond) {
+  InitializeType(false, is_r6);
+}
+
+MipsAssembler::Branch::Branch(bool is_r6,
+                              uint32_t location,
+                              uint32_t target,
+                              MipsAssembler::BranchCondition condition,
+                              Register lhs_reg,
+                              Register rhs_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(lhs_reg),
+      rhs_reg_(rhs_reg),
+      condition_(condition) {
+  CHECK_NE(condition, kUncond);
+  switch (condition) {
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+      // We don't support synthetic R2 branches (preceded with slt[u]) at this level
+      // (R2 doesn't have branches to compare 2 registers using <, <=, >=, >).
+      // We leave this up to the caller.
+      CHECK(is_r6);
+      FALLTHROUGH_INTENDED;
+    case kCondEQ:
+    case kCondNE:
+      // Require registers other than 0 not only for R6, but also for R2 to catch errors.
+      // To compare with 0, use dedicated kCond*Z conditions.
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_NE(rhs_reg, ZERO);
+      break;
+    case kCondLTZ:
+    case kCondGEZ:
+    case kCondLEZ:
+    case kCondGTZ:
+    case kCondEQZ:
+    case kCondNEZ:
+      // Require registers other than 0 not only for R6, but also for R2 to catch errors.
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
+    case kUncond:
+      UNREACHABLE();
+  }
+  CHECK(!IsNop(condition, lhs_reg, rhs_reg));
+  if (IsUncond(condition, lhs_reg, rhs_reg)) {
+    // Branch condition is always true, make the branch unconditional.
+    condition_ = kUncond;
+  }
+  InitializeType(false, is_r6);
+}
+
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(indirect_reg),
+      rhs_reg_(0),
+      condition_(kUncond) {
+  CHECK_NE(indirect_reg, ZERO);
+  CHECK_NE(indirect_reg, AT);
+  InitializeType(true, is_r6);
+}
+
+MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition(
+    MipsAssembler::BranchCondition cond) {
+  switch (cond) {
+    case kCondLT:
+      return kCondGE;
+    case kCondGE:
+      return kCondLT;
+    case kCondLE:
+      return kCondGT;
+    case kCondGT:
+      return kCondLE;
+    case kCondLTZ:
+      return kCondGEZ;
+    case kCondGEZ:
+      return kCondLTZ;
+    case kCondLEZ:
+      return kCondGTZ;
+    case kCondGTZ:
+      return kCondLEZ;
+    case kCondEQ:
+      return kCondNE;
+    case kCondNE:
+      return kCondEQ;
+    case kCondEQZ:
+      return kCondNEZ;
+    case kCondNEZ:
+      return kCondEQZ;
+    case kCondLTU:
+      return kCondGEU;
+    case kCondGEU:
+      return kCondLTU;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+  }
+  UNREACHABLE();
+}
+
+MipsAssembler::Branch::Type MipsAssembler::Branch::GetType() const {
+  return type_;
+}
+
+MipsAssembler::BranchCondition MipsAssembler::Branch::GetCondition() const {
+  return condition_;
+}
+
+Register MipsAssembler::Branch::GetLeftRegister() const {
+  return static_cast<Register>(lhs_reg_);
+}
+
+Register MipsAssembler::Branch::GetRightRegister() const {
+  return static_cast<Register>(rhs_reg_);
+}
+
+uint32_t MipsAssembler::Branch::GetTarget() const {
+  return target_;
+}
+
+uint32_t MipsAssembler::Branch::GetLocation() const {
+  return location_;
+}
+
+uint32_t MipsAssembler::Branch::GetOldLocation() const {
+  return old_location_;
+}
+
+uint32_t MipsAssembler::Branch::GetLength() const {
+  return branch_info_[type_].length;
+}
+
+uint32_t MipsAssembler::Branch::GetOldLength() const {
+  return branch_info_[old_type_].length;
+}
+
+uint32_t MipsAssembler::Branch::GetSize() const {
+  return GetLength() * sizeof(uint32_t);
+}
+
+uint32_t MipsAssembler::Branch::GetOldSize() const {
+  return GetOldLength() * sizeof(uint32_t);
+}
+
+uint32_t MipsAssembler::Branch::GetEndLocation() const {
+  return GetLocation() + GetSize();
+}
+
+uint32_t MipsAssembler::Branch::GetOldEndLocation() const {
+  return GetOldLocation() + GetOldSize();
+}
+
+bool MipsAssembler::Branch::IsLong() const {
+  switch (type_) {
+    // R2 short branches.
+    case kUncondBranch:
+    case kCondBranch:
+    case kCall:
+    // R6 short branches.
+    case kR6UncondBranch:
+    case kR6CondBranch:
+    case kR6Call:
+      return false;
+    // R2 long branches.
+    case kLongUncondBranch:
+    case kLongCondBranch:
+    case kLongCall:
+    // R6 long branches.
+    case kR6LongUncondBranch:
+    case kR6LongCondBranch:
+    case kR6LongCall:
+      return true;
+  }
+  UNREACHABLE();
+}
+
+bool MipsAssembler::Branch::IsResolved() const {
+  return target_ != kUnresolved;
+}
+
+MipsAssembler::Branch::OffsetBits MipsAssembler::Branch::GetOffsetSize() const {
+  OffsetBits offset_size =
+      (type_ == kR6CondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+          ? kOffset23
+          : branch_info_[type_].offset_size;
+  return offset_size;
+}
+
+MipsAssembler::Branch::OffsetBits MipsAssembler::Branch::GetOffsetSizeNeeded(uint32_t location,
+                                                                             uint32_t target) {
+  // For unresolved targets assume the shortest encoding
+  // (later it will be made longer if needed).
+  if (target == kUnresolved)
+    return kOffset16;
+  int64_t distance = static_cast<int64_t>(target) - location;
+  // To simplify calculations in composite branches consisting of multiple instructions,
+  // bump up the distance by a value larger than the max byte size of a composite branch.
+  distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize;
+  if (IsInt<kOffset16>(distance))
+    return kOffset16;
+  else if (IsInt<kOffset18>(distance))
+    return kOffset18;
+  else if (IsInt<kOffset21>(distance))
+    return kOffset21;
+  else if (IsInt<kOffset23>(distance))
+    return kOffset23;
+  else if (IsInt<kOffset28>(distance))
+    return kOffset28;
+  return kOffset32;
+}
+
+void MipsAssembler::Branch::Resolve(uint32_t target) {
+  target_ = target;
+}
+
+void MipsAssembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {
+  if (location_ > expand_location) {
+    location_ += delta;
+  }
+  if (!IsResolved()) {
+    return;  // Don't know the target yet.
+  }
+  if (target_ > expand_location) {
+    target_ += delta;
+  }
+}
+
+void MipsAssembler::Branch::PromoteToLong() {
+  switch (type_) {
+    // R2 short branches.
+    case kUncondBranch:
+      type_ = kLongUncondBranch;
+      break;
+    case kCondBranch:
+      type_ = kLongCondBranch;
+      break;
+    case kCall:
+      type_ = kLongCall;
+      break;
+    // R6 short branches.
+    case kR6UncondBranch:
+      type_ = kR6LongUncondBranch;
+      break;
+    case kR6CondBranch:
+      type_ = kR6LongCondBranch;
+      break;
+    case kR6Call:
+      type_ = kR6LongCall;
+      break;
+    default:
+      // Note: 'type_' is already long.
+      break;
+  }
+  CHECK(IsLong());
+}
+
+uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+  // If the branch is still unresolved or already long, nothing to do.
+  if (IsLong() || !IsResolved()) {
+    return 0;
+  }
+  // Promote the short branch to long if the offset size is too small
+  // to hold the distance between location_ and target_.
+  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+    PromoteToLong();
+    uint32_t old_size = GetOldSize();
+    uint32_t new_size = GetSize();
+    CHECK_GT(new_size, old_size);
+    return new_size - old_size;
+  }
+  // The following logic is for debugging/testing purposes.
+  // Promote some short branches to long when it's not really required.
+  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+    int64_t distance = static_cast<int64_t>(target_) - location_;
+    distance = (distance >= 0) ? distance : -distance;
+    if (distance >= max_short_distance) {
+      PromoteToLong();
+      uint32_t old_size = GetOldSize();
+      uint32_t new_size = GetSize();
+      CHECK_GT(new_size, old_size);
+      return new_size - old_size;
+    }
+  }
+  return 0;
+}
+
+uint32_t MipsAssembler::Branch::GetOffsetLocation() const {
+  return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
+}
+
+uint32_t MipsAssembler::Branch::GetOffset() const {
+  CHECK(IsResolved());
+  uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
+  // Calculate the byte distance between instructions and also account for
+  // different PC-relative origins.
+  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  // Prepare the offset for encoding into the instruction(s).
+  offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
+  return offset;
+}
+
+MipsAssembler::Branch* MipsAssembler::GetBranch(uint32_t branch_id) {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+const MipsAssembler::Branch* MipsAssembler::GetBranch(uint32_t branch_id) const {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+void MipsAssembler::Bind(MipsLabel* label) {
+  CHECK(!label->IsBound());
+  uint32_t bound_pc = buffer_.Size();
+
+  // Walk the list of branches referring to and preceding this label.
+  // Store the previously unknown target addresses in them.
+  while (label->IsLinked()) {
+    uint32_t branch_id = label->Position();
+    Branch* branch = GetBranch(branch_id);
+    branch->Resolve(bound_pc);
+
+    uint32_t branch_location = branch->GetLocation();
+    // Extract the location of the previous branch in the list (walking the list backwards;
+    // the previous branch ID was stored in the space reserved for this branch).
+    uint32_t prev = buffer_.Load<uint32_t>(branch_location);
+
+    // On to the previous branch in the list...
+    label->position_ = prev;
+  }
+
+  // Now make the label object contain its own location (relative to the end of the preceding
+  // branch, if any; it will be used by the branches referring to and following this label).
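+  // Keeping the position relative to the preceding branch (rather than absolute) lets the
+  // label stay valid when earlier branches are later promoted and expanded;
+  // GetLabelLocation() re-adds that branch's (possibly adjusted) end location.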
+  label->prev_branch_id_plus_one_ = branches_.size();
+  if (label->prev_branch_id_plus_one_) {
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    bound_pc -= branch->GetEndLocation();
+  }
+  label->BindTo(bound_pc);
+}
+
+uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const {
+  CHECK(label->IsBound());
+  uint32_t target = label->Position();
+  if (label->prev_branch_id_plus_one_) {
+    // Get label location based on the branch preceding it.
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    target += branch->GetEndLocation();
+  }
+  return target;
+}
+
+uint32_t MipsAssembler::GetAdjustedPosition(uint32_t old_position) {
+  // We can reconstruct the adjustment by going through all the branches from the beginning
+  // up to the old_position. Since we expect GetAdjustedPosition() to be called in a loop
+  // with increasing old_position, we can use the data from the last GetAdjustedPosition() to
+  // continue where we left off and the whole loop should be O(m+n) where m is the number
+  // of positions to adjust and n is the number of branches.
+  if (old_position < last_old_position_) {
+    last_position_adjustment_ = 0;
+    last_old_position_ = 0;
+    last_branch_id_ = 0;
+  }
+  while (last_branch_id_ != branches_.size()) {
+    const Branch* branch = GetBranch(last_branch_id_);
+    if (branch->GetLocation() >= old_position + last_position_adjustment_) {
+      break;
+    }
+    last_position_adjustment_ += branch->GetSize() - branch->GetOldSize();
+    ++last_branch_id_;
+  }
+  last_old_position_ = old_position;
+  return old_position + last_position_adjustment_;
+}
+
+void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
+  uint32_t length = branches_.back().GetLength();
+  if (!label->IsBound()) {
+    // Branch forward (to a following label); the distance is unknown.
+    // The first branch forward will contain 0, serving as the terminator of
+    // the list of forward-reaching branches.
+    Emit(label->position_);
+    length--;
+    // Now make the label object point to this branch
+    // (this forms a linked list of branches preceding this label).
+    uint32_t branch_id = branches_.size() - 1;
+    label->LinkTo(branch_id);
+  }
+  // Reserve space for the branch.
+  while (length--) {
+    Nop();
+  }
+}
+
+void MipsAssembler::Buncond(MipsLabel* label) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(IsR6(), buffer_.Size(), target);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs) {
+  // If lhs = rhs, this can be a NOP.
+  if (Branch::IsNop(condition, lhs, rhs)) {
+    return;
+  }
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::PromoteBranches() {
+  // Promote short branches to long as necessary.
+  bool changed;
+  do {
+    changed = false;
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+      uint32_t delta = branch.PromoteIfNeeded();
+      // If this branch has been promoted and needs to expand in size,
+      // relocate all branches by the expansion size.
+      if (delta) {
+        changed = true;
+        uint32_t expand_location = branch.GetLocation();
+        for (auto& branch2 : branches_) {
+          branch2.Relocate(expand_location, delta);
+        }
+      }
+    }
+  } while (changed);
+
+  // Account for branch expansion by resizing the code buffer
+  // and moving the code in it to its final location.
+  size_t branch_count = branches_.size();
+  if (branch_count > 0) {
+    // Resize.
+    Branch& last_branch = branches_[branch_count - 1];
+    uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation();
+    uint32_t old_size = buffer_.Size();
+    buffer_.Resize(old_size + size_delta);
+    // Move the code residing between branch placeholders.
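+    // Process the branches backwards so each block of code is moved into space that has
+    // already been vacated (or into the newly added tail), never over code still to be moved.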
+    uint32_t end = old_size;
+    for (size_t i = branch_count; i > 0; ) {
+      Branch& branch = branches_[--i];
+      uint32_t size = end - branch.GetOldEndLocation();
+      buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
+      end = branch.GetOldLocation();
+    }
+  }
+}
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
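+// Each entry is {length, instr_offset, pc_org, offset_size, offset_shift}: the sequence length
+// in instructions, the index of the instruction holding the offset, the PC-relative origin
+// adjustment in instructions, the size of the offset field, and the shift applied to the offset.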
+const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = {
+  // R2 short branches.
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kUncondBranch
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCondBranch
+  {  5, 2, 0, MipsAssembler::Branch::kOffset16, 0 },  // kCall
+  // R2 long branches.
+  {  9, 3, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongUncondBranch
+  { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCondBranch
+  {  6, 1, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCall
+  // R6 short branches.
+  {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6UncondBranch
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kR6CondBranch
+                                                      // Exception: kOffset23 for beqzc/bnezc.
+  {  2, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Call
+  // R6 long branches.
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongUncondBranch
+  {  3, 1, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCondBranch
+  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+};
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
+  CHECK_EQ(overwriting_, true);
+  overwrite_location_ = branch->GetLocation();
+  uint32_t offset = branch->GetOffset();
+  BranchCondition condition = branch->GetCondition();
+  Register lhs = branch->GetLeftRegister();
+  Register rhs = branch->GetRightRegister();
+  switch (branch->GetType()) {
+    // R2 short branches.
+    case Branch::kUncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      B(offset);
+      Nop();  // TODO: improve by filling the delay slot.
+      break;
+    case Branch::kCondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcond(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the delay slot.
+      break;
+    case Branch::kCall:
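+      // Nal records the address of the instruction after its delay slot in RA; that address
+      // equals GetOffsetLocation(), so adding the encoded offset in Addiu below yields the
+      // absolute target passed to Jalr.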
+      Nal();
+      Nop();  // TODO: is this NOP really needed here?
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Addiu(lhs, RA, offset);
+      Jalr(lhs);
+      Nop();
+      break;
+
+    // R2 long branches.
+    case Branch::kLongUncondBranch:
+      // To get the value of the PC register we need to use the NAL instruction.
+      // NAL clobbers the RA register. However, RA must be preserved if the
+      // method is compiled without the entry/exit sequences that would take care
+      // of preserving RA (typically, leaf methods don't preserve RA explicitly).
+      // So, we need to preserve RA in some temporary storage ourselves. The AT
+      // register can't be used for this because we need it to load a constant
+      // which will be added to the value that NAL stores in RA. And we can't
+      // use T9 for this in the context of the JNI compiler, which uses it
+      // as a scratch register (see InterproceduralScratchRegister()).
+      // If we were to add a 32-bit constant to RA using two ADDIU instructions,
+      // we'd also need to use the ROTR instruction, which requires no less than
+      // MIPSR2.
+      // Perhaps we could use T8 or one of R2's multiplier/divider registers
+      // (LO or HI) or even a floating-point register, but that doesn't seem
+      // like a nice solution. We may want this to work on both R6 and pre-R6.
+      // For now simply use the stack for RA. This should be OK since for the
+      // vast majority of code a short PC-relative branch is sufficient.
+      // TODO: can this be improved?
+      Push(RA);
+      Nal();
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Ori(AT, AT, Low16Bits(offset));
+      Addu(AT, AT, RA);
+      Lw(RA, SP, 0);
+      Jr(AT);
+      DecreaseFrameSize(kMipsWordSize);
+      break;
+    case Branch::kLongCondBranch:
+      // The comment on case 'Branch::kLongUncondBranch' applies here as well.
+      // Note: the opposite condition branch encodes 8 as the distance, which is equal to the
+      // number of instructions skipped:
+      // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR).
+      EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, 8);
+      Push(RA);
+      Nal();
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Ori(AT, AT, Low16Bits(offset));
+      Addu(AT, AT, RA);
+      Lw(RA, SP, 0);
+      Jr(AT);
+      DecreaseFrameSize(kMipsWordSize);
+      break;
+    case Branch::kLongCall:
+      Nal();
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Ori(AT, AT, Low16Bits(offset));
+      Addu(lhs, AT, RA);
+      Jalr(lhs);
+      Nop();
+      break;
+
+    // R6 short branches.
+    case Branch::kR6UncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bc(offset);
+      break;
+    case Branch::kR6CondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondc(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden slot.
+      break;
+    case Branch::kR6Call:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Addiupc(lhs, offset);
+      Jialc(lhs, 0);
+      break;
+
+    // R6 long branches.
+    case Branch::kR6LongUncondBranch:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
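+      // Jic sign-extends its 16-bit immediate; when bit 15 of the offset is set, that would
+      // subtract 0x10000 from the sum, so pre-add 0x10000 to the value loaded by Auipc.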
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kR6LongCondBranch:
+      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kR6LongCall:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in addiu.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(lhs, High16Bits(offset));
+      Addiu(lhs, lhs, Low16Bits(offset));
+      Jialc(lhs, 0);
+      break;
+  }
+  CHECK_EQ(overwrite_location_, branch->GetEndLocation());
+  CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
+}
+
+void MipsAssembler::B(MipsLabel* label) {
+  Buncond(label);
+}
+
+void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) {
+  Call(label, indirect_reg);
+}
+
+void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
+  Bcond(label, kCondEQ, rs, rt);
+}
+
+void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label) {
+  Bcond(label, kCondNE, rs, rt);
+}
+
+void MipsAssembler::Beqz(Register rt, MipsLabel* label) {
+  Bcond(label, kCondEQZ, rt);
+}
+
+void MipsAssembler::Bnez(Register rt, MipsLabel* label) {
+  Bcond(label, kCondNEZ, rt);
+}
+
+void MipsAssembler::Bltz(Register rt, MipsLabel* label) {
+  Bcond(label, kCondLTZ, rt);
+}
+
+void MipsAssembler::Bgez(Register rt, MipsLabel* label) {
+  Bcond(label, kCondGEZ, rt);
+}
+
+void MipsAssembler::Blez(Register rt, MipsLabel* label) {
+  Bcond(label, kCondLEZ, rt);
+}
+
+void MipsAssembler::Bgtz(Register rt, MipsLabel* label) {
+  Bcond(label, kCondGTZ, rt);
+}
+
+void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondLT, rs, rt);
+  } else if (!Branch::IsNop(kCondLT, rs, rt)) {
+    // Synthesize the instruction (not available on R2).
+    Slt(AT, rs, rt);
+    Bnez(AT, label);
+  }
+}
+
+void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondGE, rs, rt);
+  } else if (Branch::IsUncond(kCondGE, rs, rt)) {
+    B(label);
+  } else {
+    // Synthesize the instruction (not available on R2).
+    Slt(AT, rs, rt);
+    Beqz(AT, label);
+  }
+}
+
+void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondLTU, rs, rt);
+  } else if (!Branch::IsNop(kCondLTU, rs, rt)) {
+    // Synthesize the instruction (not available on R2).
+    Sltu(AT, rs, rt);
+    Bnez(AT, label);
+  }
+}
+
+void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) {
+  if (IsR6()) {
+    Bcond(label, kCondGEU, rs, rt);
+  } else if (Branch::IsUncond(kCondGEU, rs, rt)) {
+    B(label);
+  } else {
+    // Synthesize the instruction (not available on R2).
+    Sltu(AT, rs, rt);
+    Beqz(AT, label);
   }
 }
 
 void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
                                    int32_t offset) {
+  // IsInt<16> must be passed a signed value.
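+  // If the offset does not fit in a 16-bit signed immediate (for a doubleword both word
+  // offsets must fit), materialize it in AT, add the base, and address relative to AT.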
+  if (!IsInt<16>(offset) ||
+      (type == kLoadDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kLoadSignedByte:
       Lb(reg, base, offset);
@@ -488,8 +1617,16 @@
     case kLoadWord:
       Lw(reg, base, offset);
       break;
-    case kLoadWordPair:
-      LOG(FATAL) << "UNREACHABLE";
+    case kLoadDoubleword:
+      if (reg == base) {
+        // Loading the lower register first would clobber the base before the higher register
+        // could be loaded from it. Reverse the order so the base is overwritten last.
+        Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize);
+        Lw(reg, base, offset);
+      } else {
+        Lw(reg, base, offset);
+        Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize);
+      }
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -497,15 +1634,74 @@
 }
 
 void MipsAssembler::LoadSFromOffset(FRegister reg, Register base, int32_t offset) {
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   Lwc1(reg, base, offset);
 }
 
-void MipsAssembler::LoadDFromOffset(DRegister reg, Register base, int32_t offset) {
-  Ldc1(reg, base, offset);
+void MipsAssembler::LoadDFromOffset(FRegister reg, Register base, int32_t offset) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) ||
+      (!IsAligned<kMipsDoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
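+  // Ldc1 requires a doubleword-aligned address; when the offset is not aligned (the base is
+  // assumed aligned), fall back to two word-sized accesses below.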
+  if (offset & 0x7) {
+    if (Is32BitFPU()) {
+      Lwc1(reg, base, offset);
+      Lwc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize);
+    } else {
+      // 64-bit FPU.
+      Lwc1(reg, base, offset);
+      Lw(T8, base, offset + kMipsWordSize);
+      Mthc1(T8, reg);
+    }
+  } else {
+    Ldc1(reg, base, offset);
+  }
+}
+
+void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset,
+                             size_t size) {
+  MipsManagedRegister dst = m_dst.AsMips();
+  if (dst.IsNoRegister()) {
+    CHECK_EQ(0u, size) << dst;
+  } else if (dst.IsCoreRegister()) {
+    CHECK_EQ(kMipsWordSize, size) << dst;
+    LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset);
+  } else if (dst.IsRegisterPair()) {
+    CHECK_EQ(kMipsDoublewordSize, size) << dst;
+    LoadFromOffset(kLoadDoubleword, dst.AsRegisterPairLow(), src_register, src_offset);
+  } else if (dst.IsFRegister()) {
+    if (size == kMipsWordSize) {
+      LoadSFromOffset(dst.AsFRegister(), src_register, src_offset);
+    } else {
+      CHECK_EQ(kMipsDoublewordSize, size) << dst;
+      LoadDFromOffset(dst.AsFRegister(), src_register, src_offset);
+    }
+  }
 }
 
 void MipsAssembler::StoreToOffset(StoreOperandType type, Register reg, Register base,
                                   int32_t offset) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) ||
+      (type == kStoreDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kStoreByte:
       Sb(reg, base, offset);
@@ -516,8 +1712,11 @@
     case kStoreWord:
       Sw(reg, base, offset);
       break;
-    case kStoreWordPair:
-      LOG(FATAL) << "UNREACHABLE";
+    case kStoreDoubleword:
+      CHECK_NE(reg, base);
+      CHECK_NE(static_cast<Register>(reg + 1), base);
+      Sw(reg, base, offset);
+      Sw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize);
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
@@ -525,11 +1724,40 @@
 }
 
 void MipsAssembler::StoreSToOffset(FRegister reg, Register base, int32_t offset) {
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   Swc1(reg, base, offset);
 }
 
-void MipsAssembler::StoreDToOffset(DRegister reg, Register base, int32_t offset) {
-  Sdc1(reg, base, offset);
+void MipsAssembler::StoreDToOffset(FRegister reg, Register base, int32_t offset) {
+  // IsInt<16> must be passed a signed value.
+  if (!IsInt<16>(offset) ||
+      (!IsAligned<kMipsDoublewordSize>(offset) &&
+       !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) {
+    LoadConst32(AT, offset);
+    Addu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
+  if (offset & 0x7) {
+    if (Is32BitFPU()) {
+      Swc1(reg, base, offset);
+      Swc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize);
+    } else {
+      // 64-bit FPU.
+      Mfhc1(T8, reg);
+      Swc1(reg, base, offset);
+      Sw(T8, base, offset + kMipsWordSize);
+    }
+  } else {
+    Sdc1(reg, base, offset);
+  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -546,7 +1774,7 @@
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
 
-  // Push callee saves and return address
+  // Push callee saves and return address.
   int stack_offset = frame_size - kFramePointerSize;
   StoreToOffset(kStoreWord, RA, SP, stack_offset);
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
@@ -569,13 +1797,13 @@
       offset += spill.getSize();
     } else if (reg.IsCoreRegister()) {
       StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
-      offset += 4;
+      offset += kMipsWordSize;
     } else if (reg.IsFRegister()) {
       StoreSToOffset(reg.AsFRegister(), SP, offset);
-      offset += 4;
+      offset += kMipsWordSize;
     } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
+      StoreDToOffset(reg.AsOverlappingDRegisterLow(), SP, offset);
+      offset += kMipsDoublewordSize;
     }
   }
 }
@@ -585,7 +1813,7 @@
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
 
-  // Pop callee saves and return address
+  // Pop callee saves and return address.
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
     Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
@@ -601,6 +1829,7 @@
 
   // Then jump to the return address.
   Jr(RA);
+  Nop();
 
   // The CFI should be restored for any code that follows the exit block.
   cfi_.RestoreState();
@@ -608,14 +1837,14 @@
 }
 
 void MipsAssembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, SP, -adjust);
+  CHECK_ALIGNED(adjust, kFramePointerSize);
+  Addiu32(SP, SP, -adjust);
   cfi_.AdjustCFAOffset(adjust);
 }
 
 void MipsAssembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, SP, adjust);
+  CHECK_ALIGNED(adjust, kFramePointerSize);
+  Addiu32(SP, SP, adjust);
   cfi_.AdjustCFAOffset(-adjust);
 }
 
@@ -624,18 +1853,20 @@
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (src.IsCoreRegister()) {
-    CHECK_EQ(4u, size);
+    CHECK_EQ(kMipsWordSize, size);
     StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
   } else if (src.IsRegisterPair()) {
-    CHECK_EQ(8u, size);
+    CHECK_EQ(kMipsDoublewordSize, size);
     StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
     StoreToOffset(kStoreWord, src.AsRegisterPairHigh(),
-                  SP, dest.Int32Value() + 4);
+                  SP, dest.Int32Value() + kMipsWordSize);
   } else if (src.IsFRegister()) {
-    StoreSToOffset(src.AsFRegister(), SP, dest.Int32Value());
-  } else {
-    CHECK(src.IsDRegister());
-    StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
+    if (size == kMipsWordSize) {
+      StoreSToOffset(src.AsFRegister(), SP, dest.Int32Value());
+    } else {
+      CHECK_EQ(kMipsDoublewordSize, size);
+      StoreDToOffset(src.AsFRegister(), SP, dest.Int32Value());
+    }
   }
 }
 
@@ -655,29 +1886,30 @@
                                           ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
+  LoadConst32(scratch.AsCoreRegister(), imm);
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm,
-                                           ManagedRegister mscratch) {
-  MipsManagedRegister scratch = mscratch.AsMips();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
-}
-
-void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs,
-                                             FrameOffset fr_offs,
+void MipsAssembler::StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest, uint32_t imm,
                                              ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
+  // Is this function even referenced anywhere else in the code?
+  LoadConst32(scratch.AsCoreRegister(), imm);
+  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
+}
+
+void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                                               FrameOffset fr_offs,
+                                               ManagedRegister mscratch) {
+  MipsManagedRegister scratch = mscratch.AsMips();
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  Addiu32(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) {
+void MipsAssembler::StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) {
   StoreToOffset(kStoreWord, SP, S1, thr_offs.Int32Value());
 }
 
@@ -687,14 +1919,15 @@
   MipsManagedRegister scratch = mscratch.AsMips();
   StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value());
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
+  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + kMipsWordSize);
 }
 
 void MipsAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void MipsAssembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) {
+void MipsAssembler::LoadFromThread32(ManagedRegister mdest,
+                                     ThreadOffset<kMipsWordSize> src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -707,7 +1940,7 @@
 void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
                             bool unpoison_reference) {
   MipsManagedRegister dest = mdest.AsMips();
-  CHECK(dest.IsCoreRegister() && dest.IsCoreRegister());
+  CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
   if (kPoisonHeapReferences && unpoison_reference) {
@@ -715,16 +1948,15 @@
   }
 }
 
-void MipsAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                               Offset offs) {
+void MipsAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) {
   MipsManagedRegister dest = mdest.AsMips();
-  CHECK(dest.IsCoreRegister() && dest.IsCoreRegister()) << dest;
+  CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
 }
 
 void MipsAssembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                         ThreadOffset<4> offs) {
+                                           ThreadOffset<kMipsWordSize> offs) {
   MipsManagedRegister dest = mdest.AsMips();
   CHECK(dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(), S1, offs.Int32Value());
@@ -738,7 +1970,7 @@
   UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips";
 }
 
-void MipsAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t /*size*/) {
+void MipsAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
   MipsManagedRegister dest = mdest.AsMips();
   MipsManagedRegister src = msrc.AsMips();
   if (!dest.Equals(src)) {
@@ -747,14 +1979,19 @@
       Move(dest.AsCoreRegister(), src.AsCoreRegister());
     } else if (dest.IsFRegister()) {
       CHECK(src.IsFRegister()) << src;
-      MovS(dest.AsFRegister(), src.AsFRegister());
+      if (size == kMipsWordSize) {
+        MovS(dest.AsFRegister(), src.AsFRegister());
+      } else {
+        CHECK_EQ(kMipsDoublewordSize, size);
+        MovD(dest.AsFRegister(), src.AsFRegister());
+      }
     } else if (dest.IsDRegister()) {
       CHECK(src.IsDRegister()) << src;
-      MovD(dest.AsDRegister(), src.AsDRegister());
+      MovD(dest.AsOverlappingDRegisterLow(), src.AsOverlappingDRegisterLow());
     } else {
       CHECK(dest.IsRegisterPair()) << dest;
       CHECK(src.IsRegisterPair()) << src;
-      // Ensure that the first move doesn't clobber the input of the second
+      // Ensure that the first move doesn't clobber the input of the second.
       if (src.AsRegisterPairHigh() != dest.AsRegisterPairLow()) {
         Move(dest.AsRegisterPairLow(), src.AsRegisterPairLow());
         Move(dest.AsRegisterPairHigh(), src.AsRegisterPairHigh());
@@ -766,8 +2003,7 @@
   }
 }
 
-void MipsAssembler::CopyRef(FrameOffset dest, FrameOffset src,
-                            ManagedRegister mscratch) {
+void MipsAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
@@ -775,8 +2011,8 @@
 }
 
 void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                         ThreadOffset<4> thr_offs,
-                                         ManagedRegister mscratch) {
+                                           ThreadOffset<kMipsWordSize> thr_offs,
+                                           ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -785,9 +2021,9 @@
                 SP, fr_offs.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs,
-                                       FrameOffset fr_offs,
-                                       ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                                         FrameOffset fr_offs,
+                                         ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -796,26 +2032,25 @@
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::Copy(FrameOffset dest, FrameOffset src,
-                         ManagedRegister mscratch, size_t size) {
+void MipsAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
+  CHECK(size == kMipsWordSize || size == kMipsDoublewordSize) << size;
+  if (size == kMipsWordSize) {
     LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
     StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
+  } else if (size == kMipsDoublewordSize) {
     LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
     StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
+    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + kMipsWordSize);
+    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + kMipsWordSize);
   }
 }
 
 void MipsAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
                          ManagedRegister mscratch, size_t size) {
   Register scratch = mscratch.AsMips().AsCoreRegister();
-  CHECK_EQ(size, 4u);
+  CHECK_EQ(size, kMipsWordSize);
   LoadFromOffset(kLoadWord, scratch, src_base.AsMips().AsCoreRegister(), src_offset.Int32Value());
   StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
 }
@@ -823,107 +2058,117 @@
 void MipsAssembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
                          ManagedRegister mscratch, size_t size) {
   Register scratch = mscratch.AsMips().AsCoreRegister();
-  CHECK_EQ(size, 4u);
+  CHECK_EQ(size, kMipsWordSize);
   LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value());
   StoreToOffset(kStoreWord, scratch, dest_base.AsMips().AsCoreRegister(), dest_offset.Int32Value());
 }
 
-void MipsAssembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips implementation";
+void MipsAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                         FrameOffset src_base ATTRIBUTE_UNUSED,
+                         Offset src_offset ATTRIBUTE_UNUSED,
+                         ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                         size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "no MIPS implementation";
 }
 
 void MipsAssembler::Copy(ManagedRegister dest, Offset dest_offset,
                          ManagedRegister src, Offset src_offset,
                          ManagedRegister mscratch, size_t size) {
-  CHECK_EQ(size, 4u);
+  CHECK_EQ(size, kMipsWordSize);
   Register scratch = mscratch.AsMips().AsCoreRegister();
   LoadFromOffset(kLoadWord, scratch, src.AsMips().AsCoreRegister(), src_offset.Int32Value());
   StoreToOffset(kStoreWord, scratch, dest.AsMips().AsCoreRegister(), dest_offset.Int32Value());
 }
 
-void MipsAssembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset /*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips implementation";
+void MipsAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                         Offset dest_offset ATTRIBUTE_UNUSED,
+                         FrameOffset src ATTRIBUTE_UNUSED,
+                         Offset src_offset ATTRIBUTE_UNUSED,
+                         ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                         size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "no MIPS implementation";
 }
 
 void MipsAssembler::MemoryBarrier(ManagedRegister) {
-  UNIMPLEMENTED(FATAL) << "no mips implementation";
+  // TODO: sync?
+  UNIMPLEMENTED(FATAL) << "no MIPS implementation";
 }
 
 void MipsAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                    FrameOffset handle_scope_offset,
-                                    ManagedRegister min_reg, bool null_allowed) {
+                                           FrameOffset handle_scope_offset,
+                                           ManagedRegister min_reg,
+                                           bool null_allowed) {
   MipsManagedRegister out_reg = mout_reg.AsMips();
   MipsManagedRegister in_reg = min_reg.AsMips();
   CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg;
   CHECK(out_reg.IsCoreRegister()) << out_reg;
   if (null_allowed) {
-    Label null_arg;
+    MipsLabel null_arg;
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
-    // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    // E.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset).
     if (in_reg.IsNoRegister()) {
       LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
                      SP, handle_scope_offset.Int32Value());
       in_reg = out_reg;
     }
     if (!out_reg.Equals(in_reg)) {
-      LoadImmediate(out_reg.AsCoreRegister(), 0);
+      LoadConst32(out_reg.AsCoreRegister(), 0);
     }
-    EmitBranch(in_reg.AsCoreRegister(), ZERO, &null_arg, true);
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
-    Bind(&null_arg, false);
+    Beqz(in_reg.AsCoreRegister(), &null_arg);
+    Addiu32(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg);
   } else {
-    AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Addiu32(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
   }
 }
 
 void MipsAssembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                    FrameOffset handle_scope_offset,
-                                    ManagedRegister mscratch,
-                                    bool null_allowed) {
+                                           FrameOffset handle_scope_offset,
+                                           ManagedRegister mscratch,
+                                           bool null_allowed) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   if (null_allowed) {
-    Label null_arg;
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP,
-                   handle_scope_offset.Int32Value());
+    MipsLabel null_arg;
+    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
-    // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    EmitBranch(scratch.AsCoreRegister(), ZERO, &null_arg, true);
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
-    Bind(&null_arg, false);
+    // E.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset).
+    Beqz(scratch.AsCoreRegister(), &null_arg);
+    Addiu32(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg);
   } else {
-    AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
+    Addiu32(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value());
   }
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value());
 }
 
 // Given a handle scope entry, load the associated reference.
 void MipsAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                          ManagedRegister min_reg) {
+                                                 ManagedRegister min_reg) {
   MipsManagedRegister out_reg = mout_reg.AsMips();
   MipsManagedRegister in_reg = min_reg.AsMips();
   CHECK(out_reg.IsCoreRegister()) << out_reg;
   CHECK(in_reg.IsCoreRegister()) << in_reg;
-  Label null_arg;
+  MipsLabel null_arg;
   if (!out_reg.Equals(in_reg)) {
-    LoadImmediate(out_reg.AsCoreRegister(), 0);
+    LoadConst32(out_reg.AsCoreRegister(), 0);
   }
-  EmitBranch(in_reg.AsCoreRegister(), ZERO, &null_arg, true);
+  Beqz(in_reg.AsCoreRegister(), &null_arg);
   LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
                  in_reg.AsCoreRegister(), 0);
-  Bind(&null_arg, false);
+  Bind(&null_arg);
 }
 
-void MipsAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+void MipsAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
+                                 bool could_be_null ATTRIBUTE_UNUSED) {
+  // TODO: not validating references.
 }
 
-void MipsAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+void MipsAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
+                                 bool could_be_null ATTRIBUTE_UNUSED) {
+  // TODO: not validating references.
 }
 
 void MipsAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
@@ -934,22 +2179,24 @@
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  base.AsCoreRegister(), offset.Int32Value());
   Jalr(scratch.AsCoreRegister());
-  // TODO: place reference map on call
+  Nop();
+  // TODO: place reference map on call.
 }
 
 void MipsAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   // Call *(*(SP + base) + offset)
-  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 SP, base.Int32Value());
+  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, base.Int32Value());
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  scratch.AsCoreRegister(), offset.Int32Value());
   Jalr(scratch.AsCoreRegister());
-  // TODO: place reference map on call
+  Nop();
+  // TODO: place reference map on call.
 }
 
-void MipsAssembler::CallFromThread32(ThreadOffset<4> /*offset*/, ManagedRegister /*mscratch*/) {
+void MipsAssembler::CallFromThread32(ThreadOffset<kMipsWordSize> offset ATTRIBUTE_UNUSED,
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "no mips implementation";
 }
 
@@ -958,35 +2205,38 @@
 }
 
 void MipsAssembler::GetCurrentThread(FrameOffset offset,
-                                     ManagedRegister /*mscratch*/) {
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   StoreToOffset(kStoreWord, S1, SP, offset.Int32Value());
 }
 
 void MipsAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   MipsManagedRegister scratch = mscratch.AsMips();
-  MipsExceptionSlowPath* slow = new MipsExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
+  exception_blocks_.emplace_back(scratch, stack_adjust);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                 S1, Thread::ExceptionOffset<4>().Int32Value());
-  EmitBranch(scratch.AsCoreRegister(), ZERO, slow->Entry(), false);
+                 S1, Thread::ExceptionOffset<kMipsWordSize>().Int32Value());
+  // TODO: on MIPS32R6 prefer Bnezc(scratch.AsCoreRegister(), exception_blocks_.back().Entry());
+  // as the NAL instruction (occurring in long R2 branches) may become deprecated.
+  // For now, use instructions common to R2 and R6, as this code must execute on both.
+  Bnez(scratch.AsCoreRegister(), exception_blocks_.back().Entry());
 }
 
-void MipsExceptionSlowPath::Emit(Assembler* sasm) {
-  MipsAssembler* sp_asm = down_cast<MipsAssembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_, false);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
+void MipsAssembler::EmitExceptionPoll(MipsExceptionSlowPath* exception) {
+  Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
   }
-  // Pass exception object as argument
-  // Don't care about preserving A0 as this call won't return
-  __ Move(A0, scratch_.AsCoreRegister());
-  // Set up call to Thread::Current()->pDeliverException
-  __ LoadFromOffset(kLoadWord, T9, S1, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
-  __ Jr(T9);
-  // Call never returns
-  __ Break();
-#undef __
+  // Pass exception object as argument.
+  // Don't care about preserving A0 as this call won't return.
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+  Move(A0, exception->scratch_.AsCoreRegister());
+  // Set up call to Thread::Current()->pDeliverException.
+  LoadFromOffset(kLoadWord, T9, S1,
+    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pDeliverException).Int32Value());
+  Jr(T9);
+  Nop();
+
+  // Call never returns.
+  Break();
 }
 
 }  // namespace mips
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 378a59c..aa187b8 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -17,54 +17,111 @@
 #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 
+#include <utility>
 #include <vector>
 
+#include "arch/mips/instruction_set_features_mips.h"
 #include "base/macros.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
-#include "utils/assembler.h"
 #include "offsets.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
 
 namespace art {
 namespace mips {
 
+static constexpr size_t kMipsWordSize = 4;
+static constexpr size_t kMipsDoublewordSize = 8;
+
 enum LoadOperandType {
   kLoadSignedByte,
   kLoadUnsignedByte,
   kLoadSignedHalfword,
   kLoadUnsignedHalfword,
   kLoadWord,
-  kLoadWordPair,
-  kLoadSWord,
-  kLoadDWord
+  kLoadDoubleword
 };
 
 enum StoreOperandType {
   kStoreByte,
   kStoreHalfword,
   kStoreWord,
-  kStoreWordPair,
-  kStoreSWord,
-  kStoreDWord
+  kStoreDoubleword
+};
+
+class MipsLabel : public Label {
+ public:
+  MipsLabel() : prev_branch_id_plus_one_(0) {}
+
+  MipsLabel(MipsLabel&& src)
+      : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
+
+ private:
+  uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
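+  // The bound position is stored relative to the end of that branch so that it remains valid
+  // when preceding branches are promoted and expanded (see MipsAssembler::Bind() and
+  // MipsAssembler::GetLabelLocation()).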
+
+  friend class MipsAssembler;
+  DISALLOW_COPY_AND_ASSIGN(MipsLabel);
+};
+
+// Slowpath entered when Thread::Current()->_exception is non-null.
+class MipsExceptionSlowPath {
+ public:
+  explicit MipsExceptionSlowPath(MipsManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+  MipsExceptionSlowPath(MipsExceptionSlowPath&& src)
+      : scratch_(std::move(src.scratch_)),
+        stack_adjust_(std::move(src.stack_adjust_)),
+        exception_entry_(std::move(src.exception_entry_)) {}
+
+ private:
+  MipsLabel* Entry() { return &exception_entry_; }
+  const MipsManagedRegister scratch_;
+  const size_t stack_adjust_;
+  MipsLabel exception_entry_;
+
+  friend class MipsAssembler;
+  DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath);
 };
 
 class MipsAssembler FINAL : public Assembler {
  public:
-  MipsAssembler() {}
-  virtual ~MipsAssembler() {}
+  explicit MipsAssembler(const MipsInstructionSetFeatures* instruction_set_features = nullptr)
+      : overwriting_(false),
+        overwrite_location_(0),
+        last_position_adjustment_(0),
+        last_old_position_(0),
+        last_branch_id_(0),
+        isa_features_(instruction_set_features) {}
+
+  virtual ~MipsAssembler() {
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+    }
+  }
 
   // Emit Machine Instructions.
-  void Add(Register rd, Register rs, Register rt);
   void Addu(Register rd, Register rs, Register rt);
-  void Addi(Register rt, Register rs, uint16_t imm16);
   void Addiu(Register rt, Register rs, uint16_t imm16);
-  void Sub(Register rd, Register rs, Register rt);
   void Subu(Register rd, Register rs, Register rt);
-  void Mult(Register rs, Register rt);
-  void Multu(Register rs, Register rt);
-  void Div(Register rs, Register rt);
-  void Divu(Register rs, Register rt);
+
+  void MultR2(Register rs, Register rt);  // R2
+  void MultuR2(Register rs, Register rt);  // R2
+  void DivR2(Register rs, Register rt);  // R2
+  void DivuR2(Register rs, Register rt);  // R2
+  void MulR2(Register rd, Register rs, Register rt);  // R2
+  void DivR2(Register rd, Register rs, Register rt);  // R2
+  void ModR2(Register rd, Register rs, Register rt);  // R2
+  void DivuR2(Register rd, Register rs, Register rt);  // R2
+  void ModuR2(Register rd, Register rs, Register rt);  // R2
+  void MulR6(Register rd, Register rs, Register rt);  // R6
+  void MuhuR6(Register rd, Register rs, Register rt);  // R6
+  void DivR6(Register rd, Register rs, Register rt);  // R6
+  void ModR6(Register rd, Register rs, Register rt);  // R6
+  void DivuR6(Register rd, Register rs, Register rt);  // R6
+  void ModuR6(Register rd, Register rs, Register rt);  // R6
 
   void And(Register rd, Register rs, Register rt);
   void Andi(Register rt, Register rs, uint16_t imm16);
@@ -74,12 +131,15 @@
   void Xori(Register rt, Register rs, uint16_t imm16);
   void Nor(Register rd, Register rs, Register rt);
 
-  void Sll(Register rd, Register rs, int shamt);
-  void Srl(Register rd, Register rs, int shamt);
-  void Sra(Register rd, Register rs, int shamt);
-  void Sllv(Register rd, Register rs, Register rt);
-  void Srlv(Register rd, Register rs, Register rt);
-  void Srav(Register rd, Register rs, Register rt);
+  void Seb(Register rd, Register rt);  // R2+
+  void Seh(Register rd, Register rt);  // R2+
+
+  void Sll(Register rd, Register rt, int shamt);
+  void Srl(Register rd, Register rt, int shamt);
+  void Sra(Register rd, Register rt, int shamt);
+  void Sllv(Register rd, Register rt, Register rs);
+  void Srlv(Register rd, Register rt, Register rs);
+  void Srav(Register rd, Register rt, Register rs);
 
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
@@ -87,8 +147,9 @@
   void Lbu(Register rt, Register rs, uint16_t imm16);
   void Lhu(Register rt, Register rs, uint16_t imm16);
   void Lui(Register rt, uint16_t imm16);
-  void Mfhi(Register rd);
-  void Mflo(Register rd);
+  void Sync(uint32_t stype);
+  void Mfhi(Register rd);  // R2
+  void Mflo(Register rd);  // R2
 
   void Sb(Register rt, Register rs, uint16_t imm16);
   void Sh(Register rt, Register rs, uint16_t imm16);
@@ -99,81 +160,138 @@
   void Slti(Register rt, Register rs, uint16_t imm16);
   void Sltiu(Register rt, Register rs, uint16_t imm16);
 
-  void Beq(Register rt, Register rs, uint16_t imm16);
-  void Bne(Register rt, Register rs, uint16_t imm16);
-  void J(uint32_t address);
-  void Jal(uint32_t address);
-  void Jr(Register rs);
+  void B(uint16_t imm16);
+  void Beq(Register rs, Register rt, uint16_t imm16);
+  void Bne(Register rs, Register rt, uint16_t imm16);
+  void Beqz(Register rt, uint16_t imm16);
+  void Bnez(Register rt, uint16_t imm16);
+  void Bltz(Register rt, uint16_t imm16);
+  void Bgez(Register rt, uint16_t imm16);
+  void Blez(Register rt, uint16_t imm16);
+  void Bgtz(Register rt, uint16_t imm16);
+  void J(uint32_t addr26);
+  void Jal(uint32_t addr26);
+  void Jalr(Register rd, Register rs);
   void Jalr(Register rs);
+  void Jr(Register rs);
+  void Nal();
+  void Auipc(Register rs, uint16_t imm16);  // R6
+  void Addiupc(Register rs, uint32_t imm19);  // R6
+  void Bc(uint32_t imm26);  // R6
+  void Jic(Register rt, uint16_t imm16);  // R6
+  void Jialc(Register rt, uint16_t imm16);  // R6
+  void Bltc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bltzc(Register rt, uint16_t imm16);  // R6
+  void Bgtzc(Register rt, uint16_t imm16);  // R6
+  void Bgec(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bgezc(Register rt, uint16_t imm16);  // R6
+  void Blezc(Register rt, uint16_t imm16);  // R6
+  void Bltuc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bgeuc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Beqc(Register rs, Register rt, uint16_t imm16);  // R6
+  void Bnec(Register rs, Register rt, uint16_t imm16);  // R6
+  void Beqzc(Register rs, uint32_t imm21);  // R6
+  void Bnezc(Register rs, uint32_t imm21);  // R6
 
   void AddS(FRegister fd, FRegister fs, FRegister ft);
   void SubS(FRegister fd, FRegister fs, FRegister ft);
   void MulS(FRegister fd, FRegister fs, FRegister ft);
   void DivS(FRegister fd, FRegister fs, FRegister ft);
-  void AddD(DRegister fd, DRegister fs, DRegister ft);
-  void SubD(DRegister fd, DRegister fs, DRegister ft);
-  void MulD(DRegister fd, DRegister fs, DRegister ft);
-  void DivD(DRegister fd, DRegister fs, DRegister ft);
+  void AddD(FRegister fd, FRegister fs, FRegister ft);
+  void SubD(FRegister fd, FRegister fs, FRegister ft);
+  void MulD(FRegister fd, FRegister fs, FRegister ft);
+  void DivD(FRegister fd, FRegister fs, FRegister ft);
   void MovS(FRegister fd, FRegister fs);
-  void MovD(DRegister fd, DRegister fs);
+  void MovD(FRegister fd, FRegister fs);
+  void NegS(FRegister fd, FRegister fs);
+  void NegD(FRegister fd, FRegister fs);
+
+  void Cvtsw(FRegister fd, FRegister fs);
+  void Cvtdw(FRegister fd, FRegister fs);
+  void Cvtsd(FRegister fd, FRegister fs);
+  void Cvtds(FRegister fd, FRegister fs);
 
   void Mfc1(Register rt, FRegister fs);
-  void Mtc1(FRegister ft, Register rs);
+  void Mtc1(Register rt, FRegister fs);
+  void Mfhc1(Register rt, FRegister fs);
+  void Mthc1(Register rt, FRegister fs);
   void Lwc1(FRegister ft, Register rs, uint16_t imm16);
-  void Ldc1(DRegister ft, Register rs, uint16_t imm16);
+  void Ldc1(FRegister ft, Register rs, uint16_t imm16);
   void Swc1(FRegister ft, Register rs, uint16_t imm16);
-  void Sdc1(DRegister ft, Register rs, uint16_t imm16);
+  void Sdc1(FRegister ft, Register rs, uint16_t imm16);
 
   void Break();
   void Nop();
-  void Move(Register rt, Register rs);
-  void Clear(Register rt);
-  void Not(Register rt, Register rs);
-  void Mul(Register rd, Register rs, Register rt);
-  void Div(Register rd, Register rs, Register rt);
-  void Rem(Register rd, Register rs, Register rt);
+  void Move(Register rd, Register rs);
+  void Clear(Register rd);
+  void Not(Register rd, Register rs);
 
-  void AddConstant(Register rt, Register rs, int32_t value);
-  void LoadImmediate(Register rt, int32_t value);
+  // Higher-level composite instructions.
+  void LoadConst32(Register rd, int32_t value);
+  void LoadConst64(Register reg_hi, Register reg_lo, int64_t value);
+  void LoadDConst64(FRegister rd, int64_t value, Register temp);
+  void LoadSConst32(FRegister r, int32_t value, Register temp);
+  void StoreConst32ToOffset(int32_t value, Register base, int32_t offset, Register temp);
+  void StoreConst64ToOffset(int64_t value, Register base, int32_t offset, Register temp);
+  void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT);
+
+  // These will generate R2 branches or R6 branches as appropriate.
+  void Bind(MipsLabel* label);
+  void B(MipsLabel* label);
+  void Jalr(MipsLabel* label, Register indirect_reg);
+  void Beq(Register rs, Register rt, MipsLabel* label);
+  void Bne(Register rs, Register rt, MipsLabel* label);
+  void Beqz(Register rt, MipsLabel* label);
+  void Bnez(Register rt, MipsLabel* label);
+  void Bltz(Register rt, MipsLabel* label);
+  void Bgez(Register rt, MipsLabel* label);
+  void Blez(Register rt, MipsLabel* label);
+  void Bgtz(Register rt, MipsLabel* label);
+  void Blt(Register rs, Register rt, MipsLabel* label);
+  void Bge(Register rs, Register rt, MipsLabel* label);
+  void Bltu(Register rs, Register rt, MipsLabel* label);
+  void Bgeu(Register rs, Register rt, MipsLabel* label);
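+  // Illustrative use (a sketch only; it mirrors the assembler tests below rather than
+  // prescribing a code-generator API):
+  //   MipsLabel done;
+  //   Beqz(reg, &done);  // R2: beq + delay-slot nop; R6: a compact branch.
+  //   ...                // Skipped when reg == 0.
+  //   Bind(&done);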
 
   void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset);
   void LoadSFromOffset(FRegister reg, Register base, int32_t offset);
-  void LoadDFromOffset(DRegister reg, Register base, int32_t offset);
+  void LoadDFromOffset(FRegister reg, Register base, int32_t offset);
   void StoreToOffset(StoreOperandType type, Register reg, Register base, int32_t offset);
   void StoreSToOffset(FRegister reg, Register base, int32_t offset);
-  void StoreDToOffset(DRegister reg, Register base, int32_t offset);
+  void StoreDToOffset(FRegister reg, Register base, int32_t offset);
 
   // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
-  void Emit(int32_t value);
-  void EmitBranch(Register rt, Register rs, Label* label, bool equal);
-  void EmitJump(Label* label, bool link);
-  void Bind(Label* label, bool is_jump);
+  void Emit(uint32_t value);
+
+  // Push/pop composite routines.
+  void Push(Register rs);
+  void Pop(Register rd);
+  void PopAndReturn(Register rd, Register rt);
 
   void Bind(Label* label) OVERRIDE {
-    Bind(label, false);
+    Bind(down_cast<MipsLabel*>(label));
   }
-  void Jump(Label* label) OVERRIDE {
-    EmitJump(label, false);
+  void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS";
   }
 
   //
-  // Overridden common assembler high-level functionality
+  // Overridden common assembler high-level functionality.
   //
 
-  // Emit code that will create an activation on the stack
+  // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                   const std::vector<ManagedRegister>& callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
-  // Emit code that will remove an activation from the stack
+  // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
 
-  // Store routines
+  // Store routines.
   void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
   void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
   void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
@@ -191,7 +309,7 @@
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
-  // Load routines
+  // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
   void LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) OVERRIDE;
@@ -205,7 +323,7 @@
 
   void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<4> offs) OVERRIDE;
 
-  // Copying routines
+  // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
   void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
@@ -235,13 +353,13 @@
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
-  // Sign extension
+  // Sign extension.
   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Zero extension
+  // Zero extension.
   void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Exploit fast access in managed code to Thread::Current()
+  // Exploit fast access in managed code to Thread::Current().
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
 
@@ -257,7 +375,7 @@
   void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
                               ManagedRegister mscratch, bool null_allowed) OVERRIDE;
 
-  // src holds a handle scope entry (Object**) load this into dst
+  // src holds a handle scope entry (Object**) load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
@@ -265,7 +383,7 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
   void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
 
-  // Call to address held at [base+offset]
+  // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void CallFromThread32(ThreadOffset<4> offset, ManagedRegister mscratch) OVERRIDE;
@@ -274,43 +392,253 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
 
+  // Emit slow paths queued during assembly and promote short branches to long if needed.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit branches and finalize all instructions.
+  void FinalizeInstructions(const MemoryRegion& region);
+
+  // Returns the current (always up-to-date) location of a label. It can be used in class
+  // CodeGeneratorMIPS and must be used instead of MipsLabel::GetPosition().
+  uint32_t GetLabelLocation(MipsLabel* label) const;
+
+  // Get the final position of a label after local fixup based on the old position
+  // recorded before FinalizeCode().
+  uint32_t GetAdjustedPosition(uint32_t old_position);
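+  // A typical sequence (an assumption drawn from the comments above, not an enforced
+  // contract): emit instructions, call FinalizeCode() so short branches can be promoted
+  // to long ones, then emit the final encodings with FinalizeInstructions().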
+
+  enum BranchCondition {
+    kCondLT,
+    kCondGE,
+    kCondLE,
+    kCondGT,
+    kCondLTZ,
+    kCondGEZ,
+    kCondLEZ,
+    kCondGTZ,
+    kCondEQ,
+    kCondNE,
+    kCondEQZ,
+    kCondNEZ,
+    kCondLTU,
+    kCondGEU,
+    kUncond,
+  };
+  friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
+
  private:
+  class Branch {
+   public:
+    enum Type {
+      // R2 short branches.
+      kUncondBranch,
+      kCondBranch,
+      kCall,
+      // R2 long branches.
+      kLongUncondBranch,
+      kLongCondBranch,
+      kLongCall,
+      // R6 short branches.
+      kR6UncondBranch,
+      kR6CondBranch,
+      kR6Call,
+      // R6 long branches.
+      kR6LongUncondBranch,
+      kR6LongCondBranch,
+      kR6LongCall,
+    };
+    // Bit sizes of offsets defined as enums to minimize chance of typos.
+    enum OffsetBits {
+      kOffset16 = 16,
+      kOffset18 = 18,
+      kOffset21 = 21,
+      kOffset23 = 23,
+      kOffset28 = 28,
+      kOffset32 = 32,
+    };
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
+    static constexpr int32_t kMaxBranchLength = 32;
+    static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
+
+    struct BranchInfo {
+      // Branch length as a number of 4-byte-long instructions.
+      uint32_t length;
+      // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
+      // PC-relative offset (or its most significant 16-bit half, which goes first).
+      uint32_t instr_offset;
+      // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
+      // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
+      // instructions) from the instruction containing the offset.
+      uint32_t pc_org;
+      // How large (in bits) a PC-relative offset can be for a given type of branch (kR6CondBranch
+      // is an exception: use kOffset23 for beqzc/bnezc).
+      OffsetBits offset_size;
+      // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
+      // count.
+      int offset_shift;
+    };
+    static const BranchInfo branch_info_[/* Type */];
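+    // A concrete (hypothetical) reading of one entry: a plain R2 conditional branch is a
+    // 2-instruction sequence (branch + delay-slot nop); its 16-bit offset sits in
+    // instruction 0, is applied relative to PC+4 of that instruction (pc_org = 1) and is
+    // shifted left by 2 at run time (offset_shift = 2).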
+
+    // Unconditional branch.
+    Branch(bool is_r6, uint32_t location, uint32_t target);
+    // Conditional branch.
+    Branch(bool is_r6,
+           uint32_t location,
+           uint32_t target,
+           BranchCondition condition,
+           Register lhs_reg,
+           Register rhs_reg = ZERO);
+    // Call (branch and link) that stores the target address in a given register (i.e. T9).
+    Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg);
+
+    // Some conditional branches with lhs = rhs are effectively NOPs, while others are
+    // effectively unconditional. MIPSR6 conditional branches require lhs != rhs. So we
+    // need a way to identify such branches in order to either emit no instructions for
+    // them or change them to unconditional ones.
+    static bool IsNop(BranchCondition condition, Register lhs, Register rhs);
+    static bool IsUncond(BranchCondition condition, Register lhs, Register rhs);
+
+    static BranchCondition OppositeCondition(BranchCondition cond);
+
+    Type GetType() const;
+    BranchCondition GetCondition() const;
+    Register GetLeftRegister() const;
+    Register GetRightRegister() const;
+    uint32_t GetTarget() const;
+    uint32_t GetLocation() const;
+    uint32_t GetOldLocation() const;
+    uint32_t GetLength() const;
+    uint32_t GetOldLength() const;
+    uint32_t GetSize() const;
+    uint32_t GetOldSize() const;
+    uint32_t GetEndLocation() const;
+    uint32_t GetOldEndLocation() const;
+    bool IsLong() const;
+    bool IsResolved() const;
+
+    // Returns the bit size of the signed offset that the branch instruction can handle.
+    OffsetBits GetOffsetSize() const;
+
+    // Calculates the distance between two byte locations in the assembler buffer and
+    // returns the number of bits needed to represent the distance as a signed integer.
+    //
+    // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
+    // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
+    //
+    // Composite branches (made of several instructions) with longer reach have 32-bit
+    // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
+    // The composite branches cover the range of PC +/- 2GB.
+    //
+    // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
+    // case with the addiu instruction and a 16-bit offset.
+    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
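+    // Rough reach implied by those sizes (illustrative arithmetic, not a normative table):
+    // 18 bits ~ +/-128KB, 21 bits ~ +/-1MB, 23 bits ~ +/-4MB, 28 bits ~ +/-128MB, and the
+    // 32-bit composite form covers the +/-2GB range mentioned above.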
+
+    // Resolve a branch when the target is known.
+    void Resolve(uint32_t target);
+
+    // Relocate a branch by a given delta if needed due to expansion of this or another
+    // branch at a given location by this delta (just changes location_ and target_).
+    void Relocate(uint32_t expand_location, uint32_t delta);
+
+    // If the branch is short, changes its type to long.
+    void PromoteToLong();
+
+    // If necessary, updates the type by promoting a short branch to a long branch
+    // based on the branch location and target. Returns the amount (in bytes) by
+    // which the branch size has increased.
+    // max_short_distance caps the maximum distance between location_ and target_
+    // that is allowed for short branches. This is for debugging/testing purposes.
+    // max_short_distance = 0 forces all short branches to become long.
+    // Use the implicit default argument when not debugging/testing.
+    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+
+    // Returns the location of the instruction(s) containing the offset.
+    uint32_t GetOffsetLocation() const;
+
+    // Calculates and returns the offset ready for encoding in the branch instruction(s).
+    uint32_t GetOffset() const;
+
+   private:
+    // Completes branch construction by determining and recording its type.
+    void InitializeType(bool is_call, bool is_r6);
+    // Helper for the above.
+    void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
+
+    uint32_t old_location_;          // Offset into assembler buffer in bytes.
+    uint32_t location_;              // Offset into assembler buffer in bytes.
+    uint32_t target_;                // Offset into assembler buffer in bytes.
+
+    uint32_t lhs_reg_ : 5;           // Left-hand side register in conditional branches or
+                                     // indirect call register.
+    uint32_t rhs_reg_ : 5;           // Right-hand side register in conditional branches.
+    BranchCondition condition_ : 5;  // Condition for conditional branches.
+
+    Type type_ : 5;                  // Current type of the branch.
+    Type old_type_ : 5;              // Initial type of the branch.
+  };
+  friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
+  friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
+
   void EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct);
   void EmitI(int opcode, Register rs, Register rt, uint16_t imm);
-  void EmitJ(int opcode, int address);
+  void EmitI21(int opcode, Register rs, uint32_t imm21);
+  void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
+  void EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
+  void EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);  // R6
 
-  int32_t EncodeBranchOffset(int offset, int32_t inst, bool is_jump);
-  int DecodeBranchOffset(int32_t inst, bool is_jump);
+  void Buncond(MipsLabel* label);
+  void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
+  void Call(MipsLabel* label, Register indirect_reg);
+  void FinalizeLabeledBranch(MipsLabel* label);
 
-  FRegister ConvertDRegToFReg(DRegister reg) {
-    return static_cast<FRegister>(reg * 2);
+  Branch* GetBranch(uint32_t branch_id);
+  const Branch* GetBranch(uint32_t branch_id) const;
+
+  void PromoteBranches();
+  void EmitBranch(Branch* branch);
+  void EmitBranches();
+
+  // Emits exception block.
+  void EmitExceptionPoll(MipsExceptionSlowPath* exception);
+
+  bool IsR6() const {
+    if (isa_features_ != nullptr) {
+      return isa_features_->IsR6();
+    } else {
+      return false;
+    }
   }
-  Register ConvertDRegToReg(DRegister reg) {
-    return static_cast<Register>(reg * 2);
+
+  bool Is32BitFPU() const {
+    if (isa_features_ != nullptr) {
+      return isa_features_->Is32BitFloatingPoint();
+    } else {
+      return true;
+    }
   }
-  Register ConvertFRegToReg(FRegister reg) {
-    return static_cast<Register>(reg);
-  }
-  FRegister ConvertRegToFReg(Register reg) {
-    return static_cast<FRegister>(reg);
-  }
+
+  // List of exception blocks to generate at the end of the code cache.
+  std::vector<MipsExceptionSlowPath> exception_blocks_;
+
+  std::vector<Branch> branches_;
+
+  // Whether we are appending instructions at the end of the buffer or overwriting existing ones.
+  bool overwriting_;
+  // The current overwrite location.
+  uint32_t overwrite_location_;
+
+  // Data for GetAdjustedPosition(), see the description there.
+  uint32_t last_position_adjustment_;
+  uint32_t last_old_position_;
+  uint32_t last_branch_id_;
+
+  const MipsInstructionSetFeatures* isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsAssembler);
 };
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class MipsExceptionSlowPath FINAL : public SlowPath {
- public:
-  MipsExceptionSlowPath(MipsManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const MipsManagedRegister scratch_;
-  const size_t stack_adjust_;
-};
-
 }  // namespace mips
 }  // namespace art
 
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
new file mode 100644
index 0000000..063d8bd
--- /dev/null
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -0,0 +1,1324 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_mips.h"
+
+#include <map>
+
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
+
+namespace art {
+
+struct MIPSCpuRegisterCompare {
+  bool operator()(const mips::Register& a, const mips::Register& b) const {
+    return a < b;
+  }
+};
+
+class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler,
+                                               mips::Register,
+                                               mips::FRegister,
+                                               uint32_t> {
+ public:
+  typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base;
+
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "mips";
+  }
+
+  std::string GetAssemblerParameters() OVERRIDE {
+    return " --no-warn -32 -march=mips32r2";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mmips:isa32r2";
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.push_back(new mips::Register(mips::ZERO));
+      registers_.push_back(new mips::Register(mips::AT));
+      registers_.push_back(new mips::Register(mips::V0));
+      registers_.push_back(new mips::Register(mips::V1));
+      registers_.push_back(new mips::Register(mips::A0));
+      registers_.push_back(new mips::Register(mips::A1));
+      registers_.push_back(new mips::Register(mips::A2));
+      registers_.push_back(new mips::Register(mips::A3));
+      registers_.push_back(new mips::Register(mips::T0));
+      registers_.push_back(new mips::Register(mips::T1));
+      registers_.push_back(new mips::Register(mips::T2));
+      registers_.push_back(new mips::Register(mips::T3));
+      registers_.push_back(new mips::Register(mips::T4));
+      registers_.push_back(new mips::Register(mips::T5));
+      registers_.push_back(new mips::Register(mips::T6));
+      registers_.push_back(new mips::Register(mips::T7));
+      registers_.push_back(new mips::Register(mips::S0));
+      registers_.push_back(new mips::Register(mips::S1));
+      registers_.push_back(new mips::Register(mips::S2));
+      registers_.push_back(new mips::Register(mips::S3));
+      registers_.push_back(new mips::Register(mips::S4));
+      registers_.push_back(new mips::Register(mips::S5));
+      registers_.push_back(new mips::Register(mips::S6));
+      registers_.push_back(new mips::Register(mips::S7));
+      registers_.push_back(new mips::Register(mips::T8));
+      registers_.push_back(new mips::Register(mips::T9));
+      registers_.push_back(new mips::Register(mips::K0));
+      registers_.push_back(new mips::Register(mips::K1));
+      registers_.push_back(new mips::Register(mips::GP));
+      registers_.push_back(new mips::Register(mips::SP));
+      registers_.push_back(new mips::Register(mips::FP));
+      registers_.push_back(new mips::Register(mips::RA));
+
+      secondary_register_names_.emplace(mips::Register(mips::ZERO), "zero");
+      secondary_register_names_.emplace(mips::Register(mips::AT), "at");
+      secondary_register_names_.emplace(mips::Register(mips::V0), "v0");
+      secondary_register_names_.emplace(mips::Register(mips::V1), "v1");
+      secondary_register_names_.emplace(mips::Register(mips::A0), "a0");
+      secondary_register_names_.emplace(mips::Register(mips::A1), "a1");
+      secondary_register_names_.emplace(mips::Register(mips::A2), "a2");
+      secondary_register_names_.emplace(mips::Register(mips::A3), "a3");
+      secondary_register_names_.emplace(mips::Register(mips::T0), "t0");
+      secondary_register_names_.emplace(mips::Register(mips::T1), "t1");
+      secondary_register_names_.emplace(mips::Register(mips::T2), "t2");
+      secondary_register_names_.emplace(mips::Register(mips::T3), "t3");
+      secondary_register_names_.emplace(mips::Register(mips::T4), "t4");
+      secondary_register_names_.emplace(mips::Register(mips::T5), "t5");
+      secondary_register_names_.emplace(mips::Register(mips::T6), "t6");
+      secondary_register_names_.emplace(mips::Register(mips::T7), "t7");
+      secondary_register_names_.emplace(mips::Register(mips::S0), "s0");
+      secondary_register_names_.emplace(mips::Register(mips::S1), "s1");
+      secondary_register_names_.emplace(mips::Register(mips::S2), "s2");
+      secondary_register_names_.emplace(mips::Register(mips::S3), "s3");
+      secondary_register_names_.emplace(mips::Register(mips::S4), "s4");
+      secondary_register_names_.emplace(mips::Register(mips::S5), "s5");
+      secondary_register_names_.emplace(mips::Register(mips::S6), "s6");
+      secondary_register_names_.emplace(mips::Register(mips::S7), "s7");
+      secondary_register_names_.emplace(mips::Register(mips::T8), "t8");
+      secondary_register_names_.emplace(mips::Register(mips::T9), "t9");
+      secondary_register_names_.emplace(mips::Register(mips::K0), "k0");
+      secondary_register_names_.emplace(mips::Register(mips::K1), "k1");
+      secondary_register_names_.emplace(mips::Register(mips::GP), "gp");
+      secondary_register_names_.emplace(mips::Register(mips::SP), "sp");
+      secondary_register_names_.emplace(mips::Register(mips::FP), "fp");
+      secondary_register_names_.emplace(mips::Register(mips::RA), "ra");
+
+      fp_registers_.push_back(new mips::FRegister(mips::F0));
+      fp_registers_.push_back(new mips::FRegister(mips::F1));
+      fp_registers_.push_back(new mips::FRegister(mips::F2));
+      fp_registers_.push_back(new mips::FRegister(mips::F3));
+      fp_registers_.push_back(new mips::FRegister(mips::F4));
+      fp_registers_.push_back(new mips::FRegister(mips::F5));
+      fp_registers_.push_back(new mips::FRegister(mips::F6));
+      fp_registers_.push_back(new mips::FRegister(mips::F7));
+      fp_registers_.push_back(new mips::FRegister(mips::F8));
+      fp_registers_.push_back(new mips::FRegister(mips::F9));
+      fp_registers_.push_back(new mips::FRegister(mips::F10));
+      fp_registers_.push_back(new mips::FRegister(mips::F11));
+      fp_registers_.push_back(new mips::FRegister(mips::F12));
+      fp_registers_.push_back(new mips::FRegister(mips::F13));
+      fp_registers_.push_back(new mips::FRegister(mips::F14));
+      fp_registers_.push_back(new mips::FRegister(mips::F15));
+      fp_registers_.push_back(new mips::FRegister(mips::F16));
+      fp_registers_.push_back(new mips::FRegister(mips::F17));
+      fp_registers_.push_back(new mips::FRegister(mips::F18));
+      fp_registers_.push_back(new mips::FRegister(mips::F19));
+      fp_registers_.push_back(new mips::FRegister(mips::F20));
+      fp_registers_.push_back(new mips::FRegister(mips::F21));
+      fp_registers_.push_back(new mips::FRegister(mips::F22));
+      fp_registers_.push_back(new mips::FRegister(mips::F23));
+      fp_registers_.push_back(new mips::FRegister(mips::F24));
+      fp_registers_.push_back(new mips::FRegister(mips::F25));
+      fp_registers_.push_back(new mips::FRegister(mips::F26));
+      fp_registers_.push_back(new mips::FRegister(mips::F27));
+      fp_registers_.push_back(new mips::FRegister(mips::F28));
+      fp_registers_.push_back(new mips::FRegister(mips::F29));
+      fp_registers_.push_back(new mips::FRegister(mips::F30));
+      fp_registers_.push_back(new mips::FRegister(mips::F31));
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+    STLDeleteElements(&fp_registers_);
+  }
+
+  std::vector<mips::Register*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE {
+    return fp_registers_;
+  }
+
+  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
+    return imm_value;
+  }
+
+  std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE {
+    CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end());
+    return secondary_register_names_[reg];
+  }
+
+  std::string RepeatInsn(size_t count, const std::string& insn) {
+    std::string result;
+    for (; count != 0u; --count) {
+      result += insn;
+    }
+    return result;
+  }
+
+ private:
+  std::vector<mips::Register*> registers_;
+  std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
+
+  std::vector<mips::FRegister*> fp_registers_;
+};
+
+
+TEST_F(AssemblerMIPSTest, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+#define __ GetAssembler()->
+
+TEST_F(AssemblerMIPSTest, Addu) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "Addu");
+}
+
+TEST_F(AssemblerMIPSTest, Addiu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Addiu, -16, "addiu ${reg1}, ${reg2}, {imm}"), "Addiu");
+}
+
+TEST_F(AssemblerMIPSTest, Subu) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Subu, "subu ${reg1}, ${reg2}, ${reg3}"), "Subu");
+}
+
+TEST_F(AssemblerMIPSTest, MultR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::MultR2, "mult ${reg1}, ${reg2}"), "MultR2");
+}
+
+TEST_F(AssemblerMIPSTest, MultuR2) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::MultuR2, "multu ${reg1}, ${reg2}"), "MultuR2");
+}
+
+TEST_F(AssemblerMIPSTest, DivR2Basic) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::DivR2, "div $zero, ${reg1}, ${reg2}"), "DivR2Basic");
+}
+
+TEST_F(AssemblerMIPSTest, DivuR2Basic) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::DivuR2, "divu $zero, ${reg1}, ${reg2}"), "DivuR2Basic");
+}
+
+TEST_F(AssemblerMIPSTest, MulR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::MulR2, "mul ${reg1}, ${reg2}, ${reg3}"), "MulR2");
+}
+
+TEST_F(AssemblerMIPSTest, DivR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivR2, "div $zero, ${reg2}, ${reg3}\nmflo ${reg1}"),
+            "DivR2");
+}
+
+TEST_F(AssemblerMIPSTest, ModR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModR2, "div $zero, ${reg2}, ${reg3}\nmfhi ${reg1}"),
+            "ModR2");
+}
+
+TEST_F(AssemblerMIPSTest, DivuR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::DivuR2, "divu $zero, ${reg2}, ${reg3}\nmflo ${reg1}"),
+            "DivuR2");
+}
+
+TEST_F(AssemblerMIPSTest, ModuR2) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::ModuR2, "divu $zero, ${reg2}, ${reg3}\nmfhi ${reg1}"),
+            "ModuR2");
+}
+
+TEST_F(AssemblerMIPSTest, And) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::And, "and ${reg1}, ${reg2}, ${reg3}"), "And");
+}
+
+TEST_F(AssemblerMIPSTest, Andi) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Andi, 16, "andi ${reg1}, ${reg2}, {imm}"), "Andi");
+}
+
+TEST_F(AssemblerMIPSTest, Or) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Or, "or ${reg1}, ${reg2}, ${reg3}"), "Or");
+}
+
+TEST_F(AssemblerMIPSTest, Ori) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Ori, 16, "ori ${reg1}, ${reg2}, {imm}"), "Ori");
+}
+
+TEST_F(AssemblerMIPSTest, Xor) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Xor, "xor ${reg1}, ${reg2}, ${reg3}"), "Xor");
+}
+
+TEST_F(AssemblerMIPSTest, Xori) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Xori, 16, "xori ${reg1}, ${reg2}, {imm}"), "Xori");
+}
+
+TEST_F(AssemblerMIPSTest, Nor) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Nor, "nor ${reg1}, ${reg2}, ${reg3}"), "Nor");
+}
+
+TEST_F(AssemblerMIPSTest, Seb) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Seb, "seb ${reg1}, ${reg2}"), "Seb");
+}
+
+TEST_F(AssemblerMIPSTest, Seh) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Seh, "seh ${reg1}, ${reg2}"), "Seh");
+}
+
+TEST_F(AssemblerMIPSTest, Sll) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sll, 5, "sll ${reg1}, ${reg2}, {imm}"), "Sll");
+}
+
+TEST_F(AssemblerMIPSTest, Srl) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Srl, 5, "srl ${reg1}, ${reg2}, {imm}"), "Srl");
+}
+
+TEST_F(AssemblerMIPSTest, Sra) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sra, 5, "sra ${reg1}, ${reg2}, {imm}"), "Sra");
+}
+
+TEST_F(AssemblerMIPSTest, Sllv) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Sllv, "sllv ${reg1}, ${reg2}, ${reg3}"), "Sllv");
+}
+
+TEST_F(AssemblerMIPSTest, Srlv) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Srlv, "srlv ${reg1}, ${reg2}, ${reg3}"), "Srlv");
+}
+
+TEST_F(AssemblerMIPSTest, Srav) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav");
+}
+
+TEST_F(AssemblerMIPSTest, Lb) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb");
+}
+
+TEST_F(AssemblerMIPSTest, Lh) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lh, -16, "lh ${reg1}, {imm}(${reg2})"), "Lh");
+}
+
+TEST_F(AssemblerMIPSTest, Lw) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lw, -16, "lw ${reg1}, {imm}(${reg2})"), "Lw");
+}
+
+TEST_F(AssemblerMIPSTest, Lbu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lbu, -16, "lbu ${reg1}, {imm}(${reg2})"), "Lbu");
+}
+
+TEST_F(AssemblerMIPSTest, Lhu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Lhu, -16, "lhu ${reg1}, {imm}(${reg2})"), "Lhu");
+}
+
+TEST_F(AssemblerMIPSTest, Lui) {
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Lui, 16, "lui ${reg}, {imm}"), "Lui");
+}
+
+TEST_F(AssemblerMIPSTest, Mfhi) {
+  DriverStr(RepeatR(&mips::MipsAssembler::Mfhi, "mfhi ${reg}"), "Mfhi");
+}
+
+TEST_F(AssemblerMIPSTest, Mflo) {
+  DriverStr(RepeatR(&mips::MipsAssembler::Mflo, "mflo ${reg}"), "Mflo");
+}
+
+TEST_F(AssemblerMIPSTest, Sb) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sb, -16, "sb ${reg1}, {imm}(${reg2})"), "Sb");
+}
+
+TEST_F(AssemblerMIPSTest, Sh) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sh, -16, "sh ${reg1}, {imm}(${reg2})"), "Sh");
+}
+
+TEST_F(AssemblerMIPSTest, Sw) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sw, -16, "sw ${reg1}, {imm}(${reg2})"), "Sw");
+}
+
+TEST_F(AssemblerMIPSTest, Slt) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Slt, "slt ${reg1}, ${reg2}, ${reg3}"), "Slt");
+}
+
+TEST_F(AssemblerMIPSTest, Sltu) {
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Sltu, "sltu ${reg1}, ${reg2}, ${reg3}"), "Sltu");
+}
+
+TEST_F(AssemblerMIPSTest, Slti) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Slti, -16, "slti ${reg1}, ${reg2}, {imm}"), "Slti");
+}
+
+TEST_F(AssemblerMIPSTest, Sltiu) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Sltiu, -16, "sltiu ${reg1}, ${reg2}, {imm}"), "Sltiu");
+}
+
+TEST_F(AssemblerMIPSTest, AddS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::AddS, "add.s ${reg1}, ${reg2}, ${reg3}"), "AddS");
+}
+
+TEST_F(AssemblerMIPSTest, AddD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::AddD, "add.d ${reg1}, ${reg2}, ${reg3}"), "AddD");
+}
+
+TEST_F(AssemblerMIPSTest, SubS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SubS, "sub.s ${reg1}, ${reg2}, ${reg3}"), "SubS");
+}
+
+TEST_F(AssemblerMIPSTest, SubD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SubD, "sub.d ${reg1}, ${reg2}, ${reg3}"), "SubD");
+}
+
+TEST_F(AssemblerMIPSTest, MulS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MulS, "mul.s ${reg1}, ${reg2}, ${reg3}"), "MulS");
+}
+
+TEST_F(AssemblerMIPSTest, MulD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::MulD, "mul.d ${reg1}, ${reg2}, ${reg3}"), "MulD");
+}
+
+TEST_F(AssemblerMIPSTest, DivS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::DivS, "div.s ${reg1}, ${reg2}, ${reg3}"), "DivS");
+}
+
+TEST_F(AssemblerMIPSTest, DivD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::DivD, "div.d ${reg1}, ${reg2}, ${reg3}"), "DivD");
+}
+
+TEST_F(AssemblerMIPSTest, MovS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::MovS, "mov.s ${reg1}, ${reg2}"), "MovS");
+}
+
+TEST_F(AssemblerMIPSTest, MovD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::MovD, "mov.d ${reg1}, ${reg2}"), "MovD");
+}
+
+TEST_F(AssemblerMIPSTest, NegS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::NegS, "neg.s ${reg1}, ${reg2}"), "NegS");
+}
+
+TEST_F(AssemblerMIPSTest, NegD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
+}
+
+TEST_F(AssemblerMIPSTest, CvtSW) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDW) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW");
+}
+
+TEST_F(AssemblerMIPSTest, CvtSD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS");
+}
+
+TEST_F(AssemblerMIPSTest, Mfc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
+}
+
+TEST_F(AssemblerMIPSTest, Mtc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mtc1, "mtc1 ${reg1}, ${reg2}"), "Mtc1");
+}
+
+TEST_F(AssemblerMIPSTest, Mfhc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mfhc1, "mfhc1 ${reg1}, ${reg2}"), "Mfhc1");
+}
+
+TEST_F(AssemblerMIPSTest, Mthc1) {
+  DriverStr(RepeatRF(&mips::MipsAssembler::Mthc1, "mthc1 ${reg1}, ${reg2}"), "Mthc1");
+}
+
+TEST_F(AssemblerMIPSTest, Lwc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Lwc1, -16, "lwc1 ${reg1}, {imm}(${reg2})"), "Lwc1");
+}
+
+TEST_F(AssemblerMIPSTest, Ldc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Ldc1, -16, "ldc1 ${reg1}, {imm}(${reg2})"), "Ldc1");
+}
+
+TEST_F(AssemblerMIPSTest, Swc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Swc1, -16, "swc1 ${reg1}, {imm}(${reg2})"), "Swc1");
+}
+
+TEST_F(AssemblerMIPSTest, Sdc1) {
+  DriverStr(RepeatFRIb(&mips::MipsAssembler::Sdc1, -16, "sdc1 ${reg1}, {imm}(${reg2})"), "Sdc1");
+}
+
+TEST_F(AssemblerMIPSTest, Move) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Move, "or ${reg1}, ${reg2}, $zero"), "Move");
+}
+
+TEST_F(AssemblerMIPSTest, Clear) {
+  DriverStr(RepeatR(&mips::MipsAssembler::Clear, "or ${reg}, $zero, $zero"), "Clear");
+}
+
+TEST_F(AssemblerMIPSTest, Not) {
+  DriverStr(RepeatRR(&mips::MipsAssembler::Not, "nor ${reg1}, ${reg2}, $zero"), "Not");
+}
+
+TEST_F(AssemblerMIPSTest, LoadFromOffset) {
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 256);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 1000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x10000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x12345678);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, -256);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A1, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A1, mips::A0, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 256);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 1000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x10000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x12345678);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -256);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xFFFF8000);
+  __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+
+  const char* expected =
+      "lb $a0, 0($a0)\n"
+      "lb $a0, 0($a1)\n"
+      "lb $a0, 256($a1)\n"
+      "lb $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+      "lb $a0, -256($a1)\n"
+      "lb $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lb $a0, 0($at)\n"
+
+      "lbu $a0, 0($a0)\n"
+      "lbu $a0, 0($a1)\n"
+      "lbu $a0, 256($a1)\n"
+      "lbu $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+      "lbu $a0, -256($a1)\n"
+      "lbu $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lbu $a0, 0($at)\n"
+
+      "lh $a0, 0($a0)\n"
+      "lh $a0, 0($a1)\n"
+      "lh $a0, 256($a1)\n"
+      "lh $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+      "lh $a0, -256($a1)\n"
+      "lh $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lh $a0, 0($at)\n"
+
+      "lhu $a0, 0($a0)\n"
+      "lhu $a0, 0($a1)\n"
+      "lhu $a0, 256($a1)\n"
+      "lhu $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+      "lhu $a0, -256($a1)\n"
+      "lhu $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lhu $a0, 0($at)\n"
+
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a0, 256($a1)\n"
+      "lw $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+      "lw $a0, -256($a1)\n"
+      "lw $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "lw $a0, 0($at)\n"
+
+      "lw $a1, 4($a0)\n"
+      "lw $a0, 0($a0)\n"
+      "lw $a0, 0($a1)\n"
+      "lw $a1, 4($a1)\n"
+      "lw $a1, 0($a0)\n"
+      "lw $a2, 4($a0)\n"
+      "lw $a0, 0($a2)\n"
+      "lw $a1, 4($a2)\n"
+      "lw $a0, 256($a2)\n"
+      "lw $a1, 260($a2)\n"
+      "lw $a0, 1000($a2)\n"
+      "lw $a1, 1004($a2)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n"
+      "lw $a0, -256($a2)\n"
+      "lw $a1, -252($a2)\n"
+      "lw $a0, 0xFFFF8000($a2)\n"
+      "lw $a1, 0xFFFF8004($a2)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a2\n"
+      "lw $a0, 0($at)\n"
+      "lw $a1, 4($at)\n";
+  DriverStr(expected, "LoadFromOffset");
+}
+
+TEST_F(AssemblerMIPSTest, LoadSFromOffset) {
+  __ LoadSFromOffset(mips::F0, mips::A0, 0);
+  __ LoadSFromOffset(mips::F0, mips::A0, 4);
+  __ LoadSFromOffset(mips::F0, mips::A0, 256);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0x8000);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0x10000);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0x12345678);
+  __ LoadSFromOffset(mips::F0, mips::A0, -256);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ LoadSFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "lwc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n"
+      "lwc1 $f0, -256($a0)\n"
+      "lwc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "lwc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadSFromOffset");
+}
+
+
+TEST_F(AssemblerMIPSTest, LoadDFromOffset) {
+  __ LoadDFromOffset(mips::F0, mips::A0, 0);
+  __ LoadDFromOffset(mips::F0, mips::A0, 4);
+  __ LoadDFromOffset(mips::F0, mips::A0, 256);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0x8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0x10000);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0x12345678);
+  __ LoadDFromOffset(mips::F0, mips::A0, -256);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ LoadDFromOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "ldc1 $f0, 0($a0)\n"
+      "lwc1 $f0, 4($a0)\n"
+      "lwc1 $f1, 8($a0)\n"
+      "ldc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n"
+      "ldc1 $f0, -256($a0)\n"
+      "ldc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "ldc1 $f0, 0($at)\n";
+  DriverStr(expected, "LoadDFromOffset");
+}
+
+TEST_F(AssemblerMIPSTest, StoreToOffset) {
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A0, 0);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 256);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 1000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x10000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x12345678);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, -256);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A0, 0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 256);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 1000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x10000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x12345678);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, -256);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A0, 0);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 256);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 1000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x10000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x12345678);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, -256);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xABCDEF00);
+
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 256);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 1000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x10000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x12345678);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -256);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xFFFF8000);
+  __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xABCDEF00);
+
+  const char* expected =
+      "sb $a0, 0($a0)\n"
+      "sb $a0, 0($a1)\n"
+      "sb $a0, 256($a1)\n"
+      "sb $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+      "sb $a0, -256($a1)\n"
+      "sb $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "sb $a0, 0($at)\n"
+
+      "sh $a0, 0($a0)\n"
+      "sh $a0, 0($a1)\n"
+      "sh $a0, 256($a1)\n"
+      "sh $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+      "sh $a0, -256($a1)\n"
+      "sh $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "sh $a0, 0($at)\n"
+
+      "sw $a0, 0($a0)\n"
+      "sw $a0, 0($a1)\n"
+      "sw $a0, 256($a1)\n"
+      "sw $a0, 1000($a1)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+      "sw $a0, -256($a1)\n"
+      "sw $a0, 0xFFFF8000($a1)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a1\n"
+      "sw $a0, 0($at)\n"
+
+      "sw $a0, 0($a2)\n"
+      "sw $a1, 4($a2)\n"
+      "sw $a0, 256($a2)\n"
+      "sw $a1, 260($a2)\n"
+      "sw $a0, 1000($a2)\n"
+      "sw $a1, 1004($a2)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n"
+      "sw $a0, -256($a2)\n"
+      "sw $a1, -252($a2)\n"
+      "sw $a0, 0xFFFF8000($a2)\n"
+      "sw $a1, 0xFFFF8004($a2)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a2\n"
+      "sw $a0, 0($at)\n"
+      "sw $a1, 4($at)\n";
+  DriverStr(expected, "StoreToOffset");
+}
+
+TEST_F(AssemblerMIPSTest, StoreSToOffset) {
+  __ StoreSToOffset(mips::F0, mips::A0, 0);
+  __ StoreSToOffset(mips::F0, mips::A0, 4);
+  __ StoreSToOffset(mips::F0, mips::A0, 256);
+  __ StoreSToOffset(mips::F0, mips::A0, 0x8000);
+  __ StoreSToOffset(mips::F0, mips::A0, 0x10000);
+  __ StoreSToOffset(mips::F0, mips::A0, 0x12345678);
+  __ StoreSToOffset(mips::F0, mips::A0, -256);
+  __ StoreSToOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ StoreSToOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "swc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n"
+      "swc1 $f0, -256($a0)\n"
+      "swc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "swc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreSToOffset");
+}
+
+TEST_F(AssemblerMIPSTest, StoreDToOffset) {
+  __ StoreDToOffset(mips::F0, mips::A0, 0);
+  __ StoreDToOffset(mips::F0, mips::A0, 4);
+  __ StoreDToOffset(mips::F0, mips::A0, 256);
+  __ StoreDToOffset(mips::F0, mips::A0, 0x8000);
+  __ StoreDToOffset(mips::F0, mips::A0, 0x10000);
+  __ StoreDToOffset(mips::F0, mips::A0, 0x12345678);
+  __ StoreDToOffset(mips::F0, mips::A0, -256);
+  __ StoreDToOffset(mips::F0, mips::A0, 0xFFFF8000);
+  __ StoreDToOffset(mips::F0, mips::A0, 0xABCDEF00);
+
+  const char* expected =
+      "sdc1 $f0, 0($a0)\n"
+      "swc1 $f0, 4($a0)\n"
+      "swc1 $f1, 8($a0)\n"
+      "sdc1 $f0, 256($a0)\n"
+      "ori $at, $zero, 0x8000\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 1\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "lui $at, 0x1234\n"
+      "ori $at, 0x5678\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n"
+      "sdc1 $f0, -256($a0)\n"
+      "sdc1 $f0, 0xFFFF8000($a0)\n"
+      "lui $at, 0xABCD\n"
+      "ori $at, 0xEF00\n"
+      "addu $at, $at, $a0\n"
+      "sdc1 $f0, 0($at)\n";
+  DriverStr(expected, "StoreDToOffset");
+}
+
+TEST_F(AssemblerMIPSTest, B) {
+  mips::MipsLabel label1, label2;
+  __ B(&label1);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label1);
+  __ B(&label2);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label2);
+  __ B(&label1);
+
+  std::string expected =
+      ".set noreorder\n"
+      "b 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "b 2f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "b 1b\n"
+      "nop\n";
+  DriverStr(expected, "B");
+}
+
+TEST_F(AssemblerMIPSTest, Beq) {
+  mips::MipsLabel label;
+  __ Beq(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Beq(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "beq $a0, $a1, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "beq $a2, $a3, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Beq");
+}
+
+TEST_F(AssemblerMIPSTest, Bne) {
+  mips::MipsLabel label;
+  __ Bne(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bne(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bne $a0, $a1, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bne $a2, $a3, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bne");
+}
+
+TEST_F(AssemblerMIPSTest, Beqz) {
+  mips::MipsLabel label;
+  __ Beqz(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Beqz(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "beq $zero, $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "beq $zero, $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Beqz");
+}
+
+TEST_F(AssemblerMIPSTest, Bnez) {
+  mips::MipsLabel label;
+  __ Bnez(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bnez(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bne $zero, $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bne $zero, $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bnez");
+}
+
+TEST_F(AssemblerMIPSTest, Bltz) {
+  mips::MipsLabel label;
+  __ Bltz(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bltz(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bltz $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bltz $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bltz");
+}
+
+TEST_F(AssemblerMIPSTest, Bgez) {
+  mips::MipsLabel label;
+  __ Bgez(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bgez(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bgez $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bgez $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bgez");
+}
+
+TEST_F(AssemblerMIPSTest, Blez) {
+  mips::MipsLabel label;
+  __ Blez(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Blez(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "blez $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "blez $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Blez");
+}
+
+TEST_F(AssemblerMIPSTest, Bgtz) {
+  mips::MipsLabel label;
+  __ Bgtz(mips::A0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bgtz(mips::A1, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bgtz $a0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bgtz $a1, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bgtz");
+}
+
+TEST_F(AssemblerMIPSTest, Blt) {
+  mips::MipsLabel label;
+  __ Blt(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Blt(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "slt $at, $a0, $a1\n"
+      "bne $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "slt $at, $a2, $a3\n"
+      "bne $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Blt");
+}
+
+TEST_F(AssemblerMIPSTest, Bge) {
+  mips::MipsLabel label;
+  __ Bge(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bge(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "slt $at, $a0, $a1\n"
+      "beq $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "slt $at, $a2, $a3\n"
+      "beq $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bge");
+}
+
+TEST_F(AssemblerMIPSTest, Bltu) {
+  mips::MipsLabel label;
+  __ Bltu(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bltu(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "sltu $at, $a0, $a1\n"
+      "bne $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "sltu $at, $a2, $a3\n"
+      "bne $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bltu");
+}
+
+TEST_F(AssemblerMIPSTest, Bgeu) {
+  mips::MipsLabel label;
+  __ Bgeu(mips::A0, mips::A1, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bgeu(mips::A2, mips::A3, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "sltu $at, $a0, $a1\n"
+      "beq $zero, $at, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "sltu $at, $a2, $a3\n"
+      "beq $zero, $at, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bgeu");
+}
+
+#undef __
+
+}  // namespace art
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index b078f3e..00e8995 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -335,6 +335,10 @@
   EmitR(0, rs, rt, rd, 0, 0x04);
 }
 
+void Mips64Assembler::Rotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
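+  // ROTRV shares the SRLV function code (0x06); setting the otherwise-zero
+  // shamt field to 1 selects the rotate form on MIPS64r2. DROTR/DROTR32 below
+  // use the same trick via the rs field, and DROTRV via the shamt field.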
+  EmitR(0, rs, rt, rd, 1, 0x06);
+}
+
 void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
   EmitR(0, rs, rt, rd, 0, 0x06);
 }
@@ -351,6 +355,10 @@
   EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3a);
 }
 
+void Mips64Assembler::Drotr(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(1), rt, rd, shamt, 0x3a);
+}
+
 void Mips64Assembler::Dsra(GpuRegister rd, GpuRegister rt, int shamt) {
   EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3b);
 }
@@ -363,6 +371,10 @@
   EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3e);
 }
 
+void Mips64Assembler::Drotr32(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(1), rt, rd, shamt, 0x3e);
+}
+
 void Mips64Assembler::Dsra32(GpuRegister rd, GpuRegister rt, int shamt) {
   EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3f);
 }
@@ -375,6 +387,10 @@
   EmitR(0, rs, rt, rd, 0, 0x16);
 }
 
+void Mips64Assembler::Drotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
+  EmitR(0, rs, rt, rd, 1, 0x16);
+}
+
 void Mips64Assembler::Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
   EmitR(0, rs, rt, rd, 0, 0x17);
 }
@@ -773,6 +789,10 @@
   EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x21);
 }
 
+void Mips64Assembler::Cvtsl(FpuRegister fd, FpuRegister fs) {
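+  // cvt.s.l: convert the 64-bit fixed-point (long) value in fs to single
+  // precision; fmt 0x15 selects the L format, mirroring Cvtdl below.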
+  EmitFR(0x11, 0x15, static_cast<FpuRegister>(0), fs, fd, 0x20);
+}
+
 void Mips64Assembler::Cvtdl(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x15, static_cast<FpuRegister>(0), fs, fd, 0x21);
 }
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index d083eb4..33f22d2 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -123,15 +123,19 @@
   void Sra(GpuRegister rd, GpuRegister rt, int shamt);
   void Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
   void Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
+  void Rotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
   void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs);
   void Dsll(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
   void Dsrl(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Drotr(GpuRegister rd, GpuRegister rt, int shamt);
   void Dsra(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
   void Dsll32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
   void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Drotr32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
   void Dsra32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
   void Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
   void Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
+  void Drotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
   void Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
 
   void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -230,6 +234,7 @@
   void Cvtdw(FpuRegister fd, FpuRegister fs);
   void Cvtsd(FpuRegister fd, FpuRegister fs);
   void Cvtds(FpuRegister fd, FpuRegister fs);
+  void Cvtsl(FpuRegister fd, FpuRegister fs);
   void Cvtdl(FpuRegister fd, FpuRegister fs);
 
   void Mfc1(GpuRegister rt, FpuRegister fs);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 2071aca..4413906 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -215,6 +215,22 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::AbsD, "abs.d ${reg1}, ${reg2}"), "abs.d");
 }
 
+TEST_F(AssemblerMIPS64Test, MovS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::MovS, "mov.s ${reg1}, ${reg2}"), "mov.s");
+}
+
+TEST_F(AssemblerMIPS64Test, MovD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::MovD, "mov.d ${reg1}, ${reg2}"), "mov.d");
+}
+
+TEST_F(AssemblerMIPS64Test, NegS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::NegS, "neg.s ${reg1}, ${reg2}"), "neg.s");
+}
+
+TEST_F(AssemblerMIPS64Test, NegD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::NegD, "neg.d ${reg1}, ${reg2}"), "neg.d");
+}
+
 TEST_F(AssemblerMIPS64Test, RoundLS) {
   DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundLS, "round.l.s ${reg1}, ${reg2}"), "round.l.s");
 }
@@ -307,6 +323,34 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l");
 }
 
+TEST_F(AssemblerMIPS64Test, CvtDS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "cvt.d.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CvtDW) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "cvt.d.w");
+}
+
+TEST_F(AssemblerMIPS64Test, CvtSL) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "cvt.s.l");
+}
+
+TEST_F(AssemblerMIPS64Test, CvtSD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "cvt.s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CvtSW) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w");
+}
+
+////////////////
+// CALL / JMP //
+////////////////
+
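+// RepeatRRNoDupes leaves out the reg1 == reg2 pairs, presumably because
+// "jalr rd, rs" with rd == rs is flagged as unpredictable by the MIPS manual.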
+TEST_F(AssemblerMIPS64Test, Jalr) {
+  DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+}
+
 //////////
 // MISC //
 //////////
@@ -319,6 +363,14 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dbitswap, "dbitswap ${reg1}, ${reg2}"), "dbitswap");
 }
 
+TEST_F(AssemblerMIPS64Test, Seb) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Seb, "seb ${reg1}, ${reg2}"), "seb");
+}
+
+TEST_F(AssemblerMIPS64Test, Seh) {
+  DriverStr(RepeatRR(&mips64::Mips64Assembler::Seh, "seh ${reg1}, ${reg2}"), "seh");
+}
+
 TEST_F(AssemblerMIPS64Test, Dsbh) {
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Dsbh, "dsbh ${reg1}, ${reg2}"), "dsbh");
 }
@@ -331,6 +383,75 @@
   DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh");
 }
 
+TEST_F(AssemblerMIPS64Test, Sll) {
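+  // The 5 passed to RepeatRRIb is the bit width of the immediate, keeping the
+  // generated shift amounts within the valid 0-31 range.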
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sll, 5, "sll ${reg1}, ${reg2}, {imm}"), "sll");
+}
+
+TEST_F(AssemblerMIPS64Test, Srl) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Srl, 5, "srl ${reg1}, ${reg2}, {imm}"), "srl");
+}
+
+TEST_F(AssemblerMIPS64Test, Rotr) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Rotr, 5, "rotr ${reg1}, ${reg2}, {imm}"), "rotr");
+}
+
+TEST_F(AssemblerMIPS64Test, Sra) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sra, 5, "sra ${reg1}, ${reg2}, {imm}"), "sra");
+}
+
+TEST_F(AssemblerMIPS64Test, Sllv) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Sllv, "sllv ${reg1}, ${reg2}, ${reg3}"), "sllv");
+}
+
+TEST_F(AssemblerMIPS64Test, Srlv) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Srlv, "srlv ${reg1}, ${reg2}, ${reg3}"), "srlv");
+}
+
+TEST_F(AssemblerMIPS64Test, Rotrv) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Rotrv, "rotrv ${reg1}, ${reg2}, ${reg3}"), "rotrv");
+}
+
+TEST_F(AssemblerMIPS64Test, Srav) {
+  DriverStr(RepeatRRR(&mips64::Mips64Assembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "srav");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsll) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsll, 5, "dsll ${reg1}, ${reg2}, {imm}"), "dsll");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsrl) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsrl, 5, "dsrl ${reg1}, ${reg2}, {imm}"), "dsrl");
+}
+
+TEST_F(AssemblerMIPS64Test, Drotr) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Drotr, 5, "drotr ${reg1}, ${reg2}, {imm}"),
+            "drotr");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsra) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsra, 5, "dsra ${reg1}, ${reg2}, {imm}"), "dsra");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsll32) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsll32, 5, "dsll32 ${reg1}, ${reg2}, {imm}"),
+            "dsll32");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsrl32) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsrl32, 5, "dsrl32 ${reg1}, ${reg2}, {imm}"),
+            "dsrl32");
+}
+
+TEST_F(AssemblerMIPS64Test, Drotr32) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Drotr32, 5, "drotr32 ${reg1}, ${reg2}, {imm}"),
+            "drotr32");
+}
+
+TEST_F(AssemblerMIPS64Test, Dsra32) {
+  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsra32, 5, "dsra32 ${reg1}, ${reg2}, {imm}"),
+            "dsra32");
+}
+
 TEST_F(AssemblerMIPS64Test, Sc) {
   DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sc, -9, "sc ${reg1}, {imm}(${reg2})"), "sc");
 }
@@ -347,10 +468,6 @@
   DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lld, -9, "lld ${reg1}, {imm}(${reg2})"), "lld");
 }
 
-TEST_F(AssemblerMIPS64Test, Rotr) {
-  DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Rotr, 5, "rotr ${reg1}, ${reg2}, {imm}"), "rotr");
-}
-
 TEST_F(AssemblerMIPS64Test, Seleqz) {
   DriverStr(RepeatRRR(&mips64::Mips64Assembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"),
             "seleqz");
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 04e815a..d6caa3c 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -720,6 +720,14 @@
 }
 
 
+void X86Assembler::ucomiss(XmmRegister a, const Address& b) {
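+  // Memory-operand form of ucomiss (0F 2E /r with a ModRM memory operand),
+  // e.g. for comparing against a float that lives in the constant area.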
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -729,6 +737,15 @@
 }
 
 
+void X86Assembler::ucomisd(XmmRegister a, const Address& b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitOperand(a, b);
+}
+
+
 void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -2369,44 +2386,48 @@
   }
 }
 
-int ConstantArea::AddInt32(int32_t v) {
-  for (size_t i = 0, e = buffer_.size(); i < e; i++) {
-    if (v == buffer_[i]) {
-      return i * kEntrySize;
-    }
-  }
-
-  // Didn't match anything.
-  int result = buffer_.size() * kEntrySize;
+size_t ConstantArea::AppendInt32(int32_t v) {
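+  // Unlike AddInt32 below, this never searches for an existing entry; it
+  // always appends and returns the offset of the new slot.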
+  size_t result = buffer_.size() * elem_size_;
   buffer_.push_back(v);
   return result;
 }
 
-int ConstantArea::AddInt64(int64_t v) {
+size_t ConstantArea::AddInt32(int32_t v) {
+  for (size_t i = 0, e = buffer_.size(); i < e; i++) {
+    if (v == buffer_[i]) {
+      return i * elem_size_;
+    }
+  }
+
+  // Didn't match anything.
+  return AppendInt32(v);
+}
+
+size_t ConstantArea::AddInt64(int64_t v) {
   int32_t v_low = Low32Bits(v);
   int32_t v_high = High32Bits(v);
   if (buffer_.size() > 1) {
     // Ensure we don't pass the end of the buffer.
     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
-        return i * kEntrySize;
+        return i * elem_size_;
       }
     }
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * kEntrySize;
+  size_t result = buffer_.size() * elem_size_;
   buffer_.push_back(v_low);
   buffer_.push_back(v_high);
   return result;
 }
 
-int ConstantArea::AddDouble(double v) {
+size_t ConstantArea::AddDouble(double v) {
   // Treat the value as a 64-bit integer value.
   return AddInt64(bit_cast<int64_t, double>(v));
 }
 
-int ConstantArea::AddFloat(float v) {
+size_t ConstantArea::AddFloat(float v) {
   // Treat the value as a 32-bit integer value.
   return AddInt32(bit_cast<int32_t, float>(v));
 }
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 93ecdf5..655af9c 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -166,21 +166,6 @@
     Init(base_in, disp.Int32Value());
   }
 
-  void Init(Register base_in, int32_t disp) {
-    if (disp == 0 && base_in != EBP) {
-      SetModRM(0, base_in);
-      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
-    } else if (disp >= -128 && disp <= 127) {
-      SetModRM(1, base_in);
-      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
-      SetDisp8(disp);
-    } else {
-      SetModRM(2, base_in);
-      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
-      SetDisp32(disp);
-    }
-  }
-
   Address(Register index_in, ScaleFactor scale_in, int32_t disp) {
     CHECK_NE(index_in, ESP);  // Illegal addressing mode.
     SetModRM(0, ESP);
@@ -189,19 +174,15 @@
   }
 
   Address(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
-    CHECK_NE(index_in, ESP);  // Illegal addressing mode.
-    if (disp == 0 && base_in != EBP) {
-      SetModRM(0, ESP);
-      SetSIB(scale_in, index_in, base_in);
-    } else if (disp >= -128 && disp <= 127) {
-      SetModRM(1, ESP);
-      SetSIB(scale_in, index_in, base_in);
-      SetDisp8(disp);
-    } else {
-      SetModRM(2, ESP);
-      SetSIB(scale_in, index_in, base_in);
-      SetDisp32(disp);
-    }
+    Init(base_in, index_in, scale_in, disp);
+  }
+
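+  // Like the constructor above, but also records an AssemblerFixup so the
+  // displacement can be patched later (this appears to replace the fixup list
+  // that this change removes from ConstantArea).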
+  Address(Register base_in,
+          Register index_in,
+          ScaleFactor scale_in,
+          int32_t disp,
+          AssemblerFixup* fixup) {
+    Init(base_in, index_in, scale_in, disp);
+    SetFixup(fixup);
   }
 
   static Address Absolute(uintptr_t addr) {
@@ -217,6 +198,37 @@
 
  private:
   Address() {}
+
+  void Init(Register base_in, int32_t disp) {
+    if (disp == 0 && base_in != EBP) {
+      SetModRM(0, base_in);
+      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
+    } else if (disp >= -128 && disp <= 127) {
+      SetModRM(1, base_in);
+      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
+      SetDisp8(disp);
+    } else {
+      SetModRM(2, base_in);
+      if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
+      SetDisp32(disp);
+    }
+  }
+
+  void Init(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
+    CHECK_NE(index_in, ESP);  // Illegal addressing mode.
+    if (disp == 0 && base_in != EBP) {
+      SetModRM(0, ESP);
+      SetSIB(scale_in, index_in, base_in);
+    } else if (disp >= -128 && disp <= 127) {
+      SetModRM(1, ESP);
+      SetSIB(scale_in, index_in, base_in);
+      SetDisp8(disp);
+    } else {
+      SetModRM(2, ESP);
+      SetSIB(scale_in, index_in, base_in);
+      SetDisp32(disp);
+    }
+  }
 };
 
 
@@ -252,40 +264,39 @@
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddDouble(double v);
+  size_t AddDouble(double v);
 
   // Add a float to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddFloat(float v);
+  size_t AddFloat(float v);
 
   // Add an int32_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt32(int32_t v);
+  size_t AddInt32(int32_t v);
+
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v);
 
   // Add an int64_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt64(int64_t v);
+  size_t AddInt64(int64_t v);
 
   bool IsEmpty() const {
     return buffer_.size() == 0;
   }
 
+  size_t GetSize() const {
+    return buffer_.size() * elem_size_;
+  }
+
   const std::vector<int32_t>& GetBuffer() const {
     return buffer_;
   }
 
-  void AddFixup(AssemblerFixup* fixup) {
-    fixups_.push_back(fixup);
-  }
-
-  const std::vector<AssemblerFixup*>& GetFixups() const {
-    return fixups_;
-  }
-
  private:
-  static constexpr size_t kEntrySize = sizeof(int32_t);
+  static constexpr size_t elem_size_ = sizeof(int32_t);
   std::vector<int32_t> buffer_;
-  std::vector<AssemblerFixup*> fixups_;
 };
 
 class X86Assembler FINAL : public Assembler {
@@ -406,7 +417,9 @@
   void comiss(XmmRegister a, XmmRegister b);
   void comisd(XmmRegister a, XmmRegister b);
   void ucomiss(XmmRegister a, XmmRegister b);
+  void ucomiss(XmmRegister a, const Address& b);
   void ucomisd(XmmRegister a, XmmRegister b);
+  void ucomisd(XmmRegister a, const Address& b);
 
   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
@@ -740,26 +753,36 @@
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddDouble(double v) { return constant_area_.AddDouble(v); }
+  size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
 
   // Add a float to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddFloat(float v)   { return constant_area_.AddFloat(v); }
+  size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
 
   // Add an int32_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+  size_t AddInt32(int32_t v) {
+    return constant_area_.AddInt32(v);
+  }
+
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v) {
+    return constant_area_.AppendInt32(v);
+  }
 
   // Add an int64_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+  size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
 
   // Add the contents of the constant area to the assembler buffer.
   void AddConstantArea();
 
   // Is the constant area empty? Return true if there are no literals in the constant area.
   bool IsConstantAreaEmpty() const { return constant_area_.IsEmpty(); }
-  void AddConstantAreaFixup(AssemblerFixup* fixup) { constant_area_.AddFixup(fixup); }
+
+  // Return the current size of the constant area.
+  size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
 
  private:
   inline void EmitUint8(uint8_t value);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 16f9db4..a9b991c 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -306,6 +306,19 @@
   DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
 }
 
+TEST_F(AssemblerX86Test, UComissAddr) {
+  GetAssembler()->ucomiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "ucomiss 0(%EAX), %xmm0\n";
+  DriverStr(expected, "ucomiss");
+}
+
+TEST_F(AssemblerX86Test, UComisdAddr) {
+  GetAssembler()->ucomisd(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
+  const char* expected = "ucomisd 0(%EAX), %xmm0\n";
+  DriverStr(expected, "ucomisd");
+}
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 6e7d74d..9eb5e67 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -3122,7 +3122,14 @@
   }
 }
 
-int ConstantArea::AddInt32(int32_t v) {
+size_t ConstantArea::AppendInt32(int32_t v) {
+  size_t result = buffer_.size() * elem_size_;
+  buffer_.push_back(v);
+  return result;
+}
+
+size_t ConstantArea::AddInt32(int32_t v) {
+  // Look for an existing match.
   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
     if (v == buffer_[i]) {
       return i * elem_size_;
@@ -3130,12 +3137,10 @@
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * elem_size_;
-  buffer_.push_back(v);
-  return result;
+  return AppendInt32(v);
 }
 
-int ConstantArea::AddInt64(int64_t v) {
+size_t ConstantArea::AddInt64(int64_t v) {
   int32_t v_low = v;
   int32_t v_high = v >> 32;
   if (buffer_.size() > 1) {
@@ -3148,18 +3153,18 @@
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * elem_size_;
+  size_t result = buffer_.size() * elem_size_;
   buffer_.push_back(v_low);
   buffer_.push_back(v_high);
   return result;
 }
 
-int ConstantArea::AddDouble(double v) {
+size_t ConstantArea::AddDouble(double v) {
   // Treat the value as a 64-bit integer value.
   return AddInt64(bit_cast<int64_t, double>(v));
 }
 
-int ConstantArea::AddFloat(float v) {
+size_t ConstantArea::AddFloat(float v) {
   // Treat the value as a 32-bit integer value.
   return AddInt32(bit_cast<int32_t, float>(v));
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 255f551..01d28e3 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -269,36 +269,40 @@
  * Class to handle constant area values.
  */
 class ConstantArea {
-  public:
-    ConstantArea() {}
+ public:
+  ConstantArea() {}
 
-    // Add a double to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddDouble(double v);
+  // Add a double to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddDouble(double v);
 
-    // Add a float to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddFloat(float v);
+  // Add a float to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddFloat(float v);
 
-    // Add an int32_t to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddInt32(int32_t v);
+  // Add an int32_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddInt32(int32_t v);
 
-    // Add an int64_t to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddInt64(int64_t v);
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v);
 
-    int GetSize() const {
-      return buffer_.size() * elem_size_;
-    }
+  // Add an int64_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddInt64(int64_t v);
 
-    const std::vector<int32_t>& GetBuffer() const {
-      return buffer_;
-    }
+  size_t GetSize() const {
+    return buffer_.size() * elem_size_;
+  }
 
-  private:
-    static constexpr size_t elem_size_ = sizeof(int32_t);
-    std::vector<int32_t> buffer_;
+  const std::vector<int32_t>& GetBuffer() const {
+    return buffer_;
+  }
+
+ private:
+  static constexpr size_t elem_size_ = sizeof(int32_t);
+  std::vector<int32_t> buffer_;
 };
 
 
@@ -806,19 +810,27 @@
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddDouble(double v) { return constant_area_.AddDouble(v); }
+  size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
 
   // Add a float to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddFloat(float v)   { return constant_area_.AddFloat(v); }
+  size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
 
   // Add an int32_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+  size_t AddInt32(int32_t v) {
+    return constant_area_.AddInt32(v);
+  }
+
+  // Add an int32_t to the end of the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AppendInt32(int32_t v) {
+    return constant_area_.AppendInt32(v);
+  }
 
   // Add an int64_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+  size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
 
   // Add the contents of the constant area to the assembler buffer.
   void AddConstantArea();
@@ -826,6 +838,9 @@
   // Is the constant area empty? Return true if there are no literals in the constant area.
   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
 
+  // Return the current size of the constant area.
+  size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
+
   //
   // Heap poisoning.
   //
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 680e2d7..2653807 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -126,11 +126,12 @@
 
     // However, we prefer to drop this when we saw --zip-fd.
     if (saw_zip_fd) {
-      // Drop anything --zip-X, --dex-X, --oat-X, --swap-X.
+      // Drop anything --zip-X, --dex-X, --oat-X, --swap-X, or --app-image-X.
       if (StartsWith(original_argv[i], "--zip-") ||
           StartsWith(original_argv[i], "--dex-") ||
           StartsWith(original_argv[i], "--oat-") ||
-          StartsWith(original_argv[i], "--swap-")) {
+          StartsWith(original_argv[i], "--swap-") ||
+          StartsWith(original_argv[i], "--app-image-")) {
         continue;
       }
     }
@@ -336,6 +337,12 @@
   UsageError("  --swap-fd=<file-descriptor>:  specifies a file to use for swap (by descriptor).");
   UsageError("      Example: --swap-fd=10");
   UsageError("");
+  UsageError("  --app-image-fd=<file-descriptor>: specify output file descriptor for app image.");
+  UsageError("      Example: --app-image-fd=10");
+  UsageError("");
+  UsageError("  --app-image-file=<file-name>: specify a file name for app image.");
+  UsageError("      Example: --app-image-file=/data/dalvik-cache/system@app@Calculator.apk.art");
+  UsageError("");
   std::cerr << "See log for usage error information\n";
   exit(EXIT_FAILURE);
 }
@@ -445,38 +452,6 @@
   pthread_t pthread_;
 };
 
-static void ParseStringAfterChar(const std::string& s, char c, std::string* parsed_value) {
-  std::string::size_type colon = s.find(c);
-  if (colon == std::string::npos) {
-    Usage("Missing char %c in option %s\n", c, s.c_str());
-  }
-  // Add one to remove the char we were trimming until.
-  *parsed_value = s.substr(colon + 1);
-}
-
-static void ParseDouble(const std::string& option, char after_char, double min, double max,
-                        double* parsed_value) {
-  std::string substring;
-  ParseStringAfterChar(option, after_char, &substring);
-  bool sane_val = true;
-  double value;
-  if (false) {
-    // TODO: this doesn't seem to work on the emulator.  b/15114595
-    std::stringstream iss(substring);
-    iss >> value;
-    // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
-    sane_val = iss.eof() && (value >= min) && (value <= max);
-  } else {
-    char* end = nullptr;
-    value = strtod(substring.c_str(), &end);
-    sane_val = *end == '\0' && value >= min && value <= max;
-  }
-  if (!sane_val) {
-    Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
-  }
-  *parsed_value = value;
-}
-
 static constexpr size_t kMinDexFilesForSwap = 2;
 static constexpr size_t kMinDexFileCumulativeSizeForSwap = 20 * MB;
 
@@ -516,7 +491,8 @@
       compiled_classes_filename_(nullptr),
       compiled_methods_zip_filename_(nullptr),
       compiled_methods_filename_(nullptr),
-      image_(false),
+      app_image_(false),
+      boot_image_(false),
       is_host_(false),
       driver_(nullptr),
       dump_stats_(false),
@@ -525,6 +501,7 @@
       dump_slow_timing_(kIsDebugBuild),
       dump_cfg_append_(false),
       swap_fd_(-1),
+      app_image_fd_(kInvalidImageFd),
       timings_(timings) {}
 
   ~Dex2Oat() {
@@ -537,76 +514,39 @@
     // the runtime.
     LogCompletionTime();
 
-    if (kIsDebugBuild || (RUNNING_ON_MEMORY_TOOL && kMemoryToolDetectsLeaks)) {
-      delete runtime_;  // See field declaration for why this is manual.
-      delete driver_;
-      delete verification_results_;
+    if (!kIsDebugBuild && !(RUNNING_ON_MEMORY_TOOL && kMemoryToolDetectsLeaks)) {
+      // We want to just exit on non-debug builds, not bringing the runtime down
+      // in an orderly fashion. So release the following fields.
+      driver_.release();
+      image_writer_.release();
+      for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files_) {
+        dex_file.release();
+      }
+      oat_file_.release();
+      runtime_.release();
+      verification_results_.release();
+      key_value_store_.release();
     }
   }
 
   struct ParserOptions {
     std::string oat_symbols;
     std::string boot_image_filename;
-    const char* compiler_filter_string = nullptr;
-    CompilerOptions::CompilerFilter compiler_filter = CompilerOptions::kDefaultCompilerFilter;
-    bool compile_pic = false;
-    int huge_method_threshold = CompilerOptions::kDefaultHugeMethodThreshold;
-    int large_method_threshold = CompilerOptions::kDefaultLargeMethodThreshold;
-    int small_method_threshold = CompilerOptions::kDefaultSmallMethodThreshold;
-    int tiny_method_threshold = CompilerOptions::kDefaultTinyMethodThreshold;
-    int num_dex_methods_threshold = CompilerOptions::kDefaultNumDexMethodsThreshold;
-    static constexpr int kUnsetInlineDepthLimit = -1;
-    int inline_depth_limit = kUnsetInlineDepthLimit;
-    static constexpr int kUnsetInlineMaxCodeUnits = -1;
-    int inline_max_code_units = kUnsetInlineMaxCodeUnits;
-
-    // Profile file to use
-    double top_k_profile_threshold = CompilerOptions::kDefaultTopKProfileThreshold;
-
-    bool debuggable = false;
-    bool include_patch_information = CompilerOptions::kDefaultIncludePatchInformation;
-    bool generate_debug_info = kIsDebugBuild;
     bool watch_dog_enabled = true;
-    bool abort_on_hard_verifier_error = false;
     bool requested_specific_compiler = false;
-
-    bool implicit_null_checks = false;
-    bool implicit_so_checks = false;
-    bool implicit_suspend_checks = false;
-
-    PassManagerOptions pass_manager_options;
-
     std::string error_msg;
   };
 
-  template <typename T>
-  static void ParseUintOption(const StringPiece& option,
-                              const std::string& option_name,
-                              T* out,
-                              bool is_long_option = true) {
-    std::string option_prefix = option_name + (is_long_option ? "=" : "");
-    DCHECK(option.starts_with(option_prefix));
-    const char* value_string = option.substr(option_prefix.size()).data();
-    int64_t parsed_integer_value;
-    if (!ParseInt(value_string, &parsed_integer_value)) {
-      Usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
-    }
-    if (parsed_integer_value < 0) {
-      Usage("%s passed a negative value %d", option_name.c_str(), parsed_integer_value);
-    }
-    *out = dchecked_integral_cast<T>(parsed_integer_value);
-  }
-
   void ParseZipFd(const StringPiece& option) {
-    ParseUintOption(option, "--zip-fd", &zip_fd_);
+    ParseUintOption(option, "--zip-fd", &zip_fd_, Usage);
   }
 
   void ParseOatFd(const StringPiece& option) {
-    ParseUintOption(option, "--oat-fd", &oat_fd_);
+    ParseUintOption(option, "--oat-fd", &oat_fd_, Usage);
   }
 
   void ParseJ(const StringPiece& option) {
-    ParseUintOption(option, "-j", &thread_count_, /* is_long_option */ false);
+    ParseUintOption(option, "-j", &thread_count_, Usage, /* is_long_option */ false);
   }
 
   void ParseBase(const StringPiece& option) {
@@ -677,80 +617,17 @@
     }
   }
 
-  void ParseHugeMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--huge-method-max", &parser_options->huge_method_threshold);
-  }
-
-  void ParseLargeMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--large-method-max", &parser_options->large_method_threshold);
-  }
-
-  void ParseSmallMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--small-method-max", &parser_options->small_method_threshold);
-  }
-
-  void ParseTinyMethodMax(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--tiny-method-max", &parser_options->tiny_method_threshold);
-  }
-
-  void ParseNumDexMethods(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--num-dex-methods", &parser_options->num_dex_methods_threshold);
-  }
-
-  void ParseInlineDepthLimit(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--inline-depth-limit", &parser_options->inline_depth_limit);
-  }
-
-  void ParseInlineMaxCodeUnits(const StringPiece& option, ParserOptions* parser_options) {
-    ParseUintOption(option, "--inline-max-code-units=", &parser_options->inline_max_code_units);
-  }
-
-  void ParseDisablePasses(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--disable-passes="));
-    const std::string disable_passes = option.substr(strlen("--disable-passes=")).data();
-    parser_options->pass_manager_options.SetDisablePassList(disable_passes);
-  }
-
-  void ParsePrintPasses(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--print-passes="));
-    const std::string print_passes = option.substr(strlen("--print-passes=")).data();
-    parser_options->pass_manager_options.SetPrintPassList(print_passes);
-  }
-
-  void ParseDumpCfgPasses(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--dump-cfg-passes="));
-    const std::string dump_passes_string = option.substr(strlen("--dump-cfg-passes=")).data();
-    parser_options->pass_manager_options.SetDumpPassList(dump_passes_string);
-  }
-
-  void ParsePassOptions(const StringPiece& option, ParserOptions* parser_options) {
-    DCHECK(option.starts_with("--pass-options="));
-    const std::string pass_options = option.substr(strlen("--pass-options=")).data();
-    parser_options->pass_manager_options.SetOverriddenPassOptions(pass_options);
-  }
-
-  void ParseDumpInitFailures(const StringPiece& option) {
-    DCHECK(option.starts_with("--dump-init-failures="));
-    std::string file_name = option.substr(strlen("--dump-init-failures=")).data();
-    init_failure_output_.reset(new std::ofstream(file_name));
-    if (init_failure_output_.get() == nullptr) {
-      LOG(ERROR) << "Failed to allocate ofstream";
-    } else if (init_failure_output_->fail()) {
-      LOG(ERROR) << "Failed to open " << file_name << " for writing the initialization "
-                 << "failures.";
-      init_failure_output_.reset();
-    }
-  }
-
-  void ParseSwapFd(const StringPiece& option) {
-    ParseUintOption(option, "--swap-fd", &swap_fd_);
-  }
-
   void ProcessOptions(ParserOptions* parser_options) {
-    image_ = (!image_filename_.empty());
-    if (image_) {
+    boot_image_ = !image_filename_.empty();
+    app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty();
+
+    if (IsAppImage() && IsBootImage()) {
+      Usage("Can't have both --image and (--app-image-fd or --app-image-file)");
+    }
+
+    if (IsBootImage()) {
       // We need the boot image to always be debuggable.
-      parser_options->debuggable = true;
+      compiler_options_->debuggable_ = true;
     }
 
     if (oat_filename_.empty() && oat_fd_ == -1) {
@@ -781,7 +658,7 @@
       android_root_ += android_root_env_var;
     }
 
-    if (!image_ && parser_options->boot_image_filename.empty()) {
+    if (!boot_image_ && parser_options->boot_image_filename.empty()) {
       parser_options->boot_image_filename += android_root_;
       parser_options->boot_image_filename += "/framework/boot.art";
     }
@@ -790,7 +667,7 @@
       boot_image_option_ += parser_options->boot_image_filename;
     }
 
-    if (image_classes_filename_ != nullptr && !image_) {
+    if (image_classes_filename_ != nullptr && !IsBootImage()) {
       Usage("--image-classes should only be used with --image");
     }
 
@@ -802,7 +679,7 @@
       Usage("--image-classes-zip should be used with --image-classes");
     }
 
-    if (compiled_classes_filename_ != nullptr && !image_) {
+    if (compiled_classes_filename_ != nullptr && !IsBootImage()) {
       Usage("--compiled-classes should only be used with --image");
     }
 
@@ -874,44 +751,19 @@
       }
     }
 
-    if (parser_options->compiler_filter_string == nullptr) {
-      parser_options->compiler_filter_string = "speed";
-    }
-
-    CHECK(parser_options->compiler_filter_string != nullptr);
-    if (strcmp(parser_options->compiler_filter_string, "verify-none") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kVerifyNone;
-    } else if (strcmp(parser_options->compiler_filter_string, "interpret-only") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kInterpretOnly;
-    } else if (strcmp(parser_options->compiler_filter_string, "verify-at-runtime") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kVerifyAtRuntime;
-    } else if (strcmp(parser_options->compiler_filter_string, "space") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kSpace;
-    } else if (strcmp(parser_options->compiler_filter_string, "balanced") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kBalanced;
-    } else if (strcmp(parser_options->compiler_filter_string, "speed") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kSpeed;
-    } else if (strcmp(parser_options->compiler_filter_string, "everything") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kEverything;
-    } else if (strcmp(parser_options->compiler_filter_string, "time") == 0) {
-      parser_options->compiler_filter = CompilerOptions::kTime;
-    } else {
-      Usage("Unknown --compiler-filter value %s", parser_options->compiler_filter_string);
-    }
-
     // If they are not set, use default values for inlining settings.
     // TODO: We should rethink the compiler filter. We mostly save
     // time here, which is orthogonal to space.
-    if (parser_options->inline_depth_limit == ParserOptions::kUnsetInlineDepthLimit) {
-      parser_options->inline_depth_limit =
-          (parser_options->compiler_filter == CompilerOptions::kSpace)
+    if (compiler_options_->inline_depth_limit_ == CompilerOptions::kUnsetInlineDepthLimit) {
+      compiler_options_->inline_depth_limit_ =
+          (compiler_options_->compiler_filter_ == CompilerOptions::kSpace)
           // Implementation of the space filter: limit inlining depth.
           ? CompilerOptions::kSpaceFilterInlineDepthLimit
           : CompilerOptions::kDefaultInlineDepthLimit;
     }
-    if (parser_options->inline_max_code_units == ParserOptions::kUnsetInlineMaxCodeUnits) {
-      parser_options->inline_max_code_units =
-          (parser_options->compiler_filter == CompilerOptions::kSpace)
+    if (compiler_options_->inline_max_code_units_ == CompilerOptions::kUnsetInlineMaxCodeUnits) {
+      compiler_options_->inline_max_code_units_ =
+          (compiler_options_->compiler_filter_ == CompilerOptions::kSpace)
           // Implementation of the space filter: limit inlining max code units.
           ? CompilerOptions::kSpaceFilterInlineMaxCodeUnits
           : CompilerOptions::kDefaultInlineMaxCodeUnits;
@@ -927,8 +779,8 @@
       case kX86_64:
       case kMips:
       case kMips64:
-        parser_options->implicit_null_checks = true;
-        parser_options->implicit_so_checks = true;
+        compiler_options_->implicit_null_checks_ = true;
+        compiler_options_->implicit_so_checks_ = true;
         break;
 
       default:
@@ -936,29 +788,7 @@
         break;
     }
 
-    compiler_options_.reset(new CompilerOptions(parser_options->compiler_filter,
-                                                parser_options->huge_method_threshold,
-                                                parser_options->large_method_threshold,
-                                                parser_options->small_method_threshold,
-                                                parser_options->tiny_method_threshold,
-                                                parser_options->num_dex_methods_threshold,
-                                                parser_options->inline_depth_limit,
-                                                parser_options->inline_max_code_units,
-                                                parser_options->include_patch_information,
-                                                parser_options->top_k_profile_threshold,
-                                                parser_options->debuggable,
-                                                parser_options->generate_debug_info,
-                                                parser_options->implicit_null_checks,
-                                                parser_options->implicit_so_checks,
-                                                parser_options->implicit_suspend_checks,
-                                                parser_options->compile_pic,
-                                                verbose_methods_.empty() ?
-                                                    nullptr :
-                                                    &verbose_methods_,
-                                                new PassManagerOptions(
-                                                    parser_options->pass_manager_options),
-                                                init_failure_output_.get(),
-                                                parser_options->abort_on_hard_verifier_error));
+    compiler_options_->verbose_methods_ = verbose_methods_.empty() ? nullptr : &verbose_methods_;
 
     // Done with usage checks, enable watchdog if requested
     if (parser_options->watch_dog_enabled) {
@@ -969,7 +799,7 @@
     key_value_store_.reset(new SafeMap<std::string, std::string>());
   }
 
-  void InsertCompileOptions(int argc, char** argv, ParserOptions* parser_options) {
+  void InsertCompileOptions(int argc, char** argv) {
     std::ostringstream oss;
     for (int i = 0; i < argc; ++i) {
       if (i > 0) {
@@ -983,10 +813,10 @@
     key_value_store_->Put(OatHeader::kDex2OatHostKey, oss.str());
     key_value_store_->Put(
         OatHeader::kPicKey,
-        parser_options->compile_pic ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+        compiler_options_->compile_pic_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
     key_value_store_->Put(
         OatHeader::kDebuggableKey,
-        parser_options->debuggable ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+        compiler_options_->debuggable_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
   }
 
   // Parse the arguments from the command line. In case of an unrecognized option or impossible
@@ -1007,6 +837,7 @@
     }
 
     std::unique_ptr<ParserOptions> parser_options(new ParserOptions());
+    compiler_options_.reset(new CompilerOptions());
 
     for (int i = 0; i < argc; i++) {
       const StringPiece option(argv[i]);
@@ -1064,24 +895,11 @@
         ParseInstructionSetFeatures(option, parser_options.get());
       } else if (option.starts_with("--compiler-backend=")) {
         ParseCompilerBackend(option, parser_options.get());
-      } else if (option.starts_with("--compiler-filter=")) {
-        parser_options->compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
-      } else if (option == "--compile-pic") {
-        parser_options->compile_pic = true;
-      } else if (option.starts_with("--huge-method-max=")) {
-        ParseHugeMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--large-method-max=")) {
-        ParseLargeMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--small-method-max=")) {
-        ParseSmallMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--tiny-method-max=")) {
-        ParseTinyMethodMax(option, parser_options.get());
-      } else if (option.starts_with("--num-dex-methods=")) {
-        ParseNumDexMethods(option, parser_options.get());
-      } else if (option.starts_with("--inline-depth-limit=")) {
-        ParseInlineDepthLimit(option, parser_options.get());
-      } else if (option.starts_with("--inline-max-code-units=")) {
-        ParseInlineMaxCodeUnits(option, parser_options.get());
+      } else if (option.starts_with("--profile-file=")) {
+        profile_file_ = option.substr(strlen("--profile-file=")).data();
+        VLOG(compiler) << "dex2oat: profile file is " << profile_file_;
+      } else if (option == "--no-profile-file") {
+        // No profile
       } else if (option == "--host") {
         is_host_ = true;
       } else if (option == "--runtime-arg") {
@@ -1102,52 +920,20 @@
         dump_cfg_append_ = true;
       } else if (option == "--dump-stats") {
         dump_stats_ = true;
-      } else if (option == "--generate-debug-info" || option == "-g") {
-        parser_options->generate_debug_info = true;
-      } else if (option == "--no-generate-debug-info") {
-        parser_options->generate_debug_info = false;
-      } else if (option == "--debuggable") {
-        parser_options->debuggable = true;
-        parser_options->generate_debug_info = true;
-      } else if (option.starts_with("--profile-file=")) {
-        profile_file_ = option.substr(strlen("--profile-file=")).data();
-        VLOG(compiler) << "dex2oat: profile file is " << profile_file_;
-      } else if (option == "--no-profile-file") {
-        // No profile
-      } else if (option.starts_with("--top-k-profile-threshold=")) {
-        ParseDouble(option.data(), '=', 0.0, 100.0, &parser_options->top_k_profile_threshold);
-      } else if (option == "--print-pass-names") {
-        parser_options->pass_manager_options.SetPrintPassNames(true);
-      } else if (option.starts_with("--disable-passes=")) {
-        ParseDisablePasses(option, parser_options.get());
-      } else if (option.starts_with("--print-passes=")) {
-        ParsePrintPasses(option, parser_options.get());
-      } else if (option == "--print-all-passes") {
-        parser_options->pass_manager_options.SetPrintAllPasses();
-      } else if (option.starts_with("--dump-cfg-passes=")) {
-        ParseDumpCfgPasses(option, parser_options.get());
-      } else if (option == "--print-pass-options") {
-        parser_options->pass_manager_options.SetPrintPassOptions(true);
-      } else if (option.starts_with("--pass-options=")) {
-        ParsePassOptions(option, parser_options.get());
-      } else if (option == "--include-patch-information") {
-        parser_options->include_patch_information = true;
-      } else if (option == "--no-include-patch-information") {
-        parser_options->include_patch_information = false;
+      } else if (option.starts_with("--swap-file=")) {
+        swap_file_name_ = option.substr(strlen("--swap-file=")).data();
+      } else if (option.starts_with("--swap-fd=")) {
+        ParseUintOption(option, "--swap-fd", &swap_fd_, Usage);
+      } else if (option.starts_with("--app-image-file=")) {
+        app_image_file_name_ = option.substr(strlen("--app-image-file=")).data();
+      } else if (option.starts_with("--app-image-fd=")) {
+        ParseUintOption(option, "--app-image-fd", &app_image_fd_, Usage);
       } else if (option.starts_with("--verbose-methods=")) {
         // TODO: rather than switch off compiler logging, make all VLOG(compiler) messages
         //       conditional on having verbose methods.
         gLogVerbosity.compiler = false;
         Split(option.substr(strlen("--verbose-methods=")).ToString(), ',', &verbose_methods_);
-      } else if (option.starts_with("--dump-init-failures=")) {
-        ParseDumpInitFailures(option);
-      } else if (option.starts_with("--swap-file=")) {
-        swap_file_name_ = option.substr(strlen("--swap-file=")).data();
-      } else if (option.starts_with("--swap-fd=")) {
-        ParseSwapFd(option);
-      } else if (option == "--abort-on-hard-verifier-error") {
-        parser_options->abort_on_hard_verifier_error = true;
-      } else {
+      } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
     }
@@ -1155,7 +941,7 @@
     ProcessOptions(parser_options.get());
 
     // Insert some compiler things.
-    InsertCompileOptions(argc, argv, parser_options.get());
+    InsertCompileOptions(argc, argv);
   }
 
   // Check whether the oat output file is writable, and open it for later. Also open a swap file,
@@ -1203,7 +989,6 @@
                                       // released immediately.
       unlink(swap_file_name_.c_str());
     }
-
     return true;
   }
 
@@ -1241,11 +1026,11 @@
       runtime_options.push_back(std::make_pair(runtime_args_[i], nullptr));
     }
 
-    verification_results_ = new VerificationResults(compiler_options_.get());
+    verification_results_.reset(new VerificationResults(compiler_options_.get()));
     callbacks_.reset(new QuickCompilerCallbacks(
-        verification_results_,
+        verification_results_.get(),
         &method_inliner_map_,
-        image_ ?
+        IsBootImage() ?
             CompilerCallbacks::CallbackMode::kCompileBootImage :
             CompilerCallbacks::CallbackMode::kCompileApp));
     runtime_options.push_back(std::make_pair("compilercallbacks", callbacks_.get()));
@@ -1255,7 +1040,7 @@
     // Only allow no boot image for the runtime if we're compiling one. When we compile an app,
     // we don't want fallback mode, it will abort as we do not push a boot classpath (it might
     // have been stripped in preopting, anyways).
-    if (!image_) {
+    if (!IsBootImage()) {
       runtime_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
     }
     // Disable libsigchain. We don't need it during compilation and it prevents us
@@ -1294,7 +1079,7 @@
             "': " << error_msg;
         return false;
       }
-    } else if (image_) {
+    } else if (IsBootImage()) {
       image_classes_.reset(new std::unordered_set<std::string>);
     }
     // If --compiled-classes was specified, calculate the full list of classes to compile in the
@@ -1401,12 +1186,13 @@
       }
       ScopedObjectAccess soa(self);
       dex_caches_.push_back(soa.AddLocalReference<jobject>(
-          class_linker->RegisterDexFile(*dex_file)));
+          class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
+      dex_file->CreateTypeLookupTable();
     }
 
     // If we use a swap file, ensure we are above the threshold to make it necessary.
     if (swap_fd_ != -1) {
-      if (!UseSwap(image_, dex_files_)) {
+      if (!UseSwap(IsBootImage(), dex_files_)) {
         close(swap_fd_);
         swap_fd_ = -1;
         VLOG(compiler) << "Decided to run without swap.";
@@ -1420,7 +1206,7 @@
      * If we're not in interpret-only or verify-none mode, go ahead and compile small applications.
      * Don't bother to check if we're doing the image.
      */
-    if (!image_ &&
+    if (!IsBootImage() &&
         compiler_options_->IsCompilationEnabled() &&
         compiler_kind_ == Compiler::kQuick) {
       size_t num_methods = 0;
@@ -1468,25 +1254,26 @@
       class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
     }
 
-    driver_ = new CompilerDriver(compiler_options_.get(),
-                                 verification_results_,
-                                 &method_inliner_map_,
-                                 compiler_kind_,
-                                 instruction_set_,
-                                 instruction_set_features_.get(),
-                                 image_,
-                                 image_classes_.release(),
-                                 compiled_classes_.release(),
-                                 nullptr,
-                                 thread_count_,
-                                 dump_stats_,
-                                 dump_passes_,
-                                 dump_cfg_file_name_,
-                                 dump_cfg_append_,
-                                 compiler_phases_timings_.get(),
-                                 swap_fd_,
-                                 profile_file_);
+    driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                     verification_results_.get(),
+                                     &method_inliner_map_,
+                                     compiler_kind_,
+                                     instruction_set_,
+                                     instruction_set_features_.get(),
+                                     IsBootImage(),
+                                     image_classes_.release(),
+                                     compiled_classes_.release(),
+                                     nullptr,
+                                     thread_count_,
+                                     dump_stats_,
+                                     dump_passes_,
+                                     dump_cfg_file_name_,
+                                     dump_cfg_append_,
+                                     compiler_phases_timings_.get(),
+                                     swap_fd_,
+                                     profile_file_));
 
+    driver_->SetDexFilesForOatFile(dex_files_);
     driver_->CompileAll(class_loader, dex_files_, timings_);
   }
 
@@ -1568,7 +1355,7 @@
       uint32_t image_file_location_oat_checksum = 0;
       uintptr_t image_file_location_oat_data_begin = 0;
       int32_t image_patch_delta = 0;
-      if (image_) {
+      if (IsImage()) {
         PrepareImageWriter(image_base_);
       } else {
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
@@ -1587,13 +1374,13 @@
       oat_writer.reset(new OatWriter(dex_files_, image_file_location_oat_checksum,
                                      image_file_location_oat_data_begin,
                                      image_patch_delta,
-                                     driver_,
+                                     driver_.get(),
                                      image_writer_.get(),
                                      timings_,
                                      key_value_store_.get()));
     }
 
-    if (image_) {
+    if (IsImage()) {
       // The OatWriter constructor has already updated offsets in methods and we need to
       // prepare method offsets in the image address space for direct method patching.
       TimingLogger::ScopedTiming t2("dex2oat Prepare image address space", timings_);
@@ -1618,7 +1405,7 @@
 
   // If we are compiling an image, invoke the image creation routine. Else just skip.
   bool HandleImage() {
-    if (image_) {
+    if (IsImage()) {
       TimingLogger::ScopedTiming t("dex2oat ImageWriter", timings_);
       if (!CreateImageFile()) {
         return false;
@@ -1701,7 +1488,15 @@
   }
 
   bool IsImage() const {
-    return image_;
+    return IsAppImage() || IsBootImage();
+  }
+
+  bool IsAppImage() const {
+    return app_image_;
+  }
+
+  bool IsBootImage() const {
+    return boot_image_;
   }
 
   bool IsHost() const {
@@ -1776,22 +1571,21 @@
       LOG(ERROR) << "Failed to create runtime";
       return false;
     }
-    Runtime* runtime = Runtime::Current();
-    runtime->SetInstructionSet(instruction_set_);
+    runtime_.reset(Runtime::Current());
+    runtime_->SetInstructionSet(instruction_set_);
     for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
       Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
-      if (!runtime->HasCalleeSaveMethod(type)) {
-        runtime->SetCalleeSaveMethod(runtime->CreateCalleeSaveMethod(), type);
+      if (!runtime_->HasCalleeSaveMethod(type)) {
+        runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(), type);
       }
     }
-    runtime->GetClassLinker()->FixupDexCaches(runtime->GetResolutionMethod());
+    runtime_->GetClassLinker()->FixupDexCaches(runtime_->GetResolutionMethod());
 
     // Initialize maps for unstarted runtime. This needs to be here, as running clinits needs this
     // set up.
     interpreter::UnstartedRuntime::Initialize();
 
-    runtime->GetClassLinker()->RunRootClinits();
-    runtime_ = runtime;
+    runtime_->GetClassLinker()->RunRootClinits();
 
     return true;
   }
@@ -1804,7 +1598,10 @@
   bool CreateImageFile()
       REQUIRES(!Locks::mutator_lock_) {
     CHECK(image_writer_ != nullptr);
-    if (!image_writer_->Write(image_filename_, oat_unstripped_, oat_location_)) {
+    if (!image_writer_->Write(app_image_fd_,
+                              IsBootImage() ? image_filename_ : app_image_file_name_,
+                              oat_unstripped_,
+                              oat_location_)) {
       LOG(ERROR) << "Failed to create image file " << image_filename_;
       return false;
     }
@@ -1813,8 +1610,8 @@
     // Destroy ImageWriter before doing FixupElf.
     image_writer_.reset();
 
-    // Do not fix up the ELF file if we are --compile-pic
-    if (!compiler_options_->GetCompilePic()) {
+    // Do not fix up the ELF file if we are --compile-pic or compiling the app image
+    if (!compiler_options_->GetCompilePic() && IsBootImage()) {
       std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_unstripped_.c_str()));
       if (oat_file.get() == nullptr) {
         PLOG(ERROR) << "Failed to open ELF file: " << oat_unstripped_;
@@ -1940,9 +1737,7 @@
 
   std::unique_ptr<SafeMap<std::string, std::string> > key_value_store_;
 
-  // Not a unique_ptr as we want to just exit on non-debug builds, not bringing the compiler down
-  // in an orderly fashion. The destructor takes care of deleting this.
-  VerificationResults* verification_results_;
+  std::unique_ptr<VerificationResults> verification_results_;
 
   DexFileToMethodInlinerMap method_inliner_map_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
@@ -1950,9 +1745,7 @@
   // Ownership for the class path files.
   std::vector<std::unique_ptr<const DexFile>> class_path_files_;
 
-  // Not a unique_ptr as we want to just exit on non-debug builds, not bringing the runtime down
-  // in an orderly fashion. The destructor takes care of deleting this.
-  Runtime* runtime_;
+  std::unique_ptr<Runtime> runtime_;
 
   size_t thread_count_;
   uint64_t start_ns_;
@@ -1980,17 +1773,16 @@
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_classes_;
   std::unique_ptr<std::unordered_set<std::string>> compiled_methods_;
-  bool image_;
-  std::unique_ptr<ImageWriter> image_writer_;
+  bool app_image_;
+  bool boot_image_;
   bool is_host_;
   std::string android_root_;
   std::vector<const DexFile*> dex_files_;
   std::vector<jobject> dex_caches_;
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
-  // Not a unique_ptr as we want to just exit on non-debug builds, not bringing the driver down
-  // in an orderly fashion. The destructor takes care of deleting this.
-  CompilerDriver* driver_;
+  std::unique_ptr<ImageWriter> image_writer_;
+  std::unique_ptr<CompilerDriver> driver_;
 
   std::vector<std::string> verbose_methods_;
   bool dump_stats_;
@@ -2001,10 +1793,11 @@
   bool dump_cfg_append_;
   std::string swap_file_name_;
   int swap_fd_;
+  std::string app_image_file_name_;
+  int app_image_fd_;
   std::string profile_file_;  // Profile file to use
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
-  std::unique_ptr<std::ostream> init_failure_output_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
@@ -2130,7 +1923,7 @@
   //   3) Compiling with --host
   //   4) Compiling on the host (not a target build)
   // Otherwise, print a stripped command line.
-  if (kIsDebugBuild || dex2oat.IsImage() || dex2oat.IsHost() || !kIsTargetBuild) {
+  if (kIsDebugBuild || dex2oat.IsBootImage() || dex2oat.IsHost() || !kIsTargetBuild) {
     LOG(INFO) << CommandLine();
   } else {
     LOG(INFO) << StrippedCommandLine();
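
A minimal stand-alone sketch of the predicate split introduced above, using the member
and method names from the diff (this is not the real Dex2Oat class):

    // Sketch: the single image_ flag becomes two flags; IsImage() is derived from both.
    class Dex2OatImageFlags {
     public:
      bool IsBootImage() const { return boot_image_; }                // compiling the boot image
      bool IsAppImage() const { return app_image_; }                  // --app-image-file / --app-image-fd
      bool IsImage() const { return IsAppImage() || IsBootImage(); }  // any image output at all
     private:
      bool boot_image_ = false;
      bool app_image_ = false;
    };

Callers that previously tested image_ now pick the narrower predicate: boot-image-only
logic (runtime setup, ELF fixup, the swap-file heuristic, command-line logging) uses
IsBootImage(), while the ImageWriter/OatWriter paths shared by boot and app images use
IsImage().
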
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 282db5d..52e6c02 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -775,7 +775,7 @@
     // case Instruction::k35ms:       // [opt] invoke-virtual+super
     // case Instruction::k35mi:       // [opt] inline invoke
       {
-        u4 arg[5];
+        u4 arg[Instruction::kMaxVarArgRegs];
         pDecInsn->GetVarArgs(arg);
         fputs(" {", gOutFile);
         for (int i = 0, n = pDecInsn->VRegA(); i < n; i++) {
@@ -788,6 +788,21 @@
         fprintf(gOutFile, "}, %s", indexBuf);
       }
       break;
+    case Instruction::k25x:        // op vC, {vD, vE, vF, vG} (B: count)
+      {
+        u4 arg[Instruction::kMaxVarArgRegs25x];
+        pDecInsn->GetAllArgs25x(arg);
+        fprintf(gOutFile, " v%d, {", arg[0]);
+        for (int i = 0, n = pDecInsn->VRegB(); i < n; i++) {
+          if (i == 0) {
+            fprintf(gOutFile, "v%d", arg[Instruction::kLambdaVirtualRegisterWidth + i]);
+          } else {
+            fprintf(gOutFile, ", v%d", arg[Instruction::kLambdaVirtualRegisterWidth + i]);
+          }
+        }  // for
+        fputc('}', gOutFile);
+      }
+      break;
     case Instruction::k3rc:        // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
     // NOT SUPPORTED:
     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
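
A stand-alone sketch of the k25x operand formatting added above. The closure register
width is passed in as a parameter because it corresponds to
Instruction::kLambdaVirtualRegisterWidth in the diff; the helper name is hypothetical.

    #include <cstdint>
    #include <cstdio>

    // Prints " vC, {vD, vE, ...}" the way the new k25x case does: arg[0] is the
    // closure register (vC), the regular arguments start at arg[closure_width],
    // and reg_count plays the role of pDecInsn->VRegB().
    void PrintArgs25x(FILE* out, const uint32_t* arg, int reg_count, int closure_width) {
      std::fprintf(out, " v%d, {", arg[0]);
      for (int i = 0; i < reg_count; ++i) {
        std::fprintf(out, i == 0 ? "v%d" : ", v%d", arg[closure_width + i]);
      }
      std::fputc('}', out);
    }
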
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index c55d285..c2f23aa 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -58,9 +58,10 @@
   // 0, 1, movci
   { kRTypeMask, 2, "srl", "DTA", },
   { kRTypeMask, 3, "sra", "DTA", },
-  { kRTypeMask, 4, "sllv", "DTS", },
-  { kRTypeMask, 6, "srlv", "DTS", },
-  { kRTypeMask, 7, "srav", "DTS", },
+  { kRTypeMask | (0x1f << 6), 4, "sllv", "DTS", },
+  { kRTypeMask | (0x1f << 6), 6, "srlv", "DTS", },
+  { kRTypeMask | (0x1f << 6), (1 << 6) | 6, "rotrv", "DTS", },
+  { kRTypeMask | (0x1f << 6), 7, "srav", "DTS", },
   { kRTypeMask, 8, "jr", "S", },
   { kRTypeMask | (0x1f << 11), 9 | (31 << 11), "jalr", "S", },  // rd = 31 is implicit.
   { kRTypeMask | (0x1f << 11), 9, "jr", "S", },  // rd = 0 is implicit.
@@ -74,9 +75,10 @@
   { kRTypeMask, 17, "mthi", "S", },
   { kRTypeMask, 18, "mflo", "D", },
   { kRTypeMask, 19, "mtlo", "S", },
-  { kRTypeMask, 20, "dsllv", "DTS", },
-  { kRTypeMask, 22, "dsrlv", "DTS", },
-  { kRTypeMask, 23, "dsrav", "DTS", },
+  { kRTypeMask | (0x1f << 6), 20, "dsllv", "DTS", },
+  { kRTypeMask | (0x1f << 6), 22, "dsrlv", "DTS", },
+  { kRTypeMask | (0x1f << 6), (1 << 6) | 22, "drotrv", "DTS", },
+  { kRTypeMask | (0x1f << 6), 23, "dsrav", "DTS", },
   { kRTypeMask | (0x1f << 6), 24, "mult", "ST", },
   { kRTypeMask | (0x1f << 6), 25, "multu", "ST", },
   { kRTypeMask | (0x1f << 6), 26, "div", "ST", },
@@ -99,13 +101,14 @@
   { kRTypeMask, 46, "dsub", "DST", },
   { kRTypeMask, 47, "dsubu", "DST", },
   // TODO: tge[u], tlt[u], teg, tne
-  { kRTypeMask, 56, "dsll", "DTA", },
-  { kRTypeMask, 58, "dsrl", "DTA", },
-  { kRTypeMask, 59, "dsra", "DTA", },
-  { kRTypeMask, 60, "dsll32", "DTA", },
-  { kRTypeMask | (0x1f << 21), 62 | (1 << 21), "drotr32", "DTA", },
-  { kRTypeMask, 62, "dsrl32", "DTA", },
-  { kRTypeMask, 63, "dsra32", "DTA", },
+  { kRTypeMask | (0x1f << 21), 56, "dsll", "DTA", },
+  { kRTypeMask | (0x1f << 21), 58, "dsrl", "DTA", },
+  { kRTypeMask | (0x1f << 21), (1 << 21) | 58, "drotr", "DTA", },
+  { kRTypeMask | (0x1f << 21), 59, "dsra", "DTA", },
+  { kRTypeMask | (0x1f << 21), 60, "dsll32", "DTA", },
+  { kRTypeMask | (0x1f << 21), 62, "dsrl32", "DTA", },
+  { kRTypeMask | (0x1f << 21), (1 << 21) | 62, "drotr32", "DTA", },
+  { kRTypeMask | (0x1f << 21), 63, "dsra32", "DTA", },
 
   // SPECIAL0
   { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" },
@@ -280,6 +283,7 @@
   { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
+  { kJTypeMask, 50u << kOpcodeShift, "bc", "P" },
   { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
   { kITypeMask | (0x1f << 21), 54u << kOpcodeShift, "jic", "Ti" },
   { kITypeMask | (1 << 21), (54u << kOpcodeShift) | (1 << 21), "beqzc", "Sb" },  // TODO: de-dup?
@@ -290,6 +294,7 @@
   { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
   { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" },
+  { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
   { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" },
   { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" },  // TODO: de-dup?
@@ -432,6 +437,22 @@
               }
             }
             break;
+          case 'P':  // 26-bit offset in bc.
+            {
+              int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1);
+              offset <<= 2;
+              offset += 4;
+              args << FormatInstructionPointer(instr_ptr + offset);
+              args << StringPrintf("  ; %+d", offset);
+            }
+            break;
+          case 'p':  // 19-bit offset in addiupc.
+            {
+              int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1);
+              args << offset << "  ; move r" << rs << ", ";
+              args << FormatInstructionPointer(instr_ptr + (offset << 2));
+            }
+            break;
           case 'S': args << 'r' << rs; break;
           case 's': args << 'f' << rs; break;
           case 'T': args << 'r' << rt; break;
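
The new 'P' and 'p' operand handlers rely on a branch-free sign extension: mask out the
field, then subtract the sign bit shifted left by one. A small sketch under the same bit
layouts, with worked values (function names are illustrative only):

    #include <cassert>
    #include <cstdint>

    int32_t DecodeBcOffset(uint32_t instruction) {          // 26-bit 'P' operand
      int32_t offset = static_cast<int32_t>(instruction & 0x3ffffff) -
                       static_cast<int32_t>((instruction & 0x2000000) << 1);
      offset *= 4;                                          // instruction words -> bytes
      return offset + 4;                                    // target is relative to pc + 4
    }

    int32_t DecodeAddiupcOffset(uint32_t instruction) {     // 19-bit 'p' operand, in words
      return static_cast<int32_t>(instruction & 0x7ffff) -
             static_cast<int32_t>((instruction & 0x40000) << 1);
    }

    int main() {
      assert(DecodeAddiupcOffset(0x7ffff) == -1);   // all-ones field sign-extends to -1
      assert(DecodeBcOffset(0x3ffffff) == 0);       // -1 words = -4 bytes, then +4 => 0
      return 0;
    }
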
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index e248604..ea61b43 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -49,10 +49,12 @@
 #include "mirror/object_array-inl.h"
 #include "oat.h"
 #include "oat_file-inl.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
+#include "stack_map.h"
 #include "ScopedLocalRef.h"
 #include "thread_list.h"
 #include "verifier/dex_gc_map.h"
@@ -1417,8 +1419,10 @@
                                          uint32_t method_access_flags) {
     if ((method_access_flags & kAccNative) == 0) {
       ScopedObjectAccess soa(Thread::Current());
+      Runtime* const runtime = Runtime::Current();
       Handle<mirror::DexCache> dex_cache(
-          hs->NewHandle(Runtime::Current()->GetClassLinker()->RegisterDexFile(*dex_file)));
+          hs->NewHandle(runtime->GetClassLinker()->RegisterDexFile(*dex_file,
+                                                                   runtime->GetLinearAlloc())));
       DCHECK(options_.class_loader_ != nullptr);
       return verifier::MethodVerifier::VerifyMethodAndDump(
           soa.Self(), vios, dex_method_idx, dex_file, dex_cache, *options_.class_loader_,
@@ -1563,13 +1567,15 @@
     }
     os << "\n";
 
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Runtime* const runtime = Runtime::Current();
+    ClassLinker* class_linker = runtime->GetClassLinker();
     std::string image_filename = image_space_.GetImageFilename();
     std::string oat_location = ImageHeader::GetOatLocationFromImageLocation(image_filename);
     os << "OAT LOCATION: " << oat_location;
     os << "\n";
     std::string error_msg;
-    const OatFile* oat_file = class_linker->FindOpenedOatFileFromOatLocation(oat_location);
+    const OatFile* oat_file = runtime->GetOatFileManager().FindOpenedOatFileFromOatLocation(
+        oat_location);
     if (oat_file == nullptr) {
       oat_file = OatFile::Open(oat_location, oat_location,
                                nullptr, nullptr, false, nullptr,
@@ -1594,7 +1600,7 @@
     os << "OBJECTS:\n" << std::flush;
 
     // Loop through all the image spaces and dump their objects.
-    gc::Heap* heap = Runtime::Current()->GetHeap();
+    gc::Heap* heap = runtime->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
     Thread* self = Thread::Current();
     {
@@ -1958,24 +1964,28 @@
     DCHECK(method != nullptr);
     const auto image_pointer_size =
         InstructionSetPointerSize(state->oat_dumper_->GetOatInstructionSet());
+    const void* quick_oat_code_begin = state->GetQuickOatCodeBegin(method);
+    const void* quick_oat_code_end = state->GetQuickOatCodeEnd(method);
+    OatQuickMethodHeader* method_header = reinterpret_cast<OatQuickMethodHeader*>(
+        reinterpret_cast<uintptr_t>(quick_oat_code_begin) - sizeof(OatQuickMethodHeader));
     if (method->IsNative()) {
-      DCHECK(method->GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
-      DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
+      if (!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(quick_oat_code_begin)) {
+        DCHECK(method_header->GetNativeGcMap() == nullptr) << PrettyMethod(method);
+        DCHECK(method_header->GetMappingTable() == nullptr) << PrettyMethod(method);
+      }
       bool first_occurrence;
-      const void* quick_oat_code = state->GetQuickOatCodeBegin(method);
       uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
-      state->ComputeOatSize(quick_oat_code, &first_occurrence);
+      state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
       if (first_occurrence) {
         state->stats_.native_to_managed_code_bytes += quick_oat_code_size;
       }
-      if (quick_oat_code != method->GetEntryPointFromQuickCompiledCodePtrSize(image_pointer_size)) {
-        indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code);
+      if (quick_oat_code_begin !=
+            method->GetEntryPointFromQuickCompiledCodePtrSize(image_pointer_size)) {
+        indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code_begin);
       }
     } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
       method->IsResolutionMethod() || method->IsImtConflictMethod() ||
       method->IsImtUnimplementedMethod() || method->IsClassInitializer()) {
-      DCHECK(method->GetNativeGcMap(image_pointer_size) == nullptr) << PrettyMethod(method);
-      DCHECK(method->GetMappingTable(image_pointer_size) == nullptr) << PrettyMethod(method);
     } else {
       const DexFile::CodeItem* code_item = method->GetCodeItem();
       size_t dex_instruction_bytes = code_item->insns_size_in_code_units_ * 2;
@@ -1983,29 +1993,27 @@
 
       bool first_occurrence;
       size_t gc_map_bytes = state->ComputeOatSize(
-          method->GetNativeGcMap(image_pointer_size), &first_occurrence);
+          method_header->GetNativeGcMap(), &first_occurrence);
       if (first_occurrence) {
         state->stats_.gc_map_bytes += gc_map_bytes;
       }
 
       size_t pc_mapping_table_bytes = state->ComputeOatSize(
-          method->GetMappingTable(image_pointer_size), &first_occurrence);
+          method_header->GetMappingTable(), &first_occurrence);
       if (first_occurrence) {
         state->stats_.pc_mapping_table_bytes += pc_mapping_table_bytes;
       }
 
       size_t vmap_table_bytes = 0u;
-      if (!method->IsOptimized(image_pointer_size)) {
+      if (!method_header->IsOptimized()) {
         // Methods compiled with the optimizing compiler have no vmap table.
         vmap_table_bytes = state->ComputeOatSize(
-            method->GetVmapTable(image_pointer_size), &first_occurrence);
+            method_header->GetVmapTable(), &first_occurrence);
         if (first_occurrence) {
           state->stats_.vmap_table_bytes += vmap_table_bytes;
         }
       }
 
-      const void* quick_oat_code_begin = state->GetQuickOatCodeBegin(method);
-      const void* quick_oat_code_end = state->GetQuickOatCodeEnd(method);
       uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
       state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
       if (first_occurrence) {
@@ -2394,13 +2402,13 @@
   // Need to register dex files to get a working dex cache.
   ScopedObjectAccess soa(self);
   ClassLinker* class_linker = runtime->GetClassLinker();
-  class_linker->RegisterOatFile(oat_file);
+  runtime->GetOatFileManager().RegisterOatFile(std::unique_ptr<const OatFile>(oat_file));
   std::vector<const DexFile*> class_path;
   for (const OatFile::OatDexFile* odf : oat_file->GetOatDexFiles()) {
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(odf, &error_msg);
     CHECK(dex_file != nullptr) << error_msg;
-    class_linker->RegisterDexFile(*dex_file);
+    class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
     class_path.push_back(dex_file);
   }
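
The oatdump changes above read the GC map, mapping table, and vmap table from the
OatQuickMethodHeader rather than from the ArtMethod, using the fact that the header is
laid out immediately before the compiled code it describes. A sketch of just that
placement relationship (the struct is a stand-in, not the real OatQuickMethodHeader
layout):

    #include <cstdint>

    struct MethodHeaderSketch {
      // Real field layout elided; only the "header precedes code" relation matters here.
      uint32_t mapping_table_offset_;
      uint32_t vmap_table_offset_;
      uint32_t gc_map_offset_;
      uint32_t code_size_;
    };

    inline const MethodHeaderSketch* HeaderFromCode(const void* quick_code_begin) {
      return reinterpret_cast<const MethodHeaderSketch*>(
          reinterpret_cast<uintptr_t>(quick_code_begin) - sizeof(MethodHeaderSketch));
    }
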
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 8d81f2a..1fdffe3 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -25,6 +25,7 @@
   barrier.cc \
   base/allocator.cc \
   base/arena_allocator.cc \
+  base/arena_bit_vector.cc \
   base/bit_vector.cc \
   base/hex_dump.cc \
   base/logging.cc \
@@ -46,6 +47,7 @@
   dex_file_verifier.cc \
   dex_instruction.cc \
   elf_file.cc \
+  fault_handler.cc \
   gc/allocation_record.cc \
   gc/allocator/dlmalloc.cc \
   gc/allocator/rosalloc.cc \
@@ -154,11 +156,14 @@
   oat.cc \
   oat_file.cc \
   oat_file_assistant.cc \
+  oat_file_manager.cc \
+  oat_quick_method_header.cc \
   object_lock.cc \
   offsets.cc \
   os_linux.cc \
   parsed_options.cc \
   primitive.cc \
+  profiler.cc \
   quick_exception_handler.cc \
   quick/inline_method_analyser.cc \
   reference_table.cc \
@@ -173,8 +178,7 @@
   thread_pool.cc \
   trace.cc \
   transaction.cc \
-  profiler.cc \
-  fault_handler.cc \
+  type_lookup_table.cc \
   utf.cc \
   utils.cc \
   verifier/dex_gc_map.cc \
@@ -203,7 +207,6 @@
   arch/x86/registers_x86.cc \
   arch/x86_64/registers_x86_64.cc \
   entrypoints/entrypoint_utils.cc \
-  entrypoints/interpreter/interpreter_entrypoints.cc \
   entrypoints/jni/jni_entrypoints.cc \
   entrypoints/math_entrypoints.cc \
   entrypoints/quick/quick_alloc_entrypoints.cc \
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index 4a45f49..d6ba304 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -39,7 +39,7 @@
     runtime->SetInstructionSet(isa);
     ArtMethod* save_method = runtime->CreateCalleeSaveMethod();
     runtime->SetCalleeSaveMethod(save_method, type);
-    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    QuickMethodFrameInfo frame_info = runtime->GetRuntimeMethodFrameInfo(save_method);
     EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real size differs for "
         << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
         << frame_info.FpSpillMask() << std::dec;
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 8f6b1ff..9cbec1e 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -16,9 +16,9 @@
 
 #include "context_arm.h"
 
-#include "art_method-inl.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace arm {
@@ -37,23 +37,21 @@
   arg0_ = 0;
 }
 
-void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+void ArmContext::FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& frame_info) {
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
   uint32_t core_regs = frame_info.CoreSpillMask();
   DCHECK_EQ(0u, core_regs & (static_cast<uint32_t>(-1) << kNumberOfCoreRegisters));
   for (uint32_t core_reg : HighToLowBits(core_regs)) {
-    gprs_[core_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()));
 
   // FP registers come second, from the highest down to the lowest.
   for (uint32_t fp_reg : HighToLowBits(frame_info.FpSpillMask())) {
-    fprs_[fp_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    fprs_[fp_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()) + POPCOUNT(frame_info.FpSpillMask()));
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index ea31055..2623ee9 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -35,7 +35,7 @@
 
   void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  void FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr) OVERRIDE;
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
     SetGPR(SP, new_sp);
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 1599025..76c7c4f 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -47,12 +46,7 @@
 // Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
 extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index d09631b..631b784 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -891,7 +891,110 @@
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
+    // r2, r3, r12: free.
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    cbz    r2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
+                                                              // Check class status.
+    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
+    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    r3, r3, r3
+    add    r2, r2, r3
+                                                              // Check access flags has
+                                                              // kAccClassIsFinalizable
+    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]     // Check if the thread local
+                                                              // allocation stack has room.
+                                                              // TODO: consider using ldrd.
+    ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
+    cmp    r3, r12
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3)
+    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
+                                                              // local allocation
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Compute the rosalloc bracket index
+                                                              // from the size.
+                                                              // Align up the size by the rosalloc
+                                                              // bracket quantum size and divide
+                                                              // by the quantum size and subtract
+                                                              // by 1. This code is a shorter but
+                                                              // equivalent version.
+    sub    r3, r3, #1
+    lsr    r3, r3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
+                                                              // Load the rosalloc run (r12)
+    add    r12, r9, r3, lsl #POINTER_SIZE_SHIFT
+    ldr    r12, [r12, #THREAD_ROSALLOC_RUNS_OFFSET]
+                                                              // Load the free list head (r3). This
+                                                              // will be the return val.
+    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+    cbz    r3, .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
+    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
+                                                              // and update the list head with the
+                                                              // next pointer.
+    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+                                                              // Store the class pointer in the
+                                                              // header. This also overwrites the
+                                                              // next pointer. The offsets are
+                                                              // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF r2
+    str    r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                              // Push the new object onto the thread
+                                                              // local allocation stack and
+                                                              // increment the thread local
+                                                              // allocation stack top.
+    ldr    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
+    str    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+                                                              // Decrement the size of the free list
+    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+    sub    r1, #1
+                                                              // TODO: consider combining this store
+                                                              // and the list head store above using
+                                                              // strd.
+    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that the code after this allocation
+                                                              // site will see the right values in
+                                                              // the fields of the class.
+                                                              // Alternatively we could use "ishst"
+                                                              // if we use load-acquire for the
+                                                              // class status load.
+    dmb    ish
+    mov    r0, r3                                             // Set the return value and return.
+    bx     lr
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
+    mov    r2, r9                     @ pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_rosalloc
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
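
The bracket-index shortcut in the fast path above ("align up, divide, subtract one"
collapsed into a single subtract-and-shift) can be checked directly. A sketch assuming
example values for the quantum shift and bracket limit, which are generated constants in
the real build:

    #include <cassert>
    #include <cstddef>

    int main() {
      constexpr size_t kQuantumShift = 4;                         // example: 16-byte quantum
      constexpr size_t kQuantum = size_t{1} << kQuantumShift;
      constexpr size_t kMaxThreadLocalBracketSize = 128;          // example limit
      for (size_t size = 1; size <= kMaxThreadLocalBracketSize; ++size) {
        size_t long_form = (size + kQuantum - 1) / kQuantum - 1;  // align up, divide, minus 1
        size_t shortcut  = (size - 1) >> kQuantumShift;           // what the assembly computes
        assert(long_form == shortcut);
      }
      return 0;
    }
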
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 4477631..d5d1ec7 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -18,9 +18,9 @@
 
 #include "context_arm64.h"
 
-#include "art_method-inl.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace arm64 {
@@ -39,21 +39,19 @@
   arg0_ = 0;
 }
 
-void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+void Arm64Context::FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& frame_info) {
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
   for (uint32_t core_reg : HighToLowBits(frame_info.CoreSpillMask())) {
-    gprs_[core_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()));
 
   // FP registers come second, from the highest down to the lowest.
   for (uint32_t fp_reg : HighToLowBits(frame_info.FpSpillMask())) {
-    fprs_[fp_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    fprs_[fp_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()) + POPCOUNT(frame_info.FpSpillMask()));
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 11314e0..105e784 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -35,7 +35,7 @@
 
   void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  void FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr) OVERRIDE;
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
     SetGPR(SP, new_sp);
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index e9c816f..371cbb2 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -30,12 +29,7 @@
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index be5a15e..9ccabad 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1437,7 +1437,107 @@
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    cbz    x2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
+                                                              // Check class status.
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
+    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    x3, x3, x3
+    add    x2, x2, x3
+                                                              // Check access flags has
+                                                              // kAccClassIsFinalizable
+    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
+                                                              // allocation stack has room.
+                                                              // ldp won't work due to large offset.
+    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
+    cmp    x3, x4
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x3)
+    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
+                                                              // local allocation
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Compute the rosalloc bracket index
+                                                              // from the size.
+                                                              // Align up the size by the rosalloc
+                                                              // bracket quantum size and divide
+                                                              // by the quantum size and subtract
+                                                              // by 1. This code is a shorter but
+                                                              // equivalent version.
+    sub    x3, x3, #1
+    lsr    x3, x3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
+                                                              // Load the rosalloc run (x4)
+    add    x4, xSELF, x3, lsl #POINTER_SIZE_SHIFT
+    ldr    x4, [x4, #THREAD_ROSALLOC_RUNS_OFFSET]
+                                                              // Load the free list head (x3). This
+                                                              // will be the return val.
+    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+    cbz    x3, .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
+                                                              // and update the list head with the
+                                                              // next pointer.
+    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+                                                              // Store the class pointer in the
+                                                              // header. This also overwrites the
+                                                              // next pointer. The offsets are
+                                                              // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF w2
+    str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                              // Push the new object onto the thread
+                                                              // local allocation stack and
+                                                              // increment the thread local
+                                                              // allocation stack top.
+    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
+    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+                                                              // Decrement the size of the free list
+    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+    sub    x1, x1, #1
+                                                              // TODO: consider combining this store
+                                                              // and the list head store above using
+                                                              // stp.
+    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that the code after this allocation
+                                                              // site will see the right values in
+                                                              // the fields of the class.
+                                                              // Alternatively we could use "ishst"
+                                                              // if we use load-acquire for the
+                                                              // class status load.
+    dmb    ish
+    mov    x0, x3                                             // Set the return value and return.
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME      // save callee saves in case of GC
+    mov    x2, xSELF                       // pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_rosalloc
 
     /*
      * Called by managed code when the thread has been asked to suspend.
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 9af7c04..a500648 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -25,7 +25,7 @@
 
 namespace art {
 
-class StackVisitor;
+class QuickMethodFrameInfo;
 
 // Representation of a thread's context on the executing machine, used to implement long jumps in
 // the quick stack frame layout.
@@ -39,10 +39,18 @@
   // Re-initializes the registers for context re-use.
   virtual void Reset() = 0;
 
+  static uintptr_t* CalleeSaveAddress(uint8_t* frame, int num, size_t frame_size) {
+    // Callee saves are held at the top of the frame
+    uint8_t* save_addr = frame + frame_size - ((num + 1) * sizeof(void*));
+#if defined(__i386__) || defined(__x86_64__)
+    save_addr -= sizeof(void*);  // account for return address
+#endif
+    return reinterpret_cast<uintptr_t*>(save_addr);
+  }
+
   // Reads values from callee saves in the given frame. The frame also holds
   // the method that holds the layout.
-  virtual void FillCalleeSaves(const StackVisitor& fr)
-      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr) = 0;
 
   // Sets the stack pointer value.
   virtual void SetSP(uintptr_t new_sp) = 0;
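
A worked example of the CalleeSaveAddress() helper added above, assuming a 64-bit target
(sizeof(void*) == 8): with an 80-byte frame, spill 0 (the highest register in the spill
mask) sits at frame + 72 and spill 1 at frame + 64. On x86/x86-64 the helper skips one
extra slot because the return address also occupies the top of the frame.

    #include <cassert>
    #include <cstdint>

    uintptr_t* CalleeSaveAddressSketch(uint8_t* frame, int num, size_t frame_size) {
      // Callee saves are held at the top of the frame (non-x86 variant of the helper).
      uint8_t* save_addr = frame + frame_size - ((num + 1) * sizeof(void*));
      return reinterpret_cast<uintptr_t*>(save_addr);
    }

    int main() {
      alignas(8) uint8_t frame[80] = {};
      assert(CalleeSaveAddressSketch(frame, 0, sizeof(frame)) ==
             reinterpret_cast<uintptr_t*>(frame + 72));
      assert(CalleeSaveAddressSketch(frame, 1, sizeof(frame)) ==
             reinterpret_cast<uintptr_t*>(frame + 64));
      return 0;
    }
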
diff --git a/runtime/arch/instruction_set.h b/runtime/arch/instruction_set.h
index 9cfd2eb..ff9c0b3 100644
--- a/runtime/arch/instruction_set.h
+++ b/runtime/arch/instruction_set.h
@@ -107,6 +107,22 @@
   }
 }
 
+static inline bool IsValidInstructionSet(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+    case kArm64:
+    case kX86:
+    case kX86_64:
+    case kMips:
+    case kMips64:
+      return true;
+    case kNone:
+    default:
+      return false;
+  }
+}
+
 size_t GetInstructionSetAlignment(InstructionSet isa);
 
 static inline bool Is64BitInstructionSet(InstructionSet isa) {
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index 08ab356..375a03a 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -16,7 +16,6 @@
 
 #include "context_mips.h"
 
-#include "art_method-inl.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
 
@@ -29,29 +28,27 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
-  gprs_[RA] = &ra_;
+  gprs_[T9] = &t9_;
   gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = MipsContext::kBadGprBase + SP;
-  ra_ = MipsContext::kBadGprBase + RA;
+  t9_ = MipsContext::kBadGprBase + T9;
   arg0_ = 0;
 }
 
-void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+void MipsContext::FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& frame_info) {
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
   for (uint32_t core_reg : HighToLowBits(frame_info.CoreSpillMask())) {
-    gprs_[core_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()));
 
   // FP registers come second, from the highest down to the lowest.
   for (uint32_t fp_reg : HighToLowBits(frame_info.FpSpillMask())) {
-    fprs_[fp_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    fprs_[fp_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()) + POPCOUNT(frame_info.FpSpillMask()));
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 0affe53..7dcff63 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -34,14 +34,14 @@
 
   void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  void FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr) OVERRIDE;
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
     SetGPR(SP, new_sp);
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
-    SetGPR(RA, new_pc);
+    SetGPR(T9, new_pc);
   }
 
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
@@ -86,9 +86,10 @@
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfFRegisters];
-  // Hold values for sp and ra (return address) if they are not located within a stack frame, as
-  // well as the first argument.
-  uintptr_t sp_, ra_, arg0_;
+  // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the
+  // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We
+  // also need the first argument for single-frame deopt.
+  uintptr_t sp_, t9_, arg0_;
 };
 }  // namespace mips
 }  // namespace art
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 6721e54..59421dd 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -15,7 +15,6 @@
  */
 
 #include "atomic.h"
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -59,12 +58,7 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index ba58c3f..0691f2a 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -374,7 +374,7 @@
     lw      $ra, 124($a0)
     lw      $a0, 16($a0)
     move    $v0, $zero          # clear result registers r0 and r1
-    jalr    $zero, $ra          # do long jump
+    jalr    $zero, $t9          # do long jump
     move    $v1, $zero
 END art_quick_do_long_jump
 
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index 2c17f1c..bd1ac3b 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -16,7 +16,6 @@
 
 #include "context_mips64.h"
 
-#include "art_method-inl.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
 
@@ -37,21 +36,19 @@
   arg0_ = 0;
 }
 
-void Mips64Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+void Mips64Context::FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& frame_info) {
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
   for (uint32_t core_reg : HighToLowBits(frame_info.CoreSpillMask())) {
-    gprs_[core_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()));
 
   // FP registers come second, from the highest down to the lowest.
   for (uint32_t fp_reg : HighToLowBits(frame_info.FpSpillMask())) {
-    fprs_[fp_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    fprs_[fp_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()) + POPCOUNT(frame_info.FpSpillMask()));
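FillCalleeSaves() now walks the spill masks against a raw frame pointer instead of a StackVisitor, addressing each slot through a CalleeSaveAddress() helper (presumably provided by the shared Context base class; its exact signature is not part of this diff). A plausible shape for it, shown only as a sketch and ignoring any per-architecture return-address adjustment: callee saves sit at the top of the quick frame, so spill slot num, counted from the highest address downwards, lies just below frame + frame_size.

#include <cstddef>
#include <cstdint>

// Sketch: address of the num-th callee-save slot in a quick frame of
// frame_size bytes, with slot 0 at the highest address.
static uintptr_t* CalleeSaveAddress(uint8_t* frame, int num, size_t frame_size) {
  uint8_t* save_addr = frame + frame_size - (num + 1) * sizeof(void*);
  return reinterpret_cast<uintptr_t*>(save_addr);
}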
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index 84b1c9b..89fbf8f 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -34,7 +34,7 @@
 
   void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  void FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr) OVERRIDE;
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
     SetGPR(SP, new_sp);
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 9f1f0e0..417d5fc 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -15,7 +15,6 @@
  */
 
 #include "atomic.h"
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -57,12 +56,7 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 68156ae..66c8aad 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1615,5 +1615,70 @@
     move     $a0, rSELF                       # pass Thread::current
 END art_quick_deoptimize_from_compiled_code
 
-UNIMPLEMENTED art_quick_indexof
-UNIMPLEMENTED art_quick_string_compareto
+  .set push
+  .set noat
+/* java.lang.String.compareTo(String anotherString) */
+ENTRY_NO_GP art_quick_string_compareto
+/* $a0 holds address of "this" */
+/* $a1 holds address of "anotherString" */
+  beq    $a0,$a1,9f     # this and anotherString are the same object
+  move   $v0,$zero
+
+  lw     $a2,MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
+  lw     $a3,MIRROR_STRING_COUNT_OFFSET($a1)    # anotherString.length()
+  sltu   $at,$a2,$a3
+  seleqz $t2,$a3,$at
+  selnez $at,$a2,$at
+  or     $t2,$t2,$at    # $t2 now holds min(this.length(),anotherString.length())
+
+  beqz   $t2,9f         # while min(this.length(),anotherString.length())-i != 0
+  subu   $v0,$a2,$a3    # if $t2==0 return
+                        #     (this.length() - anotherString.length())
+1:
+  lhu    $t0,MIRROR_STRING_VALUE_OFFSET($a0)    # while this.charAt(i) == anotherString.charAt(i)
+  lhu    $t1,MIRROR_STRING_VALUE_OFFSET($a1)
+  bne    $t0,$t1,9f     # if this.charAt(i) != anotherString.charAt(i)
+  subu   $v0,$t0,$t1    #     return (this.charAt(i) - anotherString.charAt(i))
+  daddiu $a0,$a0,2      # point at this.charAt(i++)
+  subu   $t2,$t2,1      # new value of
+                        # min(this.length(),anotherString.length())-i
+  bnez   $t2,1b
+  daddiu $a1,$a1,2      # point at anotherString.charAt(i++)
+  subu   $v0,$a2,$a3
+
+9:
+  j      $ra
+  nop
+END art_quick_string_compareto
+
+/* java.lang.String.indexOf(int ch, int fromIndex=0) */
+ENTRY_NO_GP art_quick_indexof
+/* $a0 holds address of "this" */
+/* $a1 holds "ch" */
+/* $a2 holds "fromIndex" */
+  lw    $t0,MIRROR_STRING_COUNT_OFFSET($a0)     # this.length()
+  subu  $t0,$t0,$a2     # this.length() - offset
+  blez  $t0,6f          # if this.length()-offset <= 0
+  li    $v0,-1          #     return -1;
+
+  sll   $v0,$a2,1       # $v0 = fromIndex * 2
+  daddu $a0,$a0,$v0     # $a0 += fromIndex * 2 (point at this.charAt(fromIndex))
+  move  $v0,$a2         # Set i to offset.
+
+1:
+  lhu   $t3,MIRROR_STRING_VALUE_OFFSET($a0)     # if this.charAt(i) == ch
+  beq   $t3,$a1,6f                              #     return i;
+  daddu $a0,$a0,2       # i++
+  subu  $t0,$t0,1       # this.length() - i
+  bnez  $t0,1b          # while this.length() - i > 0
+  addu  $v0,$v0,1       # i++
+
+  li    $v0,-1          # if this.length() - i <= 0
+                        #     return -1;
+
+6:
+  j     $ra
+  nop
+END art_quick_indexof
+
+  .set pop
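For reference, the two stubs above implement the usual java.lang.String semantics. A plain C++ rendering over raw UTF-16 data, offered only as a sketch (the real intrinsics operate on the string object layout via MIRROR_STRING_COUNT_OFFSET/MIRROR_STRING_VALUE_OFFSET, handle only BMP characters in indexOf, and assume the caller has already clamped fromIndex):

#include <cstdint>

// Equivalent of String.compareTo(): compare up to the shorter length, then
// fall back to the difference of the lengths.
int32_t StringCompareTo(const uint16_t* lhs, int32_t lhs_len,
                        const uint16_t* rhs, int32_t rhs_len) {
  int32_t min_len = lhs_len < rhs_len ? lhs_len : rhs_len;
  for (int32_t i = 0; i < min_len; ++i) {
    if (lhs[i] != rhs[i]) {
      return static_cast<int32_t>(lhs[i]) - static_cast<int32_t>(rhs[i]);
    }
  }
  return lhs_len - rhs_len;
}

// Equivalent of String.indexOf(int ch, int fromIndex) for BMP characters:
// return the first index at or after from_index holding ch, or -1.
int32_t StringIndexOf(const uint16_t* chars, int32_t len,
                      uint16_t ch, int32_t from_index) {
  for (int32_t i = from_index < 0 ? 0 : from_index; i < len; ++i) {
    if (chars[i] == ch) {
      return i;
    }
  }
  return -1;
}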
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index ef5edbb..fbacdbc 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -113,7 +113,8 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0d2457e..1d10e5d 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -71,351 +71,7 @@
   // TODO: Set up a frame according to referrer's specs.
   size_t Invoke3WithReferrer(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self,
                              ArtMethod* referrer) {
-    // Push a transition back into managed code onto the linked list in thread.
-    ManagedStack fragment;
-    self->PushManagedStackFragment(&fragment);
-
-    size_t result;
-    size_t fpr_result = 0;
-#if defined(__i386__)
-    // TODO: Set the thread?
-    __asm__ __volatile__(
-        "subl $12, %%esp\n\t"       // Align stack.
-        "pushl %[referrer]\n\t"     // Store referrer.
-        "call *%%edi\n\t"           // Call the stub
-        "addl $16, %%esp"           // Pop referrer
-        : "=a" (result)
-          // Use the result from eax
-        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
-          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-        : "memory");  // clobber.
-    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
-    //       but compilation fails when declaring that.
-#elif defined(__arm__)
-    __asm__ __volatile__(
-        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
-        ".cfi_adjust_cfa_offset 52\n\t"
-        "push {r9}\n\t"
-        ".cfi_adjust_cfa_offset 4\n\t"
-        "mov r9, %[referrer]\n\n"
-        "str r9, [sp, #-8]!\n\t"   // Push referrer, +8B padding so 16B aligned
-        ".cfi_adjust_cfa_offset 8\n\t"
-        "ldr r9, [sp, #8]\n\t"
-
-        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
-        "sub sp, sp, #20\n\t"
-        "str %[arg0], [sp]\n\t"
-        "str %[arg1], [sp, #4]\n\t"
-        "str %[arg2], [sp, #8]\n\t"
-        "str %[code], [sp, #12]\n\t"
-        "str %[self], [sp, #16]\n\t"
-        "ldr r0, [sp]\n\t"
-        "ldr r1, [sp, #4]\n\t"
-        "ldr r2, [sp, #8]\n\t"
-        "ldr r3, [sp, #12]\n\t"
-        "ldr r9, [sp, #16]\n\t"
-        "add sp, sp, #20\n\t"
-
-        "blx r3\n\t"                // Call the stub
-        "add sp, sp, #12\n\t"       // Pop null and padding
-        ".cfi_adjust_cfa_offset -12\n\t"
-        "pop {r1-r12, lr}\n\t"      // Restore state
-        ".cfi_adjust_cfa_offset -52\n\t"
-        "mov %[result], r0\n\t"     // Save the result
-        : [result] "=r" (result)
-          // Use the result from r0
-        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-          [referrer] "r"(referrer)
-        : "r0", "memory");  // clobber.
-#elif defined(__aarch64__)
-    __asm__ __volatile__(
-        // Spill x0-x7 which we say we don't clobber. May contain args.
-        "sub sp, sp, #64\n\t"
-        ".cfi_adjust_cfa_offset 64\n\t"
-        "stp x0, x1, [sp]\n\t"
-        "stp x2, x3, [sp, #16]\n\t"
-        "stp x4, x5, [sp, #32]\n\t"
-        "stp x6, x7, [sp, #48]\n\t"
-
-        "sub sp, sp, #16\n\t"          // Reserve stack space, 16B aligned
-        ".cfi_adjust_cfa_offset 16\n\t"
-        "str %[referrer], [sp]\n\t"    // referrer
-
-        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
-        "sub sp, sp, #48\n\t"
-        ".cfi_adjust_cfa_offset 48\n\t"
-        // All things are "r" constraints, so direct str/stp should work.
-        "stp %[arg0], %[arg1], [sp]\n\t"
-        "stp %[arg2], %[code], [sp, #16]\n\t"
-        "str %[self], [sp, #32]\n\t"
-
-        // Now we definitely have x0-x3 free, use it to garble d8 - d15
-        "movk x0, #0xfad0\n\t"
-        "movk x0, #0xebad, lsl #16\n\t"
-        "movk x0, #0xfad0, lsl #32\n\t"
-        "movk x0, #0xebad, lsl #48\n\t"
-        "fmov d8, x0\n\t"
-        "add x0, x0, 1\n\t"
-        "fmov d9, x0\n\t"
-        "add x0, x0, 1\n\t"
-        "fmov d10, x0\n\t"
-        "add x0, x0, 1\n\t"
-        "fmov d11, x0\n\t"
-        "add x0, x0, 1\n\t"
-        "fmov d12, x0\n\t"
-        "add x0, x0, 1\n\t"
-        "fmov d13, x0\n\t"
-        "add x0, x0, 1\n\t"
-        "fmov d14, x0\n\t"
-        "add x0, x0, 1\n\t"
-        "fmov d15, x0\n\t"
-
-        // Load call params into the right registers.
-        "ldp x0, x1, [sp]\n\t"
-        "ldp x2, x3, [sp, #16]\n\t"
-        "ldr x19, [sp, #32]\n\t"
-        "add sp, sp, #48\n\t"
-        ".cfi_adjust_cfa_offset -48\n\t"
-
-
-        "blr x3\n\t"              // Call the stub
-        "mov x8, x0\n\t"          // Store result
-        "add sp, sp, #16\n\t"     // Drop the quick "frame"
-        ".cfi_adjust_cfa_offset -16\n\t"
-
-        // Test d8 - d15. We can use x1 and x2.
-        "movk x1, #0xfad0\n\t"
-        "movk x1, #0xebad, lsl #16\n\t"
-        "movk x1, #0xfad0, lsl #32\n\t"
-        "movk x1, #0xebad, lsl #48\n\t"
-        "fmov x2, d8\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-        "add x1, x1, 1\n\t"
-
-        "fmov x2, d9\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-        "add x1, x1, 1\n\t"
-
-        "fmov x2, d10\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-        "add x1, x1, 1\n\t"
-
-        "fmov x2, d11\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-        "add x1, x1, 1\n\t"
-
-        "fmov x2, d12\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-        "add x1, x1, 1\n\t"
-
-        "fmov x2, d13\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-        "add x1, x1, 1\n\t"
-
-        "fmov x2, d14\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-        "add x1, x1, 1\n\t"
-
-        "fmov x2, d15\n\t"
-        "cmp x1, x2\n\t"
-        "b.ne 1f\n\t"
-
-        "mov x9, #0\n\t"              // Use x9 as flag, in clobber list
-
-        // Finish up.
-        "2:\n\t"
-        "ldp x0, x1, [sp]\n\t"        // Restore stuff not named clobbered, may contain fpr_result
-        "ldp x2, x3, [sp, #16]\n\t"
-        "ldp x4, x5, [sp, #32]\n\t"
-        "ldp x6, x7, [sp, #48]\n\t"
-        "add sp, sp, #64\n\t"         // Free stack space, now sp as on entry
-        ".cfi_adjust_cfa_offset -64\n\t"
-
-        "str x9, %[fpr_result]\n\t"   // Store the FPR comparison result
-        "mov %[result], x8\n\t"              // Store the call result
-
-        "b 3f\n\t"                     // Goto end
-
-        // Failed fpr verification.
-        "1:\n\t"
-        "mov x9, #1\n\t"
-        "b 2b\n\t"                     // Goto finish-up
-
-        // End
-        "3:\n\t"
-        : [result] "=r" (result)
-          // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-          [referrer] "r"(referrer), [fpr_result] "m" (fpr_result)
-        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20",
-          "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30",
-          "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
-          "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
-          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
-          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
-          "memory");  // clobber.
-#elif defined(__mips__) && !defined(__LP64__)
-    __asm__ __volatile__ (
-        // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args.
-        "addiu $sp, $sp, -64\n\t"
-        "sw $a0, 0($sp)\n\t"
-        "sw $a1, 4($sp)\n\t"
-        "sw $a2, 8($sp)\n\t"
-        "sw $a3, 12($sp)\n\t"
-        "sw $t0, 16($sp)\n\t"
-        "sw $t1, 20($sp)\n\t"
-        "sw $t2, 24($sp)\n\t"
-        "sw $t3, 28($sp)\n\t"
-        "sw $t4, 32($sp)\n\t"
-        "sw $t5, 36($sp)\n\t"
-        "sw $t6, 40($sp)\n\t"
-        "sw $t7, 44($sp)\n\t"
-        // Spill gp register since it is caller save.
-        "sw $gp, 52($sp)\n\t"
-
-        "addiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
-        "sw %[referrer], 0($sp)\n\t"
-
-        // Push everything on the stack, so we don't rely on the order.
-        "addiu $sp, $sp, -20\n\t"
-        "sw %[arg0], 0($sp)\n\t"
-        "sw %[arg1], 4($sp)\n\t"
-        "sw %[arg2], 8($sp)\n\t"
-        "sw %[code], 12($sp)\n\t"
-        "sw %[self], 16($sp)\n\t"
-
-        // Load call params into the right registers.
-        "lw $a0, 0($sp)\n\t"
-        "lw $a1, 4($sp)\n\t"
-        "lw $a2, 8($sp)\n\t"
-        "lw $t9, 12($sp)\n\t"
-        "lw $s1, 16($sp)\n\t"
-        "addiu $sp, $sp, 20\n\t"
-
-        "jalr $t9\n\t"             // Call the stub.
-        "nop\n\t"
-        "addiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
-
-        // Restore stuff not named clobbered.
-        "lw $a0, 0($sp)\n\t"
-        "lw $a1, 4($sp)\n\t"
-        "lw $a2, 8($sp)\n\t"
-        "lw $a3, 12($sp)\n\t"
-        "lw $t0, 16($sp)\n\t"
-        "lw $t1, 20($sp)\n\t"
-        "lw $t2, 24($sp)\n\t"
-        "lw $t3, 28($sp)\n\t"
-        "lw $t4, 32($sp)\n\t"
-        "lw $t5, 36($sp)\n\t"
-        "lw $t6, 40($sp)\n\t"
-        "lw $t7, 44($sp)\n\t"
-        // Restore gp.
-        "lw $gp, 52($sp)\n\t"
-        "addiu $sp, $sp, 64\n\t"   // Free stack space, now sp as on entry.
-
-        "move %[result], $v0\n\t"  // Store the call result.
-        : [result] "=r" (result)
-        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-          [referrer] "r"(referrer)
-        : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1",
-          "fp", "ra",
-          "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
-          "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
-          "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
-          "memory");  // clobber.
-#elif defined(__mips__) && defined(__LP64__)
-    __asm__ __volatile__ (
-        // Spill a0-a7 which we say we don't clobber. May contain args.
-        "daddiu $sp, $sp, -64\n\t"
-        "sd $a0, 0($sp)\n\t"
-        "sd $a1, 8($sp)\n\t"
-        "sd $a2, 16($sp)\n\t"
-        "sd $a3, 24($sp)\n\t"
-        "sd $a4, 32($sp)\n\t"
-        "sd $a5, 40($sp)\n\t"
-        "sd $a6, 48($sp)\n\t"
-        "sd $a7, 56($sp)\n\t"
-
-        "daddiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
-        "sd %[referrer], 0($sp)\n\t"
-
-        // Push everything on the stack, so we don't rely on the order.
-        "daddiu $sp, $sp, -40\n\t"
-        "sd %[arg0], 0($sp)\n\t"
-        "sd %[arg1], 8($sp)\n\t"
-        "sd %[arg2], 16($sp)\n\t"
-        "sd %[code], 24($sp)\n\t"
-        "sd %[self], 32($sp)\n\t"
-
-        // Load call params into the right registers.
-        "ld $a0, 0($sp)\n\t"
-        "ld $a1, 8($sp)\n\t"
-        "ld $a2, 16($sp)\n\t"
-        "ld $t9, 24($sp)\n\t"
-        "ld $s1, 32($sp)\n\t"
-        "daddiu $sp, $sp, 40\n\t"
-
-        "jalr $t9\n\t"              // Call the stub.
-        "nop\n\t"
-        "daddiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
-
-        // Restore stuff not named clobbered.
-        "ld $a0, 0($sp)\n\t"
-        "ld $a1, 8($sp)\n\t"
-        "ld $a2, 16($sp)\n\t"
-        "ld $a3, 24($sp)\n\t"
-        "ld $a4, 32($sp)\n\t"
-        "ld $a5, 40($sp)\n\t"
-        "ld $a6, 48($sp)\n\t"
-        "ld $a7, 56($sp)\n\t"
-        "daddiu $sp, $sp, 64\n\t"
-
-        "move %[result], $v0\n\t"   // Store the call result.
-        : [result] "=r" (result)
-        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-          [referrer] "r"(referrer)
-        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-          "t8", "t9", "k0", "k1", "fp", "ra",
-          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
-          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
-          "f27", "f28", "f29", "f30", "f31",
-          "memory");  // clobber.
-#elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__)
-    // Note: Uses the native convention
-    // TODO: Set the thread?
-    __asm__ __volatile__(
-        "pushq %[referrer]\n\t"        // Push referrer
-        "pushq (%%rsp)\n\t"             // & 16B alignment padding
-        ".cfi_adjust_cfa_offset 16\n\t"
-        "call *%%rax\n\t"              // Call the stub
-        "addq $16, %%rsp\n\t"          // Pop null and padding
-        ".cfi_adjust_cfa_offset -16\n\t"
-        : "=a" (result)
-          // Use the result from rax
-        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "c"(referrer)
-          // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-        : "rbx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
-          "memory");  // clobber all
-    // TODO: Should we clobber the other registers?
-#else
-    UNUSED(arg0, arg1, arg2, code, referrer);
-    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
-    result = 0;
-#endif
-    // Pop transition.
-    self->PopManagedStackFragment(fragment);
-
-    fp_result = fpr_result;
-    EXPECT_EQ(0U, fp_result);
-
-    return result;
+    return Invoke3WithReferrerAndHidden(arg0, arg1, arg2, code, self, referrer, 0);
   }
 
   // TODO: Set up a frame according to referrer's specs.
@@ -429,19 +85,55 @@
     size_t fpr_result = 0;
 #if defined(__i386__)
     // TODO: Set the thread?
+#define PUSH(reg) "push " # reg "\n\t .cfi_adjust_cfa_offset 4\n\t"
+#define POP(reg) "pop " # reg "\n\t .cfi_adjust_cfa_offset -4\n\t"
     __asm__ __volatile__(
-        "movd %[hidden], %%xmm7\n\t"
-        "subl $12, %%esp\n\t"       // Align stack.
-        "pushl %[referrer]\n\t"     // Store referrer
+        "movd %[hidden], %%xmm7\n\t"  // This is a memory op, so do this early. If it is off of
+                                      // esp, then we won't be able to access it after spilling.
+
+        // Spill 6 registers.
+        PUSH(%%ebx)
+        PUSH(%%ecx)
+        PUSH(%%edx)
+        PUSH(%%esi)
+        PUSH(%%edi)
+        PUSH(%%ebp)
+
+        // Store the inputs to the stack, but keep the referrer at the top so it is cheaper to pop.
+        PUSH(%[referrer])           // Align stack.
+        PUSH(%[referrer])           // Store referrer
+
+        PUSH(%[arg0])
+        PUSH(%[arg1])
+        PUSH(%[arg2])
+        PUSH(%[code])
+        // Now read them back into the required registers.
+        POP(%%edi)
+        POP(%%edx)
+        POP(%%ecx)
+        POP(%%eax)
+        // Call is prepared now.
+
         "call *%%edi\n\t"           // Call the stub
-        "addl $16, %%esp"           // Pop referrer
+        "addl $8, %%esp\n\t"        // Pop referrer and padding.
+        ".cfi_adjust_cfa_offset -8\n\t"
+
+        // Restore 6 registers.
+        POP(%%ebp)
+        POP(%%edi)
+        POP(%%esi)
+        POP(%%edx)
+        POP(%%ecx)
+        POP(%%ebx)
+
         : "=a" (result)
           // Use the result from eax
-        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer), [hidden]"m"(hidden)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code),
+          [referrer]"r"(referrer), [hidden]"m"(hidden)
           // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-        : "memory");  // clobber.
-    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
-    //       but compilation fails when declaring that.
+        : "memory", "xmm7");  // clobber.
+#undef PUSH
+#undef POP
 #elif defined(__arm__)
     __asm__ __volatile__(
         "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
@@ -743,23 +435,72 @@
           "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
           "f27", "f28", "f29", "f30", "f31",
           "memory");  // clobber.
-#elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__)
-    // Note: Uses the native convention
+#elif defined(__x86_64__) && !defined(__APPLE__)
+#define PUSH(reg) "pushq " # reg "\n\t .cfi_adjust_cfa_offset 8\n\t"
+#define POP(reg) "popq " # reg "\n\t .cfi_adjust_cfa_offset -8\n\t"
+    // Note: Uses the native convention. We do a callee-save regimen by manually spilling and
+    //       restoring almost all registers.
     // TODO: Set the thread?
     __asm__ __volatile__(
-        "pushq %[referrer]\n\t"        // Push referrer
-        "pushq (%%rsp)\n\t"            // & 16B alignment padding
-        ".cfi_adjust_cfa_offset 16\n\t"
-        "call *%%rbx\n\t"              // Call the stub
-        "addq $16, %%rsp\n\t"          // Pop null and padding
+        // Spill almost everything (except rax, rsp). 14 registers.
+        PUSH(%%rbx)
+        PUSH(%%rcx)
+        PUSH(%%rdx)
+        PUSH(%%rsi)
+        PUSH(%%rdi)
+        PUSH(%%rbp)
+        PUSH(%%r8)
+        PUSH(%%r9)
+        PUSH(%%r10)
+        PUSH(%%r11)
+        PUSH(%%r12)
+        PUSH(%%r13)
+        PUSH(%%r14)
+        PUSH(%%r15)
+
+        PUSH(%[referrer])              // Push referrer & 16B alignment padding
+        PUSH(%[referrer])
+
+        // Now juggle the input registers.
+        PUSH(%[arg0])
+        PUSH(%[arg1])
+        PUSH(%[arg2])
+        PUSH(%[hidden])
+        PUSH(%[code])
+        POP(%%r8)
+        POP(%%rax)
+        POP(%%rdx)
+        POP(%%rsi)
+        POP(%%rdi)
+
+        "call *%%r8\n\t"                  // Call the stub
+        "addq $16, %%rsp\n\t"             // Pop null and padding
         ".cfi_adjust_cfa_offset -16\n\t"
+
+        POP(%%r15)
+        POP(%%r14)
+        POP(%%r13)
+        POP(%%r12)
+        POP(%%r11)
+        POP(%%r10)
+        POP(%%r9)
+        POP(%%r8)
+        POP(%%rbp)
+        POP(%%rdi)
+        POP(%%rsi)
+        POP(%%rdx)
+        POP(%%rcx)
+        POP(%%rbx)
+
         : "=a" (result)
         // Use the result from rax
-        : "D"(arg0), "S"(arg1), "d"(arg2), "b"(code), [referrer] "c"(referrer), [hidden] "a"(hidden)
-        // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-        : "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
-          "memory");  // clobber all
-    // TODO: Should we clobber the other registers?
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into some other
+        // register. We can't use "b" (rbx), as ASAN uses this for the frame pointer.
+        : "memory");  // We spill and restore (almost) all registers, so only mention memory here.
+#undef PUSH
+#undef POP
 #else
     UNUSED(arg0, arg1, arg2, code, referrer, hidden);
     LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
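The rewritten Invoke3WithReferrerAndHidden relies on stringizing PUSH/POP macros so that every manual spill is paired with a .cfi_adjust_cfa_offset directive, keeping unwind information consistent across the hand-rolled save/restore region. A minimal, stand-alone illustration of that macro mechanic (x86-64, GCC/Clang extended asm; the function and register choices are purely illustrative, and like the test itself it assumes the red zone is not in use across the pushes):

#include <cstdint>

#define PUSH(reg) "pushq " #reg "\n\t.cfi_adjust_cfa_offset 8\n\t"
#define POP(reg)  "popq " #reg "\n\t.cfi_adjust_cfa_offset -8\n\t"

// Spills rbx/r12 around a tiny computation; the CFA adjustments keep a
// debugger or unwinder correct while the pushes are live.
uint64_t SpillDemo(uint64_t value) {
  uint64_t result;
  __asm__ __volatile__(
      PUSH(%%rbx)
      PUSH(%%r12)
      "movq %[in], %%rbx\n\t"
      "leaq 1(%%rbx), %[out]\n\t"   // result = value + 1, via the spilled register.
      POP(%%r12)
      POP(%%rbx)
      : [out] "=r"(result)
      : [in] "r"(value)
      : "rbx", "r12");
  return result;
}

#undef PUSH
#undef POP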
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 987ad60..077d2db 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -16,7 +16,6 @@
 
 #include "context_x86.h"
 
-#include "art_method-inl.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
 
@@ -36,9 +35,7 @@
   arg0_ = 0;
 }
 
-void X86Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+void X86Context::FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& frame_info) {
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
@@ -46,7 +43,7 @@
       frame_info.CoreSpillMask() & ~(static_cast<uint32_t>(-1) << kNumberOfCpuRegisters);
   DCHECK_EQ(1, POPCOUNT(frame_info.CoreSpillMask() & ~core_regs));  // Return address spill.
   for (uint32_t core_reg : HighToLowBits(core_regs)) {
-    gprs_[core_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()) - 1);
@@ -57,9 +54,9 @@
   for (uint32_t fp_reg : HighToLowBits(fp_regs)) {
     // Two void* per XMM register.
     fprs_[2 * fp_reg] = reinterpret_cast<uint32_t*>(
-        fr.CalleeSaveAddress(spill_pos + 1, frame_info.FrameSizeInBytes()));
+        CalleeSaveAddress(frame, spill_pos + 1, frame_info.FrameSizeInBytes()));
     fprs_[2 * fp_reg + 1] = reinterpret_cast<uint32_t*>(
-        fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes()));
+        CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes()));
     spill_pos += 2;
   }
   DCHECK_EQ(spill_pos,
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index 59beb12..f482d9f 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -34,7 +34,7 @@
 
   void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  void FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr) OVERRIDE;
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
     SetGPR(ESP, new_sp);
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 10fc281..019546f 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -31,12 +30,7 @@
 // Read barrier entrypoints.
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 4a106e4..2f485ae 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -788,6 +788,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 3dc7d71..7c49e9c 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -16,7 +16,6 @@
 
 #include "context_x86_64.h"
 
-#include "art_method-inl.h"
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
 
@@ -36,9 +35,7 @@
   arg0_ = 0;
 }
 
-void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
-  ArtMethod* method = fr.GetMethod();
-  const QuickMethodFrameInfo frame_info = method->GetQuickFrameInfo();
+void X86_64Context::FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& frame_info) {
   int spill_pos = 0;
 
   // Core registers come first, from the highest down to the lowest.
@@ -46,7 +43,7 @@
       frame_info.CoreSpillMask() & ~(static_cast<uint32_t>(-1) << kNumberOfCpuRegisters);
   DCHECK_EQ(1, POPCOUNT(frame_info.CoreSpillMask() & ~core_regs));  // Return address spill.
   for (uint32_t core_reg : HighToLowBits(core_regs)) {
-    gprs_[core_reg] = fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes());
+    gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()) - 1);
@@ -56,7 +53,7 @@
   DCHECK_EQ(0u, fp_regs & (static_cast<uint32_t>(-1) << kNumberOfFloatRegisters));
   for (uint32_t fp_reg : HighToLowBits(fp_regs)) {
     fprs_[fp_reg] = reinterpret_cast<uint64_t*>(
-        fr.CalleeSaveAddress(spill_pos, frame_info.FrameSizeInBytes()));
+        CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes()));
     ++spill_pos;
   }
   DCHECK_EQ(spill_pos,
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index f05b7f0..46f2b63 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -34,7 +34,7 @@
 
   void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+  void FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr) OVERRIDE;
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
     SetGPR(RSP, new_sp);
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 5cc72e3..eae09ee 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -32,16 +31,11 @@
 // Read barrier entrypoints.
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
-  UNUSED(ipoints, jpoints, qpoints);
+  UNUSED(jpoints, qpoints);
   UNIMPLEMENTED(FATAL);
 #else
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 5c413d2..95f0ccb 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -809,6 +809,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 632a50f..cf548ad 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -212,18 +212,6 @@
   return type;
 }
 
-inline uint32_t ArtMethod::GetCodeSize() {
-  DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
-  return GetCodeSize(EntryPointToCodePointer(GetEntryPointFromQuickCompiledCode()));
-}
-
-inline uint32_t ArtMethod::GetCodeSize(const void* code) {
-  if (code == nullptr) {
-    return 0u;
-  }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
-}
-
 inline bool ArtMethod::CheckIncompatibleClassChange(InvokeType type) {
   switch (type) {
     case kStatic:
@@ -231,8 +219,9 @@
     case kDirect:
       return !IsDirect() || IsStatic();
     case kVirtual: {
+      // We have an error if we are direct or a non-default, non-miranda interface method.
       mirror::Class* methods_class = GetDeclaringClass();
-      return IsDirect() || (methods_class->IsInterface() && !IsMiranda());
+      return IsDirect() || (methods_class->IsInterface() && !IsDefault() && !IsMiranda());
     }
     case kSuper:
       // Constructors and static methods are called with invoke-direct.
@@ -248,85 +237,6 @@
   }
 }
 
-inline uint32_t ArtMethod::GetQuickOatCodeOffset() {
-  DCHECK(!Runtime::Current()->IsStarted());
-  return PointerToLowMemUInt32(GetEntryPointFromQuickCompiledCode());
-}
-
-inline void ArtMethod::SetQuickOatCodeOffset(uint32_t code_offset) {
-  DCHECK(!Runtime::Current()->IsStarted());
-  SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>(code_offset));
-}
-
-inline const uint8_t* ArtMethod::GetMappingTable(size_t pointer_size) {
-  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
-  if (code_pointer == nullptr) {
-    return nullptr;
-  }
-  return GetMappingTable(code_pointer, pointer_size);
-}
-
-inline const uint8_t* ArtMethod::GetMappingTable(const void* code_pointer, size_t pointer_size) {
-  DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].mapping_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
-}
-
-inline const uint8_t* ArtMethod::GetVmapTable(size_t pointer_size) {
-  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
-  if (code_pointer == nullptr) {
-    return nullptr;
-  }
-  return GetVmapTable(code_pointer, pointer_size);
-}
-
-inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer, size_t pointer_size) {
-  CHECK(!IsOptimized(pointer_size)) << "Unimplemented vmap table for optimized compiler";
-  DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
-}
-
-inline CodeInfo ArtMethod::GetOptimizedCodeInfo() {
-  DCHECK(IsOptimized(sizeof(void*)));
-  const void* code_pointer = GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].vmap_table_offset_;
-  const void* data =
-      reinterpret_cast<const void*>(reinterpret_cast<const uint8_t*>(code_pointer) - offset);
-  return CodeInfo(data);
-}
-
-inline const uint8_t* ArtMethod::GetNativeGcMap(size_t pointer_size) {
-  const void* code_pointer = GetQuickOatCodePointer(pointer_size);
-  if (code_pointer == nullptr) {
-    return nullptr;
-  }
-  return GetNativeGcMap(code_pointer, pointer_size);
-}
-
-inline const uint8_t* ArtMethod::GetNativeGcMap(const void* code_pointer, size_t pointer_size) {
-  DCHECK(code_pointer != nullptr);
-  DCHECK_EQ(code_pointer, GetQuickOatCodePointer(pointer_size));
-  uint32_t offset =
-      reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].gc_map_offset_;
-  if (UNLIKELY(offset == 0u)) {
-    return nullptr;
-  }
-  return reinterpret_cast<const uint8_t*>(code_pointer) - offset;
-}
-
 inline bool ArtMethod::IsRuntimeMethod() {
   return dex_method_index_ == DexFile::kDexNoIndex;
 }
@@ -367,20 +277,6 @@
   return result;
 }
 
-inline uintptr_t ArtMethod::NativeQuickPcOffset(const uintptr_t pc) {
-  const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(
-      this, sizeof(void*));
-  return pc - reinterpret_cast<uintptr_t>(code);
-}
-
-inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
-  DCHECK(code_pointer != nullptr);
-  if (kIsDebugBuild && !IsProxyMethod()) {
-    CHECK_EQ(code_pointer, GetQuickOatCodePointer(sizeof(void*)));
-  }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1].frame_info_;
-}
-
 inline const DexFile* ArtMethod::GetDexFile() {
   return GetDexCache()->GetDexFile();
 }
@@ -572,12 +468,6 @@
   }
 }
 
-inline void ArtMethod::CopyFrom(const ArtMethod* src, size_t image_pointer_size) {
-  memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(src),
-         Size(image_pointer_size));
-  declaring_class_ = GcRoot<mirror::Class>(const_cast<ArtMethod*>(src)->GetDeclaringClass());
-}
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_METHOD_INL_H_
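Most of the removed accessors read their data through reinterpret_cast<const OatQuickMethodHeader*>(code_pointer)[-1], i.e. they rely on the method header being laid out immediately before the first instruction of the compiled code; art_method.cc below switches to OatQuickMethodHeader::FromEntryPoint() for the same layout. A simplified sketch of that addressing trick (field set reduced, names illustrative rather than the full ART struct):

#include <cstdint>

struct SketchQuickMethodHeader {
  uint32_t mapping_table_offset_;  // 0 means "no table"; otherwise subtract from the code pointer.
  uint32_t vmap_table_offset_;
  uint32_t gc_map_offset_;
  uint32_t code_size_;
  // The compiled code is emitted directly after this struct in memory.
};

static const SketchQuickMethodHeader* HeaderFromCodePointer(const void* code) {
  // `code` points at the first instruction, so the header is the struct that ends there.
  return reinterpret_cast<const SketchQuickMethodHeader*>(code) - 1;
}

static const uint8_t* MappingTableFromCodePointer(const void* code) {
  uint32_t offset = HeaderFromCodePointer(code)->mapping_table_offset_;
  return offset == 0u ? nullptr
                      : reinterpret_cast<const uint8_t*>(code) - offset;
}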
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 92648b9..f4a5f23 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -163,115 +163,18 @@
     return dex_method_idx;
   }
   const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
-  const DexFile::StringId* other_descriptor =
-      other_dexfile.FindStringId(mid_declaring_class_descriptor);
-  if (other_descriptor != nullptr) {
-    const DexFile::TypeId* other_type_id =
-        other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
-    if (other_type_id != nullptr) {
-      const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
-          *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
-          other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
-      if (other_mid != nullptr) {
-        return other_dexfile.GetIndexForMethodId(*other_mid);
-      }
+  const DexFile::TypeId* other_type_id = other_dexfile.FindTypeId(mid_declaring_class_descriptor);
+  if (other_type_id != nullptr) {
+    const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
+        *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
+        other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
+    if (other_mid != nullptr) {
+      return other_dexfile.GetIndexForMethodId(*other_mid);
     }
   }
   return DexFile::kDexNoIndex;
 }
 
-uint32_t ArtMethod::ToDexPc(const uintptr_t pc, bool abort_on_failure) {
-  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
-  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
-  if (IsOptimized(sizeof(void*))) {
-    CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
-    StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset, encoding);
-    if (stack_map.IsValid()) {
-      return stack_map.GetDexPc(encoding);
-    }
-  } else {
-    MappingTable table(entry_point != nullptr ?
-        GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
-    if (table.TotalSize() == 0) {
-      // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
-      // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
-      DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
-      return DexFile::kDexNoIndex;   // Special no mapping case
-    }
-    // Assume the caller wants a pc-to-dex mapping so check here first.
-    typedef MappingTable::PcToDexIterator It;
-    for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
-    // Now check dex-to-pc mappings.
-    typedef MappingTable::DexToPcIterator It2;
-    for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.NativePcOffset() == sought_offset) {
-        return cur.DexPc();
-      }
-    }
-  }
-  if (abort_on_failure) {
-      LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
-             << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
-             << " current entry_point=" << GetQuickOatEntryPoint(sizeof(void*))
-             << ") in " << PrettyMethod(this);
-  }
-  return DexFile::kDexNoIndex;
-}
-
-uintptr_t ArtMethod::ToNativeQuickPc(const uint32_t dex_pc,
-                                     bool is_for_catch_handler,
-                                     bool abort_on_failure) {
-  const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
-  if (IsOptimized(sizeof(void*))) {
-    // Optimized code does not have a mapping table. Search for the dex-to-pc
-    // mapping in stack maps.
-    CodeInfo code_info = GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
-
-    // All stack maps are stored in the same CodeItem section, safepoint stack
-    // maps first, then catch stack maps. We use `is_for_catch_handler` to select
-    // the order of iteration.
-    StackMap stack_map =
-        LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
-                                     : code_info.GetStackMapForDexPc(dex_pc, encoding);
-    if (stack_map.IsValid()) {
-      return reinterpret_cast<uintptr_t>(entry_point) + stack_map.GetNativePcOffset(encoding);
-    }
-  } else {
-    MappingTable table(entry_point != nullptr ?
-        GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
-    if (table.TotalSize() == 0) {
-      DCHECK_EQ(dex_pc, 0U);
-      return 0;   // Special no mapping/pc == 0 case
-    }
-    // Assume the caller wants a dex-to-pc mapping so check here first.
-    typedef MappingTable::DexToPcIterator It;
-    for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
-    // Now check pc-to-dex mappings.
-    typedef MappingTable::PcToDexIterator It2;
-    for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-      if (cur.DexPc() == dex_pc) {
-        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-      }
-    }
-  }
-
-  if (abort_on_failure) {
-    LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
-               << " in " << PrettyMethod(this);
-  }
-  return UINTPTR_MAX;
-}
-
 uint32_t ArtMethod::FindCatchBlock(Handle<mirror::Class> exception_type,
                                    uint32_t dex_pc, bool* has_no_move_exception) {
   const DexFile::CodeItem* code_item = GetCodeItem();
@@ -322,76 +225,6 @@
   return found_dex_pc;
 }
 
-void ArtMethod::AssertPcIsWithinQuickCode(uintptr_t pc) {
-  if (IsNative() || IsRuntimeMethod() || IsProxyMethod()) {
-    return;
-  }
-  if (pc == reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc())) {
-    return;
-  }
-  const void* code = GetEntryPointFromQuickCompiledCode();
-  if (code == GetQuickInstrumentationEntryPoint()) {
-    return;
-  }
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  if (class_linker->IsQuickToInterpreterBridge(code) ||
-      class_linker->IsQuickResolutionStub(code)) {
-    return;
-  }
-  // If we are the JIT then we may have just compiled the method after the
-  // IsQuickToInterpreterBridge check.
-  jit::Jit* const jit = Runtime::Current()->GetJit();
-  if (jit != nullptr &&
-      jit->GetCodeCache()->ContainsCodePtr(reinterpret_cast<const void*>(code))) {
-    return;
-  }
-  /*
-   * During a stack walk, a return PC may point past-the-end of the code
-   * in the case that the last instruction is a call that isn't expected to
-   * return.  Thus, we check <= code + GetCodeSize().
-   *
-   * NOTE: For Thumb both pc and code are offset by 1 indicating the Thumb state.
-   */
-  CHECK(PcIsWithinQuickCode(reinterpret_cast<uintptr_t>(code), pc))
-      << PrettyMethod(this)
-      << " pc=" << std::hex << pc
-      << " code=" << code
-      << " size=" << GetCodeSize(
-          EntryPointToCodePointer(reinterpret_cast<const void*>(code)));
-}
-
-bool ArtMethod::IsEntrypointInterpreter() {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const void* oat_quick_code = class_linker->GetOatMethodQuickCodeFor(this);
-  return oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode();
-}
-
-const void* ArtMethod::GetQuickOatEntryPoint(size_t pointer_size) {
-  if (IsAbstract() || IsRuntimeMethod() || IsProxyMethod()) {
-    return nullptr;
-  }
-  Runtime* runtime = Runtime::Current();
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  const void* code = runtime->GetInstrumentation()->GetQuickCodeFor(this, pointer_size);
-  // On failure, instead of null we get the quick-generic-jni-trampoline for native method
-  // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
-  // for non-native methods.
-  if (class_linker->IsQuickToInterpreterBridge(code) ||
-      class_linker->IsQuickGenericJniStub(code)) {
-    return nullptr;
-  }
-  return code;
-}
-
-#ifndef NDEBUG
-uintptr_t ArtMethod::NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point) {
-  CHECK_NE(quick_entry_point, GetQuickToInterpreterBridge());
-  CHECK_EQ(quick_entry_point,
-           Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this, sizeof(void*)));
-  return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
-}
-#endif
-
 void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
                        const char* shorty) {
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
@@ -435,8 +268,9 @@
 
       // Ensure that we won't be accidentally calling quick compiled code when -Xint.
       if (kIsDebugBuild && runtime->GetInstrumentation()->IsForcedInterpretOnly()) {
-        DCHECK(!runtime->UseJit());
-        CHECK(IsEntrypointInterpreter())
+        CHECK(!runtime->UseJit());
+        const void* oat_quick_code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(this);
+        CHECK(oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode())
             << "Don't call compiled code when -Xint " << PrettyMethod(this);
       }
 
@@ -480,74 +314,6 @@
   self->PopManagedStackFragment(fragment);
 }
 
-// Counts the number of references in the parameter list of the corresponding method.
-// Note: Thus does _not_ include "this" for non-static methods.
-static uint32_t GetNumberOfReferenceArgsWithoutReceiver(ArtMethod* method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  uint32_t shorty_len;
-  const char* shorty = method->GetShorty(&shorty_len);
-  uint32_t refs = 0;
-  for (uint32_t i = 1; i < shorty_len ; ++i) {
-    if (shorty[i] == 'L') {
-      refs++;
-    }
-  }
-  return refs;
-}
-
-QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo() {
-  Runtime* runtime = Runtime::Current();
-
-  if (UNLIKELY(IsAbstract())) {
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-  }
-
-  // This goes before IsProxyMethod since runtime methods have a null declaring class.
-  if (UNLIKELY(IsRuntimeMethod())) {
-    return runtime->GetRuntimeMethodFrameInfo(this);
-  }
-
-  // For Proxy method we add special handling for the direct method case  (there is only one
-  // direct method - constructor). Direct method is cloned from original
-  // java.lang.reflect.Proxy class together with code and as a result it is executed as usual
-  // quick compiled method without any stubs. So the frame info should be returned as it is a
-  // quick method not a stub. However, if instrumentation stubs are installed, the
-  // instrumentation->GetQuickCodeFor() returns the artQuickProxyInvokeHandler instead of an
-  // oat code pointer, thus we have to add a special case here.
-  if (UNLIKELY(IsProxyMethod())) {
-    if (IsDirect()) {
-      CHECK(IsConstructor());
-      return GetQuickFrameInfo(EntryPointToCodePointer(GetEntryPointFromQuickCompiledCode()));
-    } else {
-      return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-    }
-  }
-
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(this, sizeof(void*));
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  // On failure, instead of null we get the quick-generic-jni-trampoline for native method
-  // indicating the generic JNI, or the quick-to-interpreter-bridge (but not the trampoline)
-  // for non-native methods. And we really shouldn't see a failure for non-native methods here.
-  DCHECK(!class_linker->IsQuickToInterpreterBridge(entry_point));
-
-  if (class_linker->IsQuickGenericJniStub(entry_point)) {
-    // Generic JNI frame.
-    DCHECK(IsNative());
-    uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(this) + 1;
-    size_t scope_size = HandleScope::SizeOf(handle_refs);
-    QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-
-    // Callee saves + handle scope + method ref + alignment
-    // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
-    size_t frame_size = RoundUp(callee_info.FrameSizeInBytes() - sizeof(void*) +
-                                sizeof(ArtMethod*) + scope_size, kStackAlignment);
-    return QuickMethodFrameInfo(frame_size, callee_info.CoreSpillMask(), callee_info.FpSpillMask());
-  }
-
-  const void* code_pointer = EntryPointToCodePointer(entry_point);
-  return GetQuickFrameInfo(code_pointer);
-}
-
 void ArtMethod::RegisterNative(const void* native_method, bool is_fast) {
   CHECK(IsNative()) << PrettyMethod(this);
   CHECK(!IsFastNative()) << PrettyMethod(this);
@@ -600,17 +366,120 @@
   return oat_method.GetVmapTable();
 }
 
-ProfilingInfo* ArtMethod::CreateProfilingInfo() {
-  DCHECK(!Runtime::Current()->IsAotCompiler());
-  ProfilingInfo* info = ProfilingInfo::Create(this);
-  MemberOffset offset = ArtMethod::EntryPointFromJniOffset(sizeof(void*));
-  uintptr_t pointer = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
-  if (!reinterpret_cast<Atomic<ProfilingInfo*>*>(pointer)->
-          CompareExchangeStrongSequentiallyConsistent(nullptr, info)) {
-    return GetProfilingInfo(sizeof(void*));
-  } else {
-    return info;
+const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) {
+  if (IsRuntimeMethod()) {
+    return nullptr;
   }
+
+  Runtime* runtime = Runtime::Current();
+  const void* existing_entry_point = GetEntryPointFromQuickCompiledCode();
+  DCHECK(existing_entry_point != nullptr);
+  ClassLinker* class_linker = runtime->GetClassLinker();
+
+  if (class_linker->IsQuickGenericJniStub(existing_entry_point)) {
+    // The generic JNI does not have any method header.
+    return nullptr;
+  }
+
+  if (existing_entry_point == GetQuickProxyInvokeHandler()) {
+    DCHECK(IsProxyMethod() && !IsConstructor());
+    // The proxy entry point does not have any method header.
+    return nullptr;
+  }
+
+  // Check whether the current entry point contains this pc.
+  if (!class_linker->IsQuickResolutionStub(existing_entry_point) &&
+      !class_linker->IsQuickToInterpreterBridge(existing_entry_point)) {
+    OatQuickMethodHeader* method_header =
+        OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
+
+    if (method_header->Contains(pc)) {
+      return method_header;
+    }
+  }
+
+  // Check whether the pc is in the JIT code cache.
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit::JitCodeCache* code_cache = jit->GetCodeCache();
+    OatQuickMethodHeader* method_header = code_cache->LookupMethodHeader(pc, this);
+    if (method_header != nullptr) {
+      DCHECK(method_header->Contains(pc));
+      return method_header;
+    } else {
+      DCHECK(!code_cache->ContainsPc(reinterpret_cast<const void*>(pc))) << std::hex << pc;
+    }
+  }
+
+  // The code has to be in an oat file.
+  bool found;
+  OatFile::OatMethod oat_method = class_linker->FindOatMethodFor(this, &found);
+  if (!found) {
+    if (class_linker->IsQuickResolutionStub(existing_entry_point)) {
+      // We are running the generic jni stub, but the entry point of the method has not
+      // been updated yet.
+      DCHECK_EQ(pc, 0u) << "Should be a downcall";
+      DCHECK(IsNative());
+      return nullptr;
+    }
+    if (existing_entry_point == GetQuickInstrumentationEntryPoint()) {
+      // We are running the generic jni stub, but the method is being instrumented.
+      DCHECK_EQ(pc, 0u) << "Should be a downcall";
+      DCHECK(IsNative());
+      return nullptr;
+    }
+    // Only for unit tests.
+    // TODO(ngeoffray): Update these tests to pass the right pc?
+    return OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
+  }
+  const void* oat_entry_point = oat_method.GetQuickCode();
+  if (oat_entry_point == nullptr || class_linker->IsQuickGenericJniStub(oat_entry_point)) {
+    DCHECK(IsNative());
+    return nullptr;
+  }
+
+  OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromEntryPoint(oat_entry_point);
+  if (pc == 0) {
+    // This is a downcall, it can only happen for a native method.
+    DCHECK(IsNative());
+    return method_header;
+  }
+
+  if (pc == reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc())) {
+    // If we're instrumenting, just return the compiled OAT code.
+    // TODO(ngeoffray): Avoid this call path.
+    return method_header;
+  }
+
+  DCHECK(method_header->Contains(pc))
+      << PrettyMethod(this)
+      << std::hex << pc << " " << oat_entry_point
+      << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
+  return method_header;
+}
+
+
+void ArtMethod::CopyFrom(ArtMethod* src, size_t image_pointer_size) {
+  memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(src),
+         Size(image_pointer_size));
+  declaring_class_ = GcRoot<mirror::Class>(const_cast<ArtMethod*>(src)->GetDeclaringClass());
+
+  // If the entry point of the method we are copying from is JIT code, we just
+  // point the entry point of the new method at the interpreter. We could set the entry point
+  // to the JIT code, but this would require taking the JIT code cache lock to notify
+  // it, which we do not want at this level.
+  Runtime* runtime = Runtime::Current();
+  if (runtime->GetJit() != nullptr) {
+    if (runtime->GetJit()->GetCodeCache()->ContainsPc(GetEntryPointFromQuickCompiledCode())) {
+      SetEntryPointFromQuickCompiledCodePtrSize(GetQuickToInterpreterBridge(), image_pointer_size);
+    }
+  }
+  // Clear the profiling info for the same reasons as the JIT code.
+  if (!src->IsNative()) {
+    SetProfilingInfoPtrSize(nullptr, image_pointer_size);
+  }
+  // Clear hotness to let the JIT properly decide when to compile this method.
+  hotness_count_ = 0;
 }
 
 }  // namespace art
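GetOatQuickMethodHeader() above leans on OatQuickMethodHeader::Contains(pc); the final DCHECK prints code_ + code_size_ as the upper bound, and the (removed) AssertPcIsWithinQuickCode() comment explains why the bound is inclusive: a return PC may point one past the end of the code when the last instruction is a call that never returns. A free-standing sketch of that predicate, not the in-tree implementation:

#include <cstdint>

// Sketch: is `pc` attributable to a code blob starting at `code_start` of
// `code_size` bytes? The upper bound is inclusive because a return address may
// legitimately point one past the last instruction.
static bool PcIsWithinCode(uintptr_t pc, uintptr_t code_start, uint32_t code_size) {
  return code_start <= pc && pc <= code_start + code_size;
}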
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 0315c3a..ce9f202 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_ART_METHOD_H_
 #define ART_RUNTIME_ART_METHOD_H_
 
+#include "base/bit_utils.h"
 #include "base/casts.h"
 #include "dex_file.h"
 #include "gc_root.h"
@@ -24,15 +25,14 @@
 #include "method_reference.h"
 #include "modifiers.h"
 #include "mirror/object.h"
-#include "quick/quick_method_frame_info.h"
 #include "read_barrier_option.h"
 #include "stack.h"
-#include "stack_map.h"
 #include "utils.h"
 
 namespace art {
 
 union JValue;
+class OatQuickMethodHeader;
 class ProfilingInfo;
 class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
@@ -49,8 +49,8 @@
   ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
       method_index_(0) { }
 
-  ArtMethod(const ArtMethod& src, size_t image_pointer_size) {
-    CopyFrom(&src, image_pointer_size);
+  ArtMethod(ArtMethod* src, size_t image_pointer_size) {
+    CopyFrom(src, image_pointer_size);
   }
 
   static ArtMethod* FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
@@ -136,6 +136,11 @@
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
 
+  // This is set by the class linker.
+  bool IsDefault() {
+    return (GetAccessFlags() & kAccDefault) != 0;
+  }
+
   bool IsNative() {
     return (GetAccessFlags() & kAccNative) != 0;
   }
@@ -164,14 +169,9 @@
     SetAccessFlags(GetAccessFlags() | kAccPreverified);
   }
 
-  bool IsOptimized(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_) {
-    // Temporary solution for detecting if a method has been optimized: the compiler
-    // does not create a GC map. Instead, the vmap table contains the stack map
-    // (as in stack_map.h).
-    return !IsNative()
-        && GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
-        && GetQuickOatCodePointer(pointer_size) != nullptr
-        && GetNativeGcMap(pointer_size) == nullptr;
+  // Returns true if this method could be overridden by a default method.
+  bool IsOverridableByDefaultMethod() {
+    return IsDefault() || IsAbstract();
   }
 
   bool CheckIncompatibleClassChange(InvokeType type) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -280,94 +280,6 @@
                      entry_point_from_quick_compiled_code, pointer_size);
   }
 
-  uint32_t GetCodeSize() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Check whether the given PC is within the quick compiled code associated with this method's
-  // quick entrypoint. This code isn't robust for instrumentation, etc. and is only used for
-  // debug purposes.
-  bool PcIsWithinQuickCode(uintptr_t pc) {
-    return PcIsWithinQuickCode(
-        reinterpret_cast<uintptr_t>(GetEntryPointFromQuickCompiledCode()), pc);
-  }
-
-  void AssertPcIsWithinQuickCode(uintptr_t pc) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Returns true if the entrypoint points to the interpreter, as
-  // opposed to the compiled code, that is, this method will be
-  // interpretered on invocation.
-  bool IsEntrypointInterpreter() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  uint32_t GetQuickOatCodeOffset();
-  void SetQuickOatCodeOffset(uint32_t code_offset);
-
-  ALWAYS_INLINE static const void* EntryPointToCodePointer(const void* entry_point) {
-    uintptr_t code = reinterpret_cast<uintptr_t>(entry_point);
-    // TODO: Make this Thumb2 specific. It is benign on other architectures as code is always at
-    //       least 2 byte aligned.
-    code &= ~0x1;
-    return reinterpret_cast<const void*>(code);
-  }
-
-  // Actual entry point pointer to compiled oat code or null.
-  const void* GetQuickOatEntryPoint(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  // Actual pointer to compiled oat code or null.
-  const void* GetQuickOatCodePointer(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    return EntryPointToCodePointer(GetQuickOatEntryPoint(pointer_size));
-  }
-
-  // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
-  const uint8_t* GetMappingTable(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const uint8_t* GetMappingTable(const void* code_pointer, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
-  const uint8_t* GetVmapTable(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const uint8_t* GetVmapTable(const void* code_pointer, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  CodeInfo GetOptimizedCodeInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Callers should wrap the uint8_t* in a GcMap instance for convenient access.
-  const uint8_t* GetNativeGcMap(size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const uint8_t* GetNativeGcMap(const void* code_pointer, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  template <bool kCheckFrameSize = true>
-  uint32_t GetFrameSizeInBytes() SHARED_REQUIRES(Locks::mutator_lock_) {
-    uint32_t result = GetQuickFrameInfo().FrameSizeInBytes();
-    if (kCheckFrameSize) {
-      DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
-    }
-    return result;
-  }
-
-  QuickMethodFrameInfo GetQuickFrameInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-  QuickMethodFrameInfo GetQuickFrameInfo(const void* code_pointer)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  FrameOffset GetReturnPcOffset() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetReturnPcOffset(GetFrameSizeInBytes());
-  }
-
-  FrameOffset GetReturnPcOffset(uint32_t frame_size_in_bytes)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK_EQ(frame_size_in_bytes, GetFrameSizeInBytes());
-    return FrameOffset(frame_size_in_bytes - sizeof(void*));
-  }
-
-  FrameOffset GetHandleScopeOffset() SHARED_REQUIRES(Locks::mutator_lock_) {
-    constexpr size_t handle_scope_offset = sizeof(ArtMethod*);
-    DCHECK_LT(handle_scope_offset, GetFrameSizeInBytes());
-    return FrameOffset(handle_scope_offset);
-  }
-
   void RegisterNative(const void* native_method, bool is_fast)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -393,12 +305,22 @@
         PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
-  ProfilingInfo* CreateProfilingInfo() SHARED_REQUIRES(Locks::mutator_lock_);
-
   ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
     return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
   }
 
+  ALWAYS_INLINE void SetProfilingInfo(ProfilingInfo* info) {
+    SetEntryPointFromJniPtrSize(info, sizeof(void*));
+  }
+
+  ALWAYS_INLINE void SetProfilingInfoPtrSize(ProfilingInfo* info, size_t pointer_size) {
+    SetEntryPointFromJniPtrSize(info, pointer_size);
+  }
+
+  static MemberOffset ProfilingInfoOffset() {
+    return EntryPointFromJniOffset(sizeof(void*));
+  }
+
   void* GetEntryPointFromJni() {
     return GetEntryPointFromJniPtrSize(sizeof(void*));
   }
@@ -429,27 +351,6 @@
 
   bool IsImtUnimplementedMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  uintptr_t NativeQuickPcOffset(const uintptr_t pc) SHARED_REQUIRES(Locks::mutator_lock_);
-#ifdef NDEBUG
-  uintptr_t NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    return pc - reinterpret_cast<uintptr_t>(quick_entry_point);
-  }
-#else
-  uintptr_t NativeQuickPcOffset(const uintptr_t pc, const void* quick_entry_point)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-#endif
-
-  // Converts a native PC to a dex PC.
-  uint32_t ToDexPc(const uintptr_t pc, bool abort_on_failure = true)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Converts a dex PC to a native PC.
-  uintptr_t ToNativeQuickPc(const uint32_t dex_pc,
-                            bool is_for_catch_handler,
-                            bool abort_on_failure = true)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   MethodReference ToMethodReference() SHARED_REQUIRES(Locks::mutator_lock_) {
     return MethodReference(GetDexFile(), GetDexMethodIndex());
   }
@@ -532,7 +433,7 @@
     return pointer_size;
   }
 
-  void CopyFrom(const ArtMethod* src, size_t image_pointer_size)
+  void CopyFrom(ArtMethod* src, size_t image_pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
@@ -542,6 +443,17 @@
     return ++hotness_count_;
   }
 
+  void ClearCounter() {
+    hotness_count_ = 0;
+  }
+
+  const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the method header for the compiled code containing 'pc'. Note that runtime
+  // methods will return null for this method, as they are not oat based.
+  const OatQuickMethodHeader* GetOatQuickMethodHeader(uintptr_t pc)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
@@ -622,24 +534,6 @@
     }
   }
 
-  // Code points to the start of the quick code.
-  static uint32_t GetCodeSize(const void* code);
-
-  static bool PcIsWithinQuickCode(uintptr_t code, uintptr_t pc) {
-    if (code == 0) {
-      return pc == 0;
-    }
-    /*
-     * During a stack walk, a return PC may point past-the-end of the code
-     * in the case that the last instruction is a call that isn't expected to
-     * return.  Thus, we check <= code + GetCodeSize().
-     *
-     * NOTE: For Thumb both pc and code are offset by 1 indicating the Thumb state.
-     */
-    return code <= pc && pc <= code + GetCodeSize(
-        EntryPointToCodePointer(reinterpret_cast<const void*>(code)));
-  }
-
   DISALLOW_COPY_AND_ASSIGN(ArtMethod);  // Need to use CopyFrom to deal with 32 vs 64 bits.
 };
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 1b569fe..69f6fe9 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -19,6 +19,7 @@
 
 #if defined(__cplusplus)
 #include "art_method.h"
+#include "gc/allocator/rosalloc.h"
 #include "lock_word.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
@@ -53,6 +54,14 @@
 #define ADD_TEST_EQ(x, y)
 #endif
 
+#if defined(__LP64__)
+#define POINTER_SIZE_SHIFT 3
+#else
+#define POINTER_SIZE_SHIFT 2
+#endif
+ADD_TEST_EQ(static_cast<size_t>(1U << POINTER_SIZE_SHIFT),
+            static_cast<size_t>(__SIZEOF_POINTER__))
+
 // Size of references to the heap on the stack.
 #define STACK_REFERENCE_SIZE 4
 ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
@@ -62,6 +71,10 @@
 ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
             sizeof(art::mirror::CompressedReference<art::mirror::Object>))
 
+#define COMPRESSED_REFERENCE_SIZE_SHIFT 2
+ADD_TEST_EQ(static_cast<size_t>(1U << COMPRESSED_REFERENCE_SIZE_SHIFT),
+            static_cast<size_t>(COMPRESSED_REFERENCE_SIZE))
+
 // Note: these callee save methods loads require read barriers.
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
@@ -109,7 +122,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 152 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 150 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
@@ -120,6 +133,18 @@
 #define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.rosalloc_runs.
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
+            art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
+#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 34 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
+            art::Thread::ThreadLocalAllocStackTopOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_end.
+#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 35 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
+            art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offsets within java.lang.Object.
 #define MIRROR_OBJECT_CLASS_OFFSET 0
@@ -236,6 +261,44 @@
 ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
             ~static_cast<uint32_t>(art::kObjectAlignment - 1))
 
+#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
+ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 4
+ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSizeShift))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 15
+ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff0
+ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32),
+            ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff0
+ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64),
+            ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+
+#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
+ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListOffset()))
+
+#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
+ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))
+
+#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
+ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))
+
+#define ROSALLOC_SLOT_NEXT_OFFSET 0
+ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset()))
+// Assert this so that we can avoid zeroing the next field by installing the class pointer.
+ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
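
The new POINTER_SIZE_SHIFT and RosAlloc constants above are each paired with an ADD_TEST_EQ check so the assembly-visible macros cannot drift from the C++ definitions they mirror. A minimal standalone sketch of the same idea, using static_assert and a hypothetical constant name (not ART code):

    // pointer_size_check.cc - illustrative only; mirrors the ADD_TEST_EQ pattern.
    #include <cstddef>

    #if defined(__LP64__)
    #define EXAMPLE_POINTER_SIZE_SHIFT 3
    #else
    #define EXAMPLE_POINTER_SIZE_SHIFT 2
    #endif

    // Compilation fails if the macro used by hand-written assembly ever
    // disagrees with the C++ value it is supposed to describe.
    static_assert((static_cast<size_t>(1) << EXAMPLE_POINTER_SIZE_SHIFT) == sizeof(void*),
                  "EXAMPLE_POINTER_SIZE_SHIFT does not match the pointer size");

    int main() { return 0; }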
diff --git a/runtime/base/allocator.cc b/runtime/base/allocator.cc
index 4f2fc07..f1d0a5f 100644
--- a/runtime/base/allocator.cc
+++ b/runtime/base/allocator.cc
@@ -49,15 +49,13 @@
   explicit NoopAllocator() {}
   ~NoopAllocator() {}
 
-  void* Alloc(size_t size) {
-    UNUSED(size);
+  void* Alloc(size_t size ATTRIBUTE_UNUSED) {
     LOG(FATAL) << "NoopAllocator::Alloc should not be called";
     UNREACHABLE();
   }
 
-  void Free(void* p) {
+  void Free(void* p ATTRIBUTE_UNUSED) {
     // Noop.
-    UNUSED(p);
   }
 
  private:
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 3422625..969f5b9 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -19,8 +19,10 @@
 
 #include <map>
 #include <set>
+#include <unordered_map>
 
 #include "atomic.h"
+#include "base/hash_map.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/type_static_if.h"
@@ -115,9 +117,7 @@
 
   // Used internally by STL data structures.
   template <class U>
-  TrackingAllocatorImpl(const TrackingAllocatorImpl<U, kTag>& alloc) noexcept {
-    UNUSED(alloc);
-  }
+  TrackingAllocatorImpl(const TrackingAllocatorImpl<U, kTag>& alloc ATTRIBUTE_UNUSED) noexcept {}
 
   // Used internally by STL data structures.
   TrackingAllocatorImpl() noexcept {
@@ -131,8 +131,7 @@
     typedef TrackingAllocatorImpl<U, kTag> other;
   };
 
-  pointer allocate(size_type n, const_pointer hint = 0) {
-    UNUSED(hint);
+  pointer allocate(size_type n, const_pointer hint ATTRIBUTE_UNUSED = 0) {
     const size_t size = n * sizeof(T);
     TrackedAllocators::RegisterAllocation(GetTag(), size);
     return reinterpret_cast<pointer>(malloc(size));
@@ -153,20 +152,33 @@
 template<class T, AllocatorTag kTag>
 // C++ doesn't allow template typedefs. This is a workaround template typedef which is
 // TrackingAllocatorImpl<T> if kEnableTrackingAllocator is true, std::allocator<T> otherwise.
-class TrackingAllocator : public TypeStaticIf<kEnableTrackingAllocator,
-                                              TrackingAllocatorImpl<T, kTag>,
-                                              std::allocator<T>>::type {
-};
+using TrackingAllocator = typename TypeStaticIf<kEnableTrackingAllocator,
+                                                TrackingAllocatorImpl<T, kTag>,
+                                                std::allocator<T>>::type;
 
 template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
-class AllocationTrackingMultiMap : public std::multimap<
-    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>> {
-};
+using AllocationTrackingMultiMap = std::multimap<
+    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>>;
 
 template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
-class AllocationTrackingSet : public std::set<Key, Compare, TrackingAllocator<Key, kTag>> {
-};
+using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
 
+template<class Key,
+         class T,
+         AllocatorTag kTag,
+         class Hash = std::hash<Key>,
+         class Pred = std::equal_to<Key>>
+using AllocationTrackingUnorderedMap = std::unordered_map<
+    Key, T, Hash, Pred, TrackingAllocator<std::pair<const Key, T>, kTag>>;
+
+template<class Key,
+         class T,
+         class EmptyFn,
+         AllocatorTag kTag,
+         class Hash = std::hash<Key>,
+         class Pred = std::equal_to<Key>>
+using AllocationTrackingHashMap = HashMap<
+    Key, T, EmptyFn, Hash, Pred, TrackingAllocator<std::pair<Key, T>, kTag>>;
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_ALLOCATOR_H_
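
The new AllocationTrackingUnorderedMap and AllocationTrackingHashMap aliases plug TrackingAllocatorImpl into std::unordered_map and the ART HashMap, so allocations are charged to an AllocatorTag when tracking is enabled. A usage sketch, assuming runtime/base/allocator.h is on the include path; kAllocatorTagExample is a placeholder for any real AllocatorTag enumerator:

    // Sketch only; kAllocatorTagExample stands in for a real art::AllocatorTag value.
    #include "base/allocator.h"

    void TrackAllocationsExample() {
      // Allocations made by this map are recorded under the tag when
      // kEnableTrackingAllocator is true; otherwise std::allocator is used.
      art::AllocationTrackingUnorderedMap<int, int, art::kAllocatorTagExample> map;
      map.emplace(1, 2);
    }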
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 6f2aa46..71afa0f 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -93,6 +93,9 @@
   "StackMapStm  ",
   "CodeGen      ",
   "ParallelMove ",
+  "GraphChecker ",
+  "LSE          ",
+  "Verifier     ",
 };
 
 template <bool kCount>
@@ -156,6 +159,18 @@
 // Explicitly instantiate the used implementation.
 template class ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations>;
 
+void ArenaAllocatorMemoryTool::DoMakeDefined(void* ptr, size_t size) {
+  MEMORY_TOOL_MAKE_DEFINED(ptr, size);
+}
+
+void ArenaAllocatorMemoryTool::DoMakeUndefined(void* ptr, size_t size) {
+  MEMORY_TOOL_MAKE_UNDEFINED(ptr, size);
+}
+
+void ArenaAllocatorMemoryTool::DoMakeInaccessible(void* ptr, size_t size) {
+  MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
+}
+
 Arena::Arena() : bytes_allocated_(0), next_(nullptr) {
 }
 
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 565b416..ace6c38 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -105,6 +105,9 @@
   kArenaAllocStackMapStream,
   kArenaAllocCodeGenerator,
   kArenaAllocParallelMoveResolver,
+  kArenaAllocGraphChecker,
+  kArenaAllocLSE,
+  kArenaAllocVerifier,
   kNumArenaAllocKinds
 };
 
@@ -118,13 +121,13 @@
   ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default;
   ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete;
 
-  void Copy(const ArenaAllocatorStatsImpl& other) { UNUSED(other); }
-  void RecordAlloc(size_t bytes, ArenaAllocKind kind) { UNUSED(bytes, kind); }
+  void Copy(const ArenaAllocatorStatsImpl& other ATTRIBUTE_UNUSED) {}
+  void RecordAlloc(size_t bytes ATTRIBUTE_UNUSED, ArenaAllocKind kind ATTRIBUTE_UNUSED) {}
   size_t NumAllocations() const { return 0u; }
   size_t BytesAllocated() const { return 0u; }
-  void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const {
-    UNUSED(os); UNUSED(first); UNUSED(lost_bytes_adjustment);
-  }
+  void Dump(std::ostream& os ATTRIBUTE_UNUSED,
+            const Arena* first ATTRIBUTE_UNUSED,
+            ssize_t lost_bytes_adjustment ATTRIBUTE_UNUSED) const {}
 };
 
 template <bool kCount>
@@ -180,20 +183,25 @@
   using ArenaAllocatorMemoryToolCheck::IsRunningOnMemoryTool;
 
   void MakeDefined(void* ptr, size_t size) {
-    if (IsRunningOnMemoryTool()) {
-      MEMORY_TOOL_MAKE_DEFINED(ptr, size);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      DoMakeDefined(ptr, size);
     }
   }
   void MakeUndefined(void* ptr, size_t size) {
-    if (IsRunningOnMemoryTool()) {
-      MEMORY_TOOL_MAKE_UNDEFINED(ptr, size);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      DoMakeUndefined(ptr, size);
     }
   }
   void MakeInaccessible(void* ptr, size_t size) {
-    if (IsRunningOnMemoryTool()) {
-      MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      DoMakeInaccessible(ptr, size);
     }
   }
+
+ private:
+  void DoMakeDefined(void* ptr, size_t size);
+  void DoMakeUndefined(void* ptr, size_t size);
+  void DoMakeInaccessible(void* ptr, size_t size);
 };
 
 class Arena {
diff --git a/compiler/utils/arena_bit_vector.cc b/runtime/base/arena_bit_vector.cc
similarity index 100%
rename from compiler/utils/arena_bit_vector.cc
rename to runtime/base/arena_bit_vector.cc
diff --git a/compiler/utils/arena_bit_vector.h b/runtime/base/arena_bit_vector.h
similarity index 92%
rename from compiler/utils/arena_bit_vector.h
rename to runtime/base/arena_bit_vector.h
index f2a7452..d606166 100644
--- a/compiler/utils/arena_bit_vector.h
+++ b/runtime/base/arena_bit_vector.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_
-#define ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_
+#ifndef ART_RUNTIME_BASE_ARENA_BIT_VECTOR_H_
+#define ART_RUNTIME_BASE_ARENA_BIT_VECTOR_H_
 
 #include "base/arena_object.h"
 #include "base/bit_vector.h"
@@ -65,4 +65,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_
+#endif  // ART_RUNTIME_BASE_ARENA_BIT_VECTOR_H_
diff --git a/runtime/base/arena_containers.h b/runtime/base/arena_containers.h
index e7ea09d..e2d4c24 100644
--- a/runtime/base/arena_containers.h
+++ b/runtime/base/arena_containers.h
@@ -20,9 +20,12 @@
 #include <deque>
 #include <queue>
 #include <set>
-#include <vector>
+#include <utility>
 
 #include "arena_allocator.h"
+#include "base/dchecked_vector.h"
+#include "hash_map.h"
+#include "hash_set.h"
 #include "safe_map.h"
 
 namespace art {
@@ -48,7 +51,7 @@
 using ArenaQueue = std::queue<T, ArenaDeque<T>>;
 
 template <typename T>
-using ArenaVector = std::vector<T, ArenaAllocatorAdapter<T>>;
+using ArenaVector = dchecked_vector<T, ArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>
 using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>;
@@ -57,6 +60,24 @@
 using ArenaSafeMap =
     SafeMap<K, V, Comparator, ArenaAllocatorAdapter<std::pair<const K, V>>>;
 
+template <typename T,
+          typename EmptyFn = DefaultEmptyFn<T>,
+          typename HashFn = std::hash<T>,
+          typename Pred = std::equal_to<T>>
+using ArenaHashSet = HashSet<T, EmptyFn, HashFn, Pred, ArenaAllocatorAdapter<T>>;
+
+template <typename Key,
+          typename Value,
+          typename EmptyFn = DefaultEmptyFn<std::pair<Key, Value>>,
+          typename HashFn = std::hash<Key>,
+          typename Pred = std::equal_to<Key>>
+using ArenaHashMap = HashMap<Key,
+                             Value,
+                             EmptyFn,
+                             HashFn,
+                             Pred,
+                             ArenaAllocatorAdapter<std::pair<Key, Value>>>;
+
 // Implementation details below.
 
 template <bool kCount>
@@ -155,8 +176,8 @@
   pointer address(reference x) const { return &x; }
   const_pointer address(const_reference x) const { return &x; }
 
-  pointer allocate(size_type n, ArenaAllocatorAdapter<void>::pointer hint = nullptr) {
-    UNUSED(hint);
+  pointer allocate(size_type n,
+                   ArenaAllocatorAdapter<void>::pointer hint ATTRIBUTE_UNUSED = nullptr) {
     DCHECK_LE(n, max_size());
     return arena_allocator_->AllocArray<T>(n, ArenaAllocatorAdapterKind::Kind());
   }
@@ -164,11 +185,13 @@
     arena_allocator_->MakeInaccessible(p, sizeof(T) * n);
   }
 
-  void construct(pointer p, const_reference val) {
-    new (static_cast<void*>(p)) value_type(val);
+  template <typename U, typename... Args>
+  void construct(U* p, Args&&... args) {
+    ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
   }
-  void destroy(pointer p) {
-    p->~value_type();
+  template <typename U>
+  void destroy(U* p) {
+    p->~U();
   }
 
  private:
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index cfd3d24..5e97a63 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -312,10 +312,6 @@
   }
 }
 
-#if defined(__clang__) && defined(__ARM_64BIT_STATE)
-// b/19180814 When POPCOUNT is inlined, boot up failed on arm64 devices.
-__attribute__((optnone))
-#endif
 uint32_t BitVector::NumSetBits(const uint32_t* storage, uint32_t end) {
   uint32_t word_end = WordIndex(end);
   uint32_t partial_word_bits = end & 0x1f;
diff --git a/runtime/base/dchecked_vector.h b/runtime/base/dchecked_vector.h
new file mode 100644
index 0000000..2bd12df
--- /dev/null
+++ b/runtime/base/dchecked_vector.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_DCHECKED_VECTOR_H_
+#define ART_RUNTIME_BASE_DCHECKED_VECTOR_H_
+
+#include <algorithm>
+#include <type_traits>
+#include <vector>
+
+#include "base/logging.h"
+
+namespace art {
+
+// Template class serving as a replacement for std::vector<> but adding
+// DCHECK()s for the subscript operator, front(), back(), pop_back(),
+// and for insert()/emplace()/erase() positions.
+//
+// Note: The element accessor at() is specified as throwing std::out_of_range
+// but we do not use exceptions, so this accessor is deliberately hidden.
+// Note: The common pattern &v[0] used to retrieve pointer to the data is not
+// valid for an empty dchecked_vector<>. Use data() to avoid checking empty().
+template <typename T, typename Alloc>
+class dchecked_vector : private std::vector<T, Alloc> {
+ private:
+  // std::vector<> has a slightly different specialization for bool. We don't provide that.
+  static_assert(!std::is_same<T, bool>::value, "Not implemented for bool.");
+  using Base = std::vector<T, Alloc>;
+
+ public:
+  using typename Base::value_type;
+  using typename Base::allocator_type;
+  using typename Base::reference;
+  using typename Base::const_reference;
+  using typename Base::pointer;
+  using typename Base::const_pointer;
+  using typename Base::iterator;
+  using typename Base::const_iterator;
+  using typename Base::reverse_iterator;
+  using typename Base::const_reverse_iterator;
+  using typename Base::size_type;
+  using typename Base::difference_type;
+
+  // Construct/copy/destroy.
+  dchecked_vector()
+      : Base() { }
+  explicit dchecked_vector(const allocator_type& alloc)
+      : Base(alloc) { }
+  // Note that we cannot forward to std::vector(size_type, const allocator_type&) because it is not
+  // available in C++11, which is the latest standard GCC can support. http://b/25022512
+  explicit dchecked_vector(size_type n, const allocator_type& alloc = allocator_type())
+      : Base(alloc) { resize(n); }
+  dchecked_vector(size_type n,
+                  const value_type& value,
+                  const allocator_type& alloc = allocator_type())
+      : Base(n, value, alloc) { }
+  template <typename InputIterator>
+  dchecked_vector(InputIterator first,
+                  InputIterator last,
+                  const allocator_type& alloc = allocator_type())
+      : Base(first, last, alloc) { }
+  dchecked_vector(const dchecked_vector& src)
+      : Base(src) { }
+  dchecked_vector(const dchecked_vector& src, const allocator_type& alloc)
+      : Base(src, alloc) { }
+  dchecked_vector(dchecked_vector&& src)
+      : Base(std::move(src)) { }
+  dchecked_vector(dchecked_vector&& src, const allocator_type& alloc)
+      : Base(std::move(src), alloc) { }
+  dchecked_vector(std::initializer_list<value_type> il,
+                  const allocator_type& alloc = allocator_type())
+      : Base(il, alloc) { }
+  ~dchecked_vector() = default;
+  dchecked_vector& operator=(const dchecked_vector& src) {
+    Base::operator=(src);
+    return *this;
+  }
+  dchecked_vector& operator=(dchecked_vector&& src) {
+    Base::operator=(std::move(src));
+    return *this;
+  }
+  dchecked_vector& operator=(std::initializer_list<value_type> il) {
+    Base::operator=(il);
+    return *this;
+  }
+
+  // Iterators.
+  using Base::begin;
+  using Base::end;
+  using Base::rbegin;
+  using Base::rend;
+  using Base::cbegin;
+  using Base::cend;
+  using Base::crbegin;
+  using Base::crend;
+
+  // Capacity.
+  using Base::size;
+  using Base::max_size;
+  using Base::resize;
+  using Base::capacity;
+  using Base::empty;
+  using Base::reserve;
+  using Base::shrink_to_fit;
+
+  // Element access: inherited.
+  // Note: Deliberately not providing at().
+  using Base::data;
+
+  // Element access: subscript operator. Check index.
+  reference operator[](size_type n) {
+    DCHECK_LT(n, size());
+    return Base::operator[](n);
+  }
+  const_reference operator[](size_type n) const {
+    DCHECK_LT(n, size());
+    return Base::operator[](n);
+  }
+
+  // Element access: front(), back(). Check not empty.
+  reference front() { DCHECK(!empty()); return Base::front(); }
+  const_reference front() const { DCHECK(!empty()); return Base::front(); }
+  reference back() { DCHECK(!empty()); return Base::back(); }
+  const_reference back() const { DCHECK(!empty()); return Base::back(); }
+
+  // Modifiers: inherited.
+  using Base::assign;
+  using Base::push_back;
+  using Base::clear;
+  using Base::emplace_back;
+
+  // Modifiers: pop_back(). Check not empty.
+  void pop_back() { DCHECK(!empty()); Base::pop_back(); }
+
+  // Modifiers: swap(). Swap only with another dchecked_vector instead of a plain vector.
+  void swap(dchecked_vector& other) { Base::swap(other); }
+
+  // Modifiers: insert(). Check position.
+  iterator insert(const_iterator position, const value_type& value) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, value);
+  }
+  iterator insert(const_iterator position, size_type n, const value_type& value) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, n, value);
+  }
+  template <typename InputIterator>
+  iterator insert(const_iterator position, InputIterator first, InputIterator last) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, first, last);
+  }
+  iterator insert(const_iterator position, value_type&& value) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, std::move(value));
+  }
+  iterator insert(const_iterator position, std::initializer_list<value_type> il) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, il);
+  }
+
+  // Modifiers: erase(). Check position.
+  iterator erase(const_iterator position) {
+    DCHECK(cbegin() <= position && position < cend());
+    return Base::erase(position);
+  }
+  iterator erase(const_iterator first, const_iterator last) {
+    DCHECK(cbegin() <= first && first <= cend());
+    DCHECK(first <= last && last <= cend());
+    return Base::erase(first, last);
+  }
+
+  // Modifiers: emplace(). Check position.
+  template <typename... Args>
+  iterator emplace(const_iterator position, Args&&... args) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::emplace(position, std::forward<Args>(args)...);
+  }
+
+  // Allocator.
+  using Base::get_allocator;
+};
+
+// Non-member swap(), found by argument-dependent lookup for an unqualified call.
+template <typename T, typename Alloc>
+void swap(dchecked_vector<T, Alloc>& lhs, dchecked_vector<T, Alloc>& rhs) {
+  lhs.swap(rhs);
+}
+
+// Non-member relational operators.
+template <typename T, typename Alloc>
+bool operator==(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+}
+template <typename T, typename Alloc>
+bool operator!=(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return !(lhs == rhs);
+}
+template <typename T, typename Alloc>
+bool operator<(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+template <typename T, typename Alloc>
+bool operator<=(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return !(rhs < lhs);
+}
+template <typename T, typename Alloc>
+bool operator>(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return rhs < lhs;
+}
+template <typename T, typename Alloc>
+bool operator>=(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return !(lhs < rhs);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_DCHECKED_VECTOR_H_
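
dchecked_vector<> above keeps the std::vector interface but turns out-of-range subscripts and misuse of front()/back()/pop_back() into debug-build aborts. A minimal usage sketch, assuming the new header; the explicit std::allocator argument matches the template as declared in this file:

    // Sketch only, assuming runtime/base/dchecked_vector.h is available.
    #include <string>
    #include "base/dchecked_vector.h"

    void DcheckedVectorExample() {
      art::dchecked_vector<std::string, std::allocator<std::string>> names;
      names.push_back("foo");
      names.push_back("bar");
      names.pop_back();        // OK: the vector is not empty.
      // names[5];             // Debug build: DCHECK_LT(5, size()) aborts here.
      // names.data();         // Preferred over &names[0], which is invalid when empty.
    }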
diff --git a/runtime/base/debug_stack.h b/runtime/base/debug_stack.h
index 03f4575..e19aecb 100644
--- a/runtime/base/debug_stack.h
+++ b/runtime/base/debug_stack.h
@@ -54,7 +54,7 @@
 template <>
 class DebugStackReferenceImpl<false> {
  public:
-  explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<false>* counter) { UNUSED(counter); }
+  explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<false>* counter ATTRIBUTE_UNUSED) {}
   DebugStackReferenceImpl(const DebugStackReferenceImpl& other) = default;
   DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) = default;
   void CheckTop() { }
@@ -63,7 +63,7 @@
 template <>
 class DebugStackIndirectTopRefImpl<false> {
  public:
-  explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<false>* ref) { UNUSED(ref); }
+  explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<false>* ref ATTRIBUTE_UNUSED) {}
   DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) = default;
   DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) = default;
   void CheckTop() { }
diff --git a/runtime/base/hash_map.h b/runtime/base/hash_map.h
index eab80ff..b18d586 100644
--- a/runtime/base/hash_map.h
+++ b/runtime/base/hash_map.h
@@ -51,8 +51,22 @@
 template <class Key, class Value, class EmptyFn,
     class HashFn = std::hash<Key>, class Pred = std::equal_to<Key>,
     class Alloc = std::allocator<std::pair<Key, Value>>>
-class HashMap : public HashSet<std::pair<Key, Value>, EmptyFn, HashMapWrapper<HashFn>,
-                               HashMapWrapper<Pred>, Alloc> {
+class HashMap : public HashSet<std::pair<Key, Value>,
+                               EmptyFn,
+                               HashMapWrapper<HashFn>,
+                               HashMapWrapper<Pred>,
+                               Alloc> {
+ private:
+  using Base = HashSet<std::pair<Key, Value>,
+                       EmptyFn,
+                       HashMapWrapper<HashFn>,
+                       HashMapWrapper<Pred>,
+                       Alloc>;
+
+ public:
+  HashMap() : Base() { }
+  explicit HashMap(const Alloc& alloc)
+      : Base(alloc) { }
 };
 
 }  // namespace art
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index d110fe3..95baa82 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_BASE_HASH_SET_H_
 
 #include <functional>
+#include <iterator>
 #include <memory>
 #include <stdint.h>
 #include <utility>
@@ -45,7 +46,7 @@
   void MakeEmpty(T*& item) const {
     item = nullptr;
   }
-  bool IsEmpty(const T*& item) const {
+  bool IsEmpty(T* const& item) const {
     return item == nullptr;
   }
 };
@@ -59,7 +60,7 @@
     class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
 class HashSet {
   template <class Elem, class HashSetType>
-  class BaseIterator {
+  class BaseIterator : std::iterator<std::forward_iterator_tag, Elem> {
    public:
     BaseIterator(const BaseIterator&) = default;
     BaseIterator(BaseIterator&&) = default;
@@ -82,7 +83,7 @@
     }
 
     BaseIterator operator++(int) {
-      Iterator temp = *this;
+      BaseIterator temp = *this;
       this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
       return temp;
     }
@@ -96,7 +97,7 @@
       return &**this;
     }
 
-    // TODO: Operator -- --(int)
+    // TODO: Operator -- --(int)  (and use std::bidirectional_iterator_tag)
 
    private:
     size_t index_;
@@ -115,34 +116,91 @@
   };
 
  public:
-  static constexpr double kDefaultMinLoadFactor = 0.5;
-  static constexpr double kDefaultMaxLoadFactor = 0.9;
-  static constexpr size_t kMinBuckets = 1000;
+  using value_type = T;
+  using allocator_type = Alloc;
+  using reference = T&;
+  using const_reference = const T&;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using iterator = BaseIterator<T, HashSet>;
+  using const_iterator = BaseIterator<const T, const HashSet>;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
 
-  typedef BaseIterator<T, HashSet> Iterator;
-  typedef BaseIterator<const T, const HashSet> ConstIterator;
+  static constexpr double kDefaultMinLoadFactor = 0.4;
+  static constexpr double kDefaultMaxLoadFactor = 0.7;
+  static constexpr size_t kMinBuckets = 1000;
 
   // If we don't own the data, this will create a new array which owns the data.
   void Clear() {
     DeallocateStorage();
-    AllocateStorage(1);
     num_elements_ = 0;
     elements_until_expand_ = 0;
   }
 
-  HashSet() : num_elements_(0), num_buckets_(0), owns_data_(false), data_(nullptr),
-      min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
-    Clear();
+  HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
+
+  HashSet(double min_load_factor, double max_load_factor)
+      : num_elements_(0u),
+        num_buckets_(0u),
+        elements_until_expand_(0u),
+        owns_data_(false),
+        data_(nullptr),
+        min_load_factor_(min_load_factor),
+        max_load_factor_(max_load_factor) {
+    DCHECK_GT(min_load_factor, 0.0);
+    DCHECK_LT(max_load_factor, 1.0);
   }
 
-  HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), owns_data_(false),
-      data_(nullptr) {
-    *this = other;
+  explicit HashSet(const allocator_type& alloc)
+      : allocfn_(alloc),
+        hashfn_(),
+        emptyfn_(),
+        pred_(),
+        num_elements_(0u),
+        num_buckets_(0u),
+        elements_until_expand_(0u),
+        owns_data_(false),
+        data_(nullptr),
+        min_load_factor_(kDefaultMinLoadFactor),
+        max_load_factor_(kDefaultMaxLoadFactor) {
   }
 
-  HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), owns_data_(false),
-      data_(nullptr) {
-    *this = std::move(other);
+  HashSet(const HashSet& other)
+      : allocfn_(other.allocfn_),
+        hashfn_(other.hashfn_),
+        emptyfn_(other.emptyfn_),
+        pred_(other.pred_),
+        num_elements_(other.num_elements_),
+        num_buckets_(0),
+        elements_until_expand_(other.elements_until_expand_),
+        owns_data_(false),
+        data_(nullptr),
+        min_load_factor_(other.min_load_factor_),
+        max_load_factor_(other.max_load_factor_) {
+    AllocateStorage(other.NumBuckets());
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      ElementForIndex(i) = other.data_[i];
+    }
+  }
+
+  HashSet(HashSet&& other)
+      : allocfn_(std::move(other.allocfn_)),
+        hashfn_(std::move(other.hashfn_)),
+        emptyfn_(std::move(other.emptyfn_)),
+        pred_(std::move(other.pred_)),
+        num_elements_(other.num_elements_),
+        num_buckets_(other.num_buckets_),
+        elements_until_expand_(other.elements_until_expand_),
+        owns_data_(other.owns_data_),
+        data_(other.data_),
+        min_load_factor_(other.min_load_factor_),
+        max_load_factor_(other.max_load_factor_) {
+    other.num_elements_ = 0u;
+    other.num_buckets_ = 0u;
+    other.elements_until_expand_ = 0u;
+    other.owns_data_ = false;
+    other.data_ = nullptr;
   }
 
   // Construct from existing data.
@@ -199,32 +257,18 @@
   }
 
   HashSet& operator=(HashSet&& other) {
-    std::swap(data_, other.data_);
-    std::swap(num_buckets_, other.num_buckets_);
-    std::swap(num_elements_, other.num_elements_);
-    std::swap(elements_until_expand_, other.elements_until_expand_);
-    std::swap(min_load_factor_, other.min_load_factor_);
-    std::swap(max_load_factor_, other.max_load_factor_);
-    std::swap(owns_data_, other.owns_data_);
+    HashSet(std::move(other)).swap(*this);
     return *this;
   }
 
   HashSet& operator=(const HashSet& other) {
-    DeallocateStorage();
-    AllocateStorage(other.NumBuckets());
-    for (size_t i = 0; i < num_buckets_; ++i) {
-      ElementForIndex(i) = other.data_[i];
-    }
-    num_elements_ = other.num_elements_;
-    elements_until_expand_ = other.elements_until_expand_;
-    min_load_factor_ = other.min_load_factor_;
-    max_load_factor_ = other.max_load_factor_;
+    HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
     return *this;
   }
 
   // Lower case for c++11 for each.
-  Iterator begin() {
-    Iterator ret(this, 0);
+  iterator begin() {
+    iterator ret(this, 0);
     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
       ++ret;  // Skip all the empty slots.
     }
@@ -232,8 +276,8 @@
   }
 
   // Lower case for c++11 for each. const version.
-  ConstIterator begin() const {
-    ConstIterator ret(this, 0);
+  const_iterator begin() const {
+    const_iterator ret(this, 0);
     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
       ++ret;  // Skip all the empty slots.
     }
@@ -241,13 +285,13 @@
   }
 
   // Lower case for c++11 for each.
-  Iterator end() {
-    return Iterator(this, NumBuckets());
+  iterator end() {
+    return iterator(this, NumBuckets());
   }
 
   // Lower case for c++11 for each. const version.
-  ConstIterator end() const {
-    return ConstIterator(this, NumBuckets());
+  const_iterator end() const {
+    return const_iterator(this, NumBuckets());
   }
 
   bool Empty() {
@@ -262,7 +306,7 @@
   // and set the empty slot to be the location we just moved from.
   // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
   // element to its actual location/index.
-  Iterator Erase(Iterator it) {
+  iterator Erase(iterator it) {
     // empty_index is the index that will become empty.
     size_t empty_index = it.index_;
     DCHECK(!IsFreeSlot(empty_index));
@@ -313,23 +357,23 @@
   // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
   // object in the heap for performance solution.
   template <typename K>
-  Iterator Find(const K& key) {
+  iterator Find(const K& key) {
     return FindWithHash(key, hashfn_(key));
   }
 
   template <typename K>
-  ConstIterator Find(const K& key) const {
+  const_iterator Find(const K& key) const {
     return FindWithHash(key, hashfn_(key));
   }
 
   template <typename K>
-  Iterator FindWithHash(const K& key, size_t hash) {
-    return Iterator(this, FindIndex(key, hash));
+  iterator FindWithHash(const K& key, size_t hash) {
+    return iterator(this, FindIndex(key, hash));
   }
 
   template <typename K>
-  ConstIterator FindWithHash(const K& key, size_t hash) const {
-    return ConstIterator(this, FindIndex(key, hash));
+  const_iterator FindWithHash(const K& key, size_t hash) const {
+    return const_iterator(this, FindIndex(key, hash));
   }
 
   // Insert an element, allows duplicates.
@@ -352,10 +396,43 @@
     return num_elements_;
   }
 
+  void swap(HashSet& other) {
+    // Use argument-dependent lookup with fall-back to std::swap() for function objects.
+    using std::swap;
+    swap(allocfn_, other.allocfn_);
+    swap(hashfn_, other.hashfn_);
+    swap(emptyfn_, other.emptyfn_);
+    swap(pred_, other.pred_);
+    std::swap(data_, other.data_);
+    std::swap(num_buckets_, other.num_buckets_);
+    std::swap(num_elements_, other.num_elements_);
+    std::swap(elements_until_expand_, other.elements_until_expand_);
+    std::swap(min_load_factor_, other.min_load_factor_);
+    std::swap(max_load_factor_, other.max_load_factor_);
+    std::swap(owns_data_, other.owns_data_);
+  }
+
+  allocator_type get_allocator() const {
+    return allocfn_;
+  }
+
   void ShrinkToMaximumLoad() {
     Resize(Size() / max_load_factor_);
   }
 
+  // Reserve enough room so that Size() can reach num_elements without the hash set needing
+  // to grow. No-op if the hash set is already large enough to do this.
+  void Reserve(size_t num_elements) {
+    size_t num_buckets = num_elements / max_load_factor_;
+    // Deal with rounding errors. Add one for rounding.
+    while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
+      ++num_buckets;
+    }
+    if (num_buckets > NumBuckets()) {
+      Resize(num_buckets);
+    }
+  }
+
   // Total distance that inserted elements were probed. Used for measuring how good hash functions
   // are.
   size_t TotalProbeDistance() const {
@@ -399,6 +476,40 @@
     return errors;
   }
 
+  double GetMinLoadFactor() const {
+    return min_load_factor_;
+  }
+
+  double GetMaxLoadFactor() const {
+    return max_load_factor_;
+  }
+
+  // Change the load factor of the hash set. If the current load factor is greater than the max
+  // specified, then we resize the hash table storage.
+  void SetLoadFactor(double min_load_factor, double max_load_factor) {
+    DCHECK_LT(min_load_factor, max_load_factor);
+    DCHECK_GT(min_load_factor, 0.0);
+    DCHECK_LT(max_load_factor, 1.0);
+    min_load_factor_ = min_load_factor;
+    max_load_factor_ = max_load_factor;
+    elements_until_expand_ = NumBuckets() * max_load_factor_;
+    // If the current load factor isn't in the range, then resize to the mean of the minimum and
+    // maximum load factor.
+    const double load_factor = CalculateLoadFactor();
+    if (load_factor > max_load_factor_) {
+      Resize(Size() / ((min_load_factor_ + max_load_factor_) * 0.5));
+    }
+  }
+
+  // The hash set expands when Size() reaches ElementsUntilExpand().
+  size_t ElementsUntilExpand() const {
+    return elements_until_expand_;
+  }
+
+  size_t NumBuckets() const {
+    return num_buckets_;
+  }
+
  private:
   T& ElementForIndex(size_t index) {
     DCHECK_LT(index, NumBuckets());
@@ -429,7 +540,7 @@
   }
 
   // Find the hash table slot for an element, or return NumBuckets() if not found.
-  // This value for not found is important so that Iterator(this, FindIndex(...)) == end().
+  // This value for not found is important so that iterator(this, FindIndex(...)) == end().
   template <typename K>
   size_t FindIndex(const K& element, size_t hash) const {
     // Guard against failing to get an element for a non-existing index.
@@ -454,10 +565,6 @@
     return emptyfn_.IsEmpty(ElementForIndex(index));
   }
 
-  size_t NumBuckets() const {
-    return num_buckets_;
-  }
-
   // Allocate a number of buckets.
   void AllocateStorage(size_t num_buckets) {
     num_buckets_ = num_buckets;
@@ -560,6 +667,12 @@
   double max_load_factor_;
 };
 
+template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
+void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
+          HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
+  lhs.swap(rhs);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_HASH_SET_H_
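
Reserve() and SetLoadFactor() above give callers direct control over rehashing. A usage sketch, assuming the ART HashSet header; the EmptyFn for int below is illustrative and simply reserves -1 as the empty-slot marker:

    // Sketch only; EmptyFnInt is not part of ART.
    #include "base/hash_set.h"

    struct EmptyFnInt {
      void MakeEmpty(int& item) const { item = -1; }
      bool IsEmpty(const int& item) const { return item == -1; }
    };

    void HashSetReserveExample() {
      art::HashSet<int, EmptyFnInt> set;
      set.Reserve(100);              // Size() can now reach 100 without a rehash.
      for (int i = 0; i < 100; ++i) {
        set.Insert(i);
      }
      set.SetLoadFactor(0.3, 0.6);   // Resizes immediately if the load factor exceeds 0.6.
    }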
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index 6d2c5e0..8254063 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -196,6 +196,24 @@
   }
 }
 
+TEST_F(HashSetTest, TestLoadFactor) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  static constexpr size_t kStringCount = 1000;
+  static constexpr double kEpsilon = 0.01;
+  for (size_t i = 0; i < kStringCount; ++i) {
+    hash_set.Insert(RandomString(i % 10 + 1));
+  }
+  // Check that changing the load factor resizes the table to be within the target range.
+  EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
+  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
+  hash_set.SetLoadFactor(0.1, 0.3);
+  EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
+  EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
+  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
+  hash_set.SetLoadFactor(0.6, 0.8);
+  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
+}
+
 TEST_F(HashSetTest, TestStress) {
   HashSet<std::string, IsEmptyFnString> hash_set;
   std::unordered_multiset<std::string> std_set;
@@ -315,4 +333,25 @@
   ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4})));
 }
 
+TEST_F(HashSetTest, TestReserve) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
+  for (size_t size : sizes) {
+    hash_set.Reserve(size);
+    const size_t buckets_before = hash_set.NumBuckets();
+    // Check that we expanded enough.
+    CHECK_GE(hash_set.ElementsUntilExpand(), size);
+    // Try inserting elements until we are at our reserve size and ensure the hash set did not
+    // expand.
+    while (hash_set.Size() < size) {
+      hash_set.Insert(std::to_string(hash_set.Size()));
+    }
+    CHECK_EQ(hash_set.NumBuckets(), buckets_before);
+  }
+  // Check the behaviour for shrinking; the hash set does not necessarily resize down.
+  constexpr size_t size = 100;
+  hash_set.Reserve(size);
+  CHECK_GE(hash_set.ElementsUntilExpand(), size);
+}
+
 }  // namespace art
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index b2c5677..70bd398 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -50,6 +50,8 @@
 Mutex* Locks::modify_ldt_lock_ = nullptr;
 MutatorMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
+ReaderWriterMutex* Locks::oat_file_manager_lock_ = nullptr;
+ReaderWriterMutex* Locks::oat_file_count_lock_ = nullptr;
 Mutex* Locks::reference_processor_lock_ = nullptr;
 Mutex* Locks::reference_queue_cleared_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_finalizer_references_lock_ = nullptr;
@@ -940,6 +942,8 @@
     DCHECK(classlinker_classes_lock_ != nullptr);
     DCHECK(deoptimization_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
+    DCHECK(oat_file_manager_lock_ != nullptr);
+    DCHECK(oat_file_count_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
     DCHECK(jni_libraries_lock_ != nullptr);
     DCHECK(logging_lock_ != nullptr);
@@ -1028,6 +1032,14 @@
       modify_ldt_lock_ = new Mutex("modify_ldt lock", current_lock_level);
     }
 
+    UPDATE_CURRENT_LOCK_LEVEL(kOatFileManagerLock);
+    DCHECK(oat_file_manager_lock_ == nullptr);
+    oat_file_manager_lock_ = new ReaderWriterMutex("OatFile manager lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kOatFileCountLock);
+    DCHECK(oat_file_count_lock_ == nullptr);
+    oat_file_count_lock_ = new ReaderWriterMutex("OatFile count lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
     intern_table_lock_ = new Mutex("InternTable lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3da806b..d4c9057 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -83,6 +83,8 @@
   kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
+  kOatFileCountLock,
+  kOatFileManagerLock,
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
   kDefaultMutexLevel,
@@ -644,8 +646,14 @@
   // Guards modification of the LDT on x86.
   static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
 
+  // Guards opened oat files in OatFileManager.
+  static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
+
+  // Guards opened oat files in OatFileManager.
+  static ReaderWriterMutex* oat_file_count_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
+
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(oat_file_count_lock_);
 
   // Guards reference processor.
   static Mutex* reference_processor_lock_ ACQUIRED_AFTER(intern_table_lock_);
diff --git a/runtime/base/scoped_arena_allocator.h b/runtime/base/scoped_arena_allocator.h
index 4f51370..a30c73d 100644
--- a/runtime/base/scoped_arena_allocator.h
+++ b/runtime/base/scoped_arena_allocator.h
@@ -31,6 +31,16 @@
 template <typename T>
 class ScopedArenaAllocatorAdapter;
 
+// Tag associated with each allocation to help prevent double free.
+enum class ArenaFreeTag : uint8_t {
+  // Allocation is used and has not yet been destroyed.
+  kUsed,
+  // Allocation has been destroyed.
+  kFree,
+};
+
+static constexpr size_t kArenaAlignment = 8;
+
 // Holds a list of Arenas for use by ScopedArenaAllocator stack.
 class ArenaStack : private DebugStackRefCounter, private ArenaAllocatorMemoryTool {
  public:
@@ -50,6 +60,12 @@
 
   MemStats GetPeakStats() const;
 
+  // Return the arena tag associated with a pointer.
+  static ArenaFreeTag& ArenaTagForAllocation(void* ptr) {
+    DCHECK(kIsDebugBuild) << "Only debug builds have tags";
+    return *(reinterpret_cast<ArenaFreeTag*>(ptr) - 1);
+  }
+
  private:
   struct Peak;
   struct Current;
@@ -72,13 +88,18 @@
     if (UNLIKELY(IsRunningOnMemoryTool())) {
       return AllocWithMemoryTool(bytes, kind);
     }
-    size_t rounded_bytes = RoundUp(bytes, 8);
+    // Add kArenaAlignment for the free or used tag. Required to preserve alignment.
+    size_t rounded_bytes = RoundUp(bytes + (kIsDebugBuild ? kArenaAlignment : 0u), kArenaAlignment);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
     }
     CurrentStats()->RecordAlloc(bytes, kind);
     top_ptr_ = ptr + rounded_bytes;
+    if (kIsDebugBuild) {
+      ptr += kArenaAlignment;
+      ArenaTagForAllocation(ptr) = ArenaFreeTag::kUsed;
+    }
     return ptr;
   }
 
@@ -132,7 +153,7 @@
   ScopedArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL);
 
   // Allow a delete-expression to destroy but not deallocate allocators created by Create().
-  static void operator delete(void* ptr) { UNUSED(ptr); }
+  static void operator delete(void* ptr ATTRIBUTE_UNUSED) {}
 
  private:
   ArenaStack* const arena_stack_;
diff --git a/runtime/base/scoped_arena_containers.h b/runtime/base/scoped_arena_containers.h
index eecc55f..1236585 100644
--- a/runtime/base/scoped_arena_containers.h
+++ b/runtime/base/scoped_arena_containers.h
@@ -20,10 +20,12 @@
 #include <deque>
 #include <queue>
 #include <set>
+#include <type_traits>
 #include <unordered_map>
-#include <vector>
+#include <utility>
 
 #include "arena_containers.h"  // For ArenaAllocatorAdapterKind.
+#include "base/dchecked_vector.h"
 #include "scoped_arena_allocator.h"
 #include "safe_map.h"
 
@@ -47,7 +49,7 @@
 using ScopedArenaQueue = std::queue<T, ScopedArenaDeque<T>>;
 
 template <typename T>
-using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>;
+using ScopedArenaVector = dchecked_vector<T, ScopedArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>
 using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T>>;
@@ -145,8 +147,8 @@
   pointer address(reference x) const { return &x; }
   const_pointer address(const_reference x) const { return &x; }
 
-  pointer allocate(size_type n, ScopedArenaAllocatorAdapter<void>::pointer hint = nullptr) {
-    UNUSED(hint);
+  pointer allocate(size_type n,
+                   ScopedArenaAllocatorAdapter<void>::pointer hint ATTRIBUTE_UNUSED = nullptr) {
     DCHECK_LE(n, max_size());
     DebugStackIndirectTopRef::CheckTop();
     return reinterpret_cast<T*>(arena_stack_->Alloc(n * sizeof(T),
@@ -157,13 +159,15 @@
     arena_stack_->MakeInaccessible(p, sizeof(T) * n);
   }
 
-  void construct(pointer p, const_reference val) {
+  template <typename U, typename... Args>
+  void construct(U* p, Args&&... args) {
     // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top.
-    new (static_cast<void*>(p)) value_type(val);
+    ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
   }
-  void destroy(pointer p) {
+  template <typename U>
+  void destroy(U* p) {
     // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top.
-    p->~value_type();
+    p->~U();
   }
 
  private:
@@ -193,6 +197,47 @@
   return ScopedArenaAllocatorAdapter<void>(this, kind);
 }
 
+// Special deleter that only calls the destructor. Also checks for double free errors.
+template <typename T>
+class ArenaDelete {
+  static constexpr uint8_t kMagicFill = 0xCE;
+ public:
+  void operator()(T* ptr) const {
+    ptr->~T();
+    if (RUNNING_ON_MEMORY_TOOL > 0) {
+      // Writing to the memory will fail if we have already destroyed the pointer with
+      // DestroyOnlyDelete since we mark it as no-access.
+      memset(ptr, kMagicFill, sizeof(T));
+      MEMORY_TOOL_MAKE_NOACCESS(ptr, sizeof(T));
+    } else if (kIsDebugBuild) {
+      CHECK(ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) == ArenaFreeTag::kUsed)
+          << "Freeing invalid object " << ptr;
+      ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) = ArenaFreeTag::kFree;
+      // Write a magic value to try to catch use-after-free errors.
+      memset(ptr, kMagicFill, sizeof(T));
+    }
+  }
+};
+
+// In general we lack support for arrays. We would need to call the destructor on each element,
+// which requires access to the array size. Support for that is future work.
+//
+// However, we can support trivially destructible component types, as then a destructor doesn't
+// need to be called.
+template <typename T>
+class ArenaDelete<T[]> {
+ public:
+  void operator()(T* ptr ATTRIBUTE_UNUSED) const {
+    static_assert(std::is_trivially_destructible<T>::value,
+                  "ArenaUniquePtr does not support non-trivially-destructible arrays.");
+    // TODO: Implement debug checks, and MEMORY_TOOL support.
+  }
+};
+
+// Arena unique ptr that only calls the destructor of the element.
+template <typename T>
+using ArenaUniquePtr = std::unique_ptr<T, ArenaDelete<T>>;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_SCOPED_ARENA_CONTAINERS_H_
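
ArenaUniquePtr pairs std::unique_ptr with a deleter that only runs the destructor, since arena memory is reclaimed in bulk when the allocator goes out of scope. A minimal stand-alone sketch of that pattern with plain C++ types (names here are illustrative):

    #include <cstdint>
    #include <memory>
    #include <new>

    template <typename T>
    struct DtorOnlyDelete {
      void operator()(T* ptr) const { ptr->~T(); }  // Run the destructor, never free memory.
    };

    template <typename T>
    using ArenaPtr = std::unique_ptr<T, DtorOnlyDelete<T>>;

    struct Node { int value; explicit Node(int v) : value(v) {} };

    int main() {
      alignas(Node) uint8_t storage[sizeof(Node)];  // Stand-in for arena-owned memory.
      ArenaPtr<Node> p(::new (storage) Node(42));   // Placement-new into the "arena".
      return p->value == 42 ? 0 : 1;                // Destructor runs; storage itself is untouched.
    }
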
diff --git a/runtime/base/stringpiece.h b/runtime/base/stringpiece.h
index d793bb6..46743e9 100644
--- a/runtime/base/stringpiece.h
+++ b/runtime/base/stringpiece.h
@@ -148,11 +148,24 @@
 
   StringPiece substr(size_type pos, size_type n = npos) const;
 
+  int Compare(const StringPiece& rhs) const {
+    const int r = memcmp(data(), rhs.data(), std::min(size(), rhs.size()));
+    if (r != 0) {
+      return r;
+    }
+    if (size() < rhs.size()) {
+      return -1;
+    } else if (size() > rhs.size()) {
+      return 1;
+    }
+    return 0;
+  }
+
  private:
   // Pointer to char data, not necessarily zero terminated.
   const char* ptr_;
   // Length of data.
-  size_type   length_;
+  size_type length_;
 };
 
 // This large function is defined inline so that in a fairly common case where
@@ -201,9 +214,7 @@
 }
 
 inline bool operator<(const StringPiece& x, const StringPiece& y) {
-  const int r = memcmp(x.data(), y.data(),
-                       std::min(x.size(), y.size()));
-  return ((r < 0) || ((r == 0) && (x.size() < y.size())));
+  return x.Compare(y) < 0;
 }
 
 inline bool operator>(const StringPiece& x, const StringPiece& y) {
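
The new StringPiece::Compare centralizes the three-way comparison the relational operators previously open-coded: memcmp over the common prefix, then the shorter string orders first on a tie. The same semantics, illustrated with std::string_view rather than ART types:

    #include <algorithm>
    #include <cassert>
    #include <cstring>
    #include <string_view>

    int Compare(std::string_view lhs, std::string_view rhs) {
      const int r = std::memcmp(lhs.data(), rhs.data(), std::min(lhs.size(), rhs.size()));
      if (r != 0) {
        return r;
      }
      if (lhs.size() < rhs.size()) {
        return -1;
      } else if (lhs.size() > rhs.size()) {
        return 1;
      }
      return 0;
    }

    int main() {
      assert(Compare("abc", "abd") < 0);  // Differs within the common prefix.
      assert(Compare("ab", "abc") < 0);   // Equal prefix: the shorter string sorts first.
      assert(Compare("abc", "abc") == 0);
      return 0;
    }
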
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index f1f6f9b..1942e1d 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -125,7 +125,7 @@
     histogram->CreateHistogram(&cumulative_data);
     histogram->PrintConfidenceIntervals(os, 0.99, cumulative_data);
   }
-  os << "Done Dumping histograms \n";
+  os << "Done Dumping histograms\n";
 }
 
 TimingLogger::TimingLogger(const char* name, bool precise, bool verbose)
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 7965cd7..b9ea475 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -19,6 +19,7 @@
 
 #include "art_method-inl.h"
 #include "gc_map.h"
+#include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
 
@@ -53,7 +54,7 @@
 
   void CheckReferences(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (GetMethod()->IsOptimized(sizeof(void*))) {
+    if (GetCurrentOatQuickMethodHeader()->IsOptimized()) {
       CheckOptimizedMethod(registers, number_of_references, native_pc_offset);
     } else {
       CheckQuickMethod(registers, number_of_references, native_pc_offset);
@@ -64,7 +65,7 @@
   void CheckOptimizedMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
-    CodeInfo code_info = m->GetOptimizedCodeInfo();
+    CodeInfo code_info = GetCurrentOatQuickMethodHeader()->GetOptimizedCodeInfo();
     StackMapEncoding encoding = code_info.ExtractEncoding();
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     uint16_t number_of_dex_registers = m->GetCodeItem()->registers_size_;
@@ -108,7 +109,7 @@
   void CheckQuickMethod(int* registers, int number_of_references, uint32_t native_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
-    NativePcOffsetToReferenceMap map(m->GetNativeGcMap(sizeof(void*)));
+    NativePcOffsetToReferenceMap map(GetCurrentOatQuickMethodHeader()->GetNativeGcMap());
     const uint8_t* ref_bitmap = map.FindBitMap(native_pc_offset);
     CHECK(ref_bitmap);
     for (int i = 0; i < number_of_references; ++i) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index acb39c5..da70456 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -16,12 +16,15 @@
 
 #include "class_linker.h"
 
+#include <algorithm>
 #include <deque>
 #include <iostream>
 #include <memory>
 #include <queue>
 #include <string>
+#include <tuple>
 #include <unistd.h>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -41,24 +44,20 @@
 #include "compiler_callbacks.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
+#include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc_root-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
-#include "handle_scope.h"
+#include "handle_scope-inl.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "leb128.h"
 #include "linear_alloc.h"
-#include "oat.h"
-#include "oat_file.h"
-#include "oat_file-inl.h"
-#include "oat_file_assistant.h"
-#include "object_lock.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -72,12 +71,17 @@
 #include "mirror/reference-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "mirror/string-inl.h"
+#include "native/dalvik_system_DexFile.h"
+#include "oat.h"
+#include "oat_file.h"
+#include "oat_file-inl.h"
+#include "oat_file_assistant.h"
+#include "oat_file_manager.h"
+#include "object_lock.h"
 #include "os.h"
 #include "runtime.h"
-#include "entrypoints/entrypoint_utils.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
-#include "handle_scope-inl.h"
 #include "thread-inl.h"
 #include "trace.h"
 #include "utils.h"
@@ -89,9 +93,6 @@
 
 static constexpr bool kSanityCheckObjects = kIsDebugBuild;
 
-// For b/21333911.
-static constexpr bool kDuplicateClassesCheck = false;
-
 static void ThrowNoClassDefFoundError(const char* fmt, ...)
     __attribute__((__format__(__printf__, 1, 2)))
     SHARED_REQUIRES(Locks::mutator_lock_);
@@ -615,10 +616,7 @@
   // initialized.
   {
     const DexFile& dex_file = java_lang_Object->GetDexFile();
-    const DexFile::StringId* void_string_id = dex_file.FindStringId("V");
-    CHECK(void_string_id != nullptr);
-    uint32_t void_string_index = dex_file.GetIndexForStringId(*void_string_id);
-    const DexFile::TypeId* void_type_id = dex_file.FindTypeId(void_string_index);
+    const DexFile::TypeId* void_type_id = dex_file.FindTypeId("V");
     CHECK(void_type_id != nullptr);
     uint16_t void_type_idx = dex_file.GetIndexForTypeId(*void_type_id);
     // Now we resolve void type so the dex cache contains it. We use java.lang.Object class
@@ -696,343 +694,6 @@
   }
 }
 
-const OatFile* ClassLinker::RegisterOatFile(const OatFile* oat_file) {
-  WriterMutexLock mu(Thread::Current(), dex_lock_);
-  if (kIsDebugBuild) {
-    for (size_t i = 0; i < oat_files_.size(); ++i) {
-      CHECK_NE(oat_file, oat_files_[i]) << oat_file->GetLocation();
-    }
-  }
-  VLOG(class_linker) << "Registering " << oat_file->GetLocation();
-  oat_files_.push_back(oat_file);
-  return oat_file;
-}
-
-OatFile& ClassLinker::GetImageOatFile(gc::space::ImageSpace* space) {
-  VLOG(startup) << "ClassLinker::GetImageOatFile entering";
-  OatFile* oat_file = space->ReleaseOatFile();
-  CHECK_EQ(RegisterOatFile(oat_file), oat_file);
-  VLOG(startup) << "ClassLinker::GetImageOatFile exiting";
-  return *oat_file;
-}
-
-class DexFileAndClassPair : ValueObject {
- public:
-  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
-     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
-       dex_file_(dex_file),
-       current_class_index_(current_class_index),
-       from_loaded_oat_(from_loaded_oat) {}
-
-  DexFileAndClassPair(const DexFileAndClassPair&) = default;
-
-  DexFileAndClassPair& operator=(const DexFileAndClassPair& rhs) {
-    cached_descriptor_ = rhs.cached_descriptor_;
-    dex_file_ = rhs.dex_file_;
-    current_class_index_ = rhs.current_class_index_;
-    from_loaded_oat_ = rhs.from_loaded_oat_;
-    return *this;
-  }
-
-  const char* GetCachedDescriptor() const {
-    return cached_descriptor_;
-  }
-
-  bool operator<(const DexFileAndClassPair& rhs) const {
-    const char* lhsDescriptor = cached_descriptor_;
-    const char* rhsDescriptor = rhs.cached_descriptor_;
-    int cmp = strcmp(lhsDescriptor, rhsDescriptor);
-    if (cmp != 0) {
-      // Note that the order must be reversed. We want to iterate over the classes in dex files.
-      // They are sorted lexicographically. Thus, the priority-queue must be a min-queue.
-      return cmp > 0;
-    }
-    return dex_file_ < rhs.dex_file_;
-  }
-
-  bool DexFileHasMoreClasses() const {
-    return current_class_index_ + 1 < dex_file_->NumClassDefs();
-  }
-
-  DexFileAndClassPair GetNext() const {
-    return DexFileAndClassPair(dex_file_, current_class_index_ + 1, from_loaded_oat_);
-  }
-
-  size_t GetCurrentClassIndex() const {
-    return current_class_index_;
-  }
-
-  bool FromLoadedOat() const {
-    return from_loaded_oat_;
-  }
-
-  const DexFile* GetDexFile() const {
-    return dex_file_;
-  }
-
-  void DeleteDexFile() {
-    delete dex_file_;
-    dex_file_ = nullptr;
-  }
-
- private:
-  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
-    return dex_file->StringByTypeIdx(class_def.class_idx_);
-  }
-
-  const char* cached_descriptor_;
-  const DexFile* dex_file_;
-  size_t current_class_index_;
-  bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
-                          // and what was loaded before. Any old duplicates must have been
-                          // OK, and any new "internal" duplicates are as well (they must
-                          // be from multidex, which resolves correctly).
-};
-
-static void AddDexFilesFromOat(const OatFile* oat_file,
-                               bool already_loaded,
-                               std::priority_queue<DexFileAndClassPair>* heap) {
-  const std::vector<const OatDexFile*>& oat_dex_files = oat_file->GetOatDexFiles();
-  for (const OatDexFile* oat_dex_file : oat_dex_files) {
-    std::string error;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
-    if (dex_file.get() == nullptr) {
-      LOG(WARNING) << "Could not create dex file from oat file: " << error;
-    } else {
-      if (dex_file->NumClassDefs() > 0U) {
-        heap->emplace(dex_file.release(), 0U, already_loaded);
-      }
-    }
-  }
-}
-
-static void AddNext(DexFileAndClassPair* original,
-                    std::priority_queue<DexFileAndClassPair>* heap) {
-  if (original->DexFileHasMoreClasses()) {
-    heap->push(original->GetNext());
-  } else {
-    // Need to delete the dex file.
-    original->DeleteDexFile();
-  }
-}
-
-static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap) {
-  while (!heap->empty()) {
-    delete heap->top().GetDexFile();
-    heap->pop();
-  }
-}
-
-const OatFile* ClassLinker::GetBootOatFile() {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-  if (image_space == nullptr) {
-    return nullptr;
-  }
-  return image_space->GetOatFile();
-}
-
-const OatFile* ClassLinker::GetPrimaryOatFile() {
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-  const OatFile* boot_oat_file = GetBootOatFile();
-  if (boot_oat_file != nullptr) {
-    for (const OatFile* oat_file : oat_files_) {
-      if (oat_file != boot_oat_file) {
-        return oat_file;
-      }
-    }
-  }
-  return nullptr;
-}
-
-// Check for class-def collisions in dex files.
-//
-// This works by maintaining a heap with one class from each dex file, sorted by the class
-// descriptor. Then a dex-file/class pair is continually removed from the heap and compared
-// against the following top element. If the descriptor is the same, it is now checked whether
-// the two elements agree on whether their dex file was from an already-loaded oat-file or the
-// new oat file. Any disagreement indicates a collision.
-bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) {
-  if (!kDuplicateClassesCheck) {
-    return false;
-  }
-
-  // Dex files are registered late - once a class is actually being loaded. We have to compare
-  // against the open oat files. Take the dex_lock_ that protects oat_files_ accesses.
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-
-  std::priority_queue<DexFileAndClassPair> queue;
-
-  // Add dex files from already loaded oat files, but skip boot.
-  {
-    const OatFile* boot_oat = GetBootOatFile();
-    for (const OatFile* loaded_oat_file : oat_files_) {
-      if (loaded_oat_file == boot_oat) {
-        continue;
-      }
-      AddDexFilesFromOat(loaded_oat_file, true, &queue);
-    }
-  }
-
-  if (queue.empty()) {
-    // No other oat files, return early.
-    return false;
-  }
-
-  // Add dex files from the oat file to check.
-  AddDexFilesFromOat(oat_file, false, &queue);
-
-  // Now drain the queue.
-  while (!queue.empty()) {
-    DexFileAndClassPair compare_pop = queue.top();
-    queue.pop();
-
-    // Compare against the following elements.
-    while (!queue.empty()) {
-      DexFileAndClassPair top = queue.top();
-
-      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
-        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
-        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
-          *error_msg =
-              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
-                           compare_pop.GetCachedDescriptor(),
-                           compare_pop.GetDexFile()->GetLocation().c_str(),
-                           top.GetDexFile()->GetLocation().c_str());
-          FreeDexFilesInHeap(&queue);
-          return true;
-        }
-        // Pop it.
-        queue.pop();
-        AddNext(&top, &queue);
-      } else {
-        // Something else. Done here.
-        break;
-      }
-    }
-    AddNext(&compare_pop, &queue);
-  }
-
-  return false;
-}
-
-std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat(
-    const char* dex_location, const char* oat_location,
-    std::vector<std::string>* error_msgs) {
-  CHECK(error_msgs != nullptr);
-
-  // Verify we aren't holding the mutator lock, which could starve GC if we
-  // have to generate or relocate an oat file.
-  Locks::mutator_lock_->AssertNotHeld(Thread::Current());
-
-  OatFileAssistant oat_file_assistant(dex_location, oat_location, kRuntimeISA,
-     !Runtime::Current()->IsAotCompiler());
-
-  // Lock the target oat location to avoid races generating and loading the
-  // oat file.
-  std::string error_msg;
-  if (!oat_file_assistant.Lock(&error_msg)) {
-    // Don't worry too much if this fails. If it does fail, it's unlikely we
-    // can generate an oat file anyway.
-    VLOG(class_linker) << "OatFileAssistant::Lock: " << error_msg;
-  }
-
-  // Check if we already have an up-to-date oat file open.
-  const OatFile* source_oat_file = nullptr;
-  {
-    ReaderMutexLock mu(Thread::Current(), dex_lock_);
-    for (const OatFile* oat_file : oat_files_) {
-      CHECK(oat_file != nullptr);
-      if (oat_file_assistant.GivenOatFileIsUpToDate(*oat_file)) {
-        source_oat_file = oat_file;
-        break;
-      }
-    }
-  }
-
-  // If we didn't have an up-to-date oat file open, try to load one from disk.
-  if (source_oat_file == nullptr) {
-    // Update the oat file on disk if we can. This may fail, but that's okay.
-    // Best effort is all that matters here.
-    if (!oat_file_assistant.MakeUpToDate(&error_msg)) {
-      LOG(WARNING) << error_msg;
-    }
-
-    // Get the oat file on disk.
-    std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
-    if (oat_file.get() != nullptr) {
-      // Take the file only if it has no collisions, or we must take it because of preopting.
-      bool accept_oat_file = !HasCollisions(oat_file.get(), &error_msg);
-      if (!accept_oat_file) {
-        // Failed the collision check. Print warning.
-        if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-          LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
-                       << dex_location;
-        } else {
-          LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
-                          " load classes for " << dex_location;
-        }
-        LOG(WARNING) << error_msg;
-
-        // However, if the app was part of /system and preopted, there is no original dex file
-        // available. In that case grudgingly accept the oat file.
-        if (!DexFile::MaybeDex(dex_location)) {
-          accept_oat_file = true;
-          LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
-                       << "Allow oat file use. This is potentially dangerous.";
-        }
-      }
-
-      if (accept_oat_file) {
-        source_oat_file = oat_file.release();
-        RegisterOatFile(source_oat_file);
-      }
-    }
-  }
-
-  std::vector<std::unique_ptr<const DexFile>> dex_files;
-
-  // Load the dex files from the oat file.
-  if (source_oat_file != nullptr) {
-    dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
-    if (dex_files.empty()) {
-      error_msgs->push_back("Failed to open dex files from "
-          + source_oat_file->GetLocation());
-    }
-  }
-
-  // Fall back to running out of the original dex file if we couldn't load any
-  // dex_files from the oat file.
-  if (dex_files.empty()) {
-    if (oat_file_assistant.HasOriginalDexFiles()) {
-      if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-        if (!DexFile::Open(dex_location, dex_location, &error_msg, &dex_files)) {
-          LOG(WARNING) << error_msg;
-          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
-        }
-      } else {
-        error_msgs->push_back("Fallback mode disabled, skipping dex files.");
-      }
-    } else {
-      error_msgs->push_back("No original dex files found for dex location "
-          + std::string(dex_location));
-    }
-  }
-  return dex_files;
-}
-
-const OatFile* ClassLinker::FindOpenedOatFileFromOatLocation(const std::string& oat_location) {
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-  for (size_t i = 0; i < oat_files_.size(); i++) {
-    const OatFile* oat_file = oat_files_[i];
-    DCHECK(oat_file != nullptr);
-    if (oat_file->GetLocation() == oat_location) {
-      return oat_file;
-    }
-  }
-  return nullptr;
-}
-
 static void SanityCheckArtMethod(ArtMethod* m,
                                  mirror::Class* expected_class,
                                  gc::space::ImageSpace* space)
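
The duplicate-class check removed above (the oat-file bookkeeping moves out of ClassLinker in this change) relies on reversing operator< so that std::priority_queue, a max-heap, pops the lexicographically smallest class descriptor first. A tiny demonstration of that trick with generic types, not the removed DexFileAndClassPair:

    #include <iostream>
    #include <queue>
    #include <string>

    struct Entry {
      std::string descriptor;
      // Reversed comparison: the max-heap then behaves as a min-queue on descriptor.
      bool operator<(const Entry& rhs) const { return descriptor > rhs.descriptor; }
    };

    int main() {
      std::priority_queue<Entry> queue;
      queue.push({"LB;"});
      queue.push({"LA;"});
      queue.push({"LC;"});
      while (!queue.empty()) {
        std::cout << queue.top().descriptor << "\n";  // Prints LA; then LB; then LC;
        queue.pop();
      }
      return 0;
    }
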
@@ -1169,16 +830,17 @@
   CHECK(space != nullptr);
   image_pointer_size_ = space->GetImageHeader().GetPointerSize();
   dex_cache_image_class_lookup_required_ = true;
-  OatFile& oat_file = GetImageOatFile(space);
-  CHECK_EQ(oat_file.GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
-  CHECK_EQ(oat_file.GetOatHeader().GetImageFileLocationOatDataBegin(), 0U);
-  const char* image_file_location = oat_file.GetOatHeader().
+  const OatFile* oat_file = runtime->GetOatFileManager().RegisterImageOatFile(space);
+  DCHECK(oat_file != nullptr);
+  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
+  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(), 0U);
+  const char* image_file_location = oat_file->GetOatHeader().
       GetStoreValueByKey(OatHeader::kImageLocationKey);
   CHECK(image_file_location == nullptr || *image_file_location == 0);
-  quick_resolution_trampoline_ = oat_file.GetOatHeader().GetQuickResolutionTrampoline();
-  quick_imt_conflict_trampoline_ = oat_file.GetOatHeader().GetQuickImtConflictTrampoline();
-  quick_generic_jni_trampoline_ = oat_file.GetOatHeader().GetQuickGenericJniTrampoline();
-  quick_to_interpreter_bridge_trampoline_ = oat_file.GetOatHeader().GetQuickToInterpreterBridge();
+  quick_resolution_trampoline_ = oat_file->GetOatHeader().GetQuickResolutionTrampoline();
+  quick_imt_conflict_trampoline_ = oat_file->GetOatHeader().GetQuickImtConflictTrampoline();
+  quick_generic_jni_trampoline_ = oat_file->GetOatHeader().GetQuickGenericJniTrampoline();
+  quick_to_interpreter_bridge_trampoline_ = oat_file->GetOatHeader().GetQuickToInterpreterBridge();
   StackHandleScope<2> hs(self);
   mirror::Object* dex_caches_object = space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
   Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(
@@ -1200,20 +862,20 @@
                                                           java_lang_Object->GetObjectSize(),
                                                           VoidFunctor()));
 
-  CHECK_EQ(oat_file.GetOatHeader().GetDexFileCount(),
+  CHECK_EQ(oat_file->GetOatHeader().GetDexFileCount(),
            static_cast<uint32_t>(dex_caches->GetLength()));
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     StackHandleScope<1> hs2(self);
     Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
     const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
-    const OatFile::OatDexFile* oat_dex_file = oat_file.GetOatDexFile(dex_file_location.c_str(),
-                                                                     nullptr);
-    CHECK(oat_dex_file != nullptr) << oat_file.GetLocation() << " " << dex_file_location;
+    const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(),
+                                                                      nullptr);
+    CHECK(oat_dex_file != nullptr) << oat_file->GetLocation() << " " << dex_file_location;
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
-    if (dex_file.get() == nullptr) {
+    if (dex_file == nullptr) {
       LOG(FATAL) << "Failed to open dex file " << dex_file_location
-                 << " from within oat file " << oat_file.GetLocation()
+                 << " from within oat file " << oat_file->GetLocation()
                  << " error '" << error_msg << "'";
       UNREACHABLE();
     }
@@ -1361,9 +1023,9 @@
   class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   VisitClassRoots(visitor, flags);
   array_iftable_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
-  for (GcRoot<mirror::Class>& root : find_array_class_cache_) {
-    root.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
-  }
+  // Instead of visiting the find_array_class_cache_, drop it so that it doesn't prevent class
+  // unloading if we are marking roots.
+  DropFindArrayClassCache();
 }
 
 class VisitClassLoaderClassesVisitor : public ClassLoaderVisitor {
@@ -1508,24 +1170,36 @@
   mirror::IntArray::ResetArrayClass();
   mirror::LongArray::ResetArrayClass();
   mirror::ShortArray::ResetArrayClass();
-  STLDeleteElements(&oat_files_);
   Thread* const self = Thread::Current();
-  JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
   for (const ClassLoaderData& data : class_loaders_) {
-    vm->DeleteWeakGlobalRef(self, data.weak_root);
-    delete data.allocator;
-    delete data.class_table;
+    DeleteClassLoader(self, data);
   }
   class_loaders_.clear();
 }
 
+void ClassLinker::DeleteClassLoader(Thread* self, const ClassLoaderData& data) {
+  Runtime* const runtime = Runtime::Current();
+  JavaVMExt* const vm = runtime->GetJavaVM();
+  vm->DeleteWeakGlobalRef(self, data.weak_root);
+  if (runtime->GetJit() != nullptr) {
+    jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache();
+    if (code_cache != nullptr) {
+      code_cache->RemoveMethodsIn(self, *data.allocator);
+    }
+  }
+  delete data.allocator;
+  delete data.class_table;
+}
+
 mirror::PointerArray* ClassLinker::AllocPointerArray(Thread* self, size_t length) {
   return down_cast<mirror::PointerArray*>(image_pointer_size_ == 8u ?
       static_cast<mirror::Array*>(mirror::LongArray::Alloc(self, length)) :
       static_cast<mirror::Array*>(mirror::IntArray::Alloc(self, length)));
 }
 
-mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, const DexFile& dex_file) {
+mirror::DexCache* ClassLinker::AllocDexCache(Thread* self,
+                                             const DexFile& dex_file,
+                                             LinearAlloc* linear_alloc) {
   StackHandleScope<6> hs(self);
   auto dex_cache(hs.NewHandle(down_cast<mirror::DexCache*>(
       GetClassRoot(kJavaLangDexCache)->AllocObject(self))));
@@ -1540,22 +1214,15 @@
   }
   DexCacheArraysLayout layout(image_pointer_size_, &dex_file);
   uint8_t* raw_arrays = nullptr;
-  if (dex_file.NumStringIds() != 0u || dex_file.NumTypeIds() != 0u ||
+  if (dex_file.GetOatDexFile() != nullptr &&
+      dex_file.GetOatDexFile()->GetDexCacheArrays() != nullptr) {
+    raw_arrays = dex_file.GetOatDexFile()->GetDexCacheArrays();
+  } else if (dex_file.NumStringIds() != 0u || dex_file.NumTypeIds() != 0u ||
       dex_file.NumMethodIds() != 0u || dex_file.NumFieldIds() != 0u) {
     // NOTE: We "leak" the raw_arrays because we never destroy the dex cache.
     DCHECK(image_pointer_size_ == 4u || image_pointer_size_ == 8u);
-    if (sizeof(void*) == 8u && image_pointer_size_ == 4u) {
-      // When cross-compiling for a 32-bit target on a 64-bit host, we need these arrays
-      // in the low 4GiB address space so that we can store pointers in 32-bit fields.
-      // This is conveniently provided by the linear allocator.
-      raw_arrays = reinterpret_cast<uint8_t*>(
-          Runtime::Current()->GetLinearAlloc()->Alloc(self, layout.Size()));  // Zero-initialized.
-    } else {
-      raw_arrays = reinterpret_cast<uint8_t*>(calloc(layout.Size(), 1u));  // Zero-initialized.
-      if (raw_arrays == nullptr) {
-        return nullptr;
-      }
-    }
+    // Zero-initialized.
+    raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
   }
   GcRoot<mirror::String>* strings = (dex_file.NumStringIds() == 0u) ? nullptr :
       reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset());
@@ -1768,13 +1435,18 @@
             break;
           }
           int32_t long_array_size = long_array->GetLength();
-          for (int32_t j = 0; j < long_array_size; ++j) {
+          // First element is the oat file.
+          for (int32_t j = kDexFileIndexStart; j < long_array_size; ++j) {
             const DexFile* cp_dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(
                 long_array->GetWithoutChecks(j)));
             const DexFile::ClassDef* dex_class_def = cp_dex_file->FindClassDef(descriptor, hash);
             if (dex_class_def != nullptr) {
-              mirror::Class* klass = DefineClass(self, descriptor, hash, class_loader,
-                                                 *cp_dex_file, *dex_class_def);
+              mirror::Class* klass = DefineClass(self,
+                                                 descriptor,
+                                                 hash,
+                                                 class_loader,
+                                                 *cp_dex_file,
+                                                 *dex_class_def);
               if (klass == nullptr) {
                 CHECK(self->IsExceptionPending()) << descriptor;
                 self->ClearException();
@@ -1921,7 +1593,9 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  mirror::DexCache* dex_cache = RegisterDexFile(dex_file);
+  mirror::DexCache* dex_cache = RegisterDexFile(
+      dex_file,
+      GetOrCreateAllocatorForClassLoader(class_loader.Get()));
   if (dex_cache == nullptr) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -2167,13 +1841,6 @@
       return code;
     }
   }
-  jit::Jit* const jit = Runtime::Current()->GetJit();
-  if (jit != nullptr) {
-    auto* code = jit->GetCodeCache()->GetCodeFor(method);
-    if (code != nullptr) {
-      return code;
-    }
-  }
   if (method->IsNative()) {
     // No code and native? Use generic trampoline.
     return GetQuickGenericJniStub();
@@ -2190,13 +1857,6 @@
   if (found) {
     return oat_method.GetQuickCode();
   }
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr) {
-    auto* code = jit->GetCodeCache()->GetCodeFor(method);
-    if (code != nullptr) {
-      return code;
-    }
-  }
   return nullptr;
 }
 
@@ -2424,6 +2084,19 @@
   return allocator;
 }
 
+LinearAlloc* ClassLinker::GetOrCreateAllocatorForClassLoader(mirror::ClassLoader* class_loader) {
+  if (class_loader == nullptr) {
+    return Runtime::Current()->GetLinearAlloc();
+  }
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  LinearAlloc* allocator = class_loader->GetAllocator();
+  if (allocator == nullptr) {
+    allocator = Runtime::Current()->CreateLinearAlloc();
+    class_loader->SetAllocator(allocator);
+  }
+  return allocator;
+}
+
 void ClassLinker::LoadClassMembers(Thread* self,
                                    const DexFile& dex_file,
                                    const uint8_t* class_data,
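
GetOrCreateAllocatorForClassLoader lazily creates one LinearAlloc per class loader under the class-linker classes lock, falling back to the runtime-wide allocator for the boot class loader. The general get-or-create shape, sketched with std:: stand-ins (names hypothetical):

    #include <memory>
    #include <mutex>

    struct Allocator {};

    struct ClassLoader {
      std::unique_ptr<Allocator> allocator;
    };

    std::mutex classes_lock;

    Allocator* GetOrCreateAllocator(ClassLoader* loader, Allocator* runtime_default) {
      if (loader == nullptr) {
        return runtime_default;  // The boot class loader uses the runtime-wide allocator.
      }
      std::lock_guard<std::mutex> lock(classes_lock);
      if (loader->allocator == nullptr) {
        loader->allocator = std::make_unique<Allocator>();  // Created at most once per loader.
      }
      return loader->allocator.get();
    }
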
@@ -2582,7 +2255,10 @@
 
 void ClassLinker::AppendToBootClassPath(Thread* self, const DexFile& dex_file) {
   StackHandleScope<1> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(AllocDexCache(self, dex_file)));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(AllocDexCache(
+      self,
+      dex_file,
+      Runtime::Current()->GetLinearAlloc())));
   CHECK(dex_cache.Get() != nullptr) << "Failed to allocate dex cache for "
                                     << dex_file.GetLocation();
   AppendToBootClassPath(dex_file, dex_cache);
@@ -2618,7 +2294,7 @@
   dex_cache->SetDexFile(&dex_file);
 }
 
-mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file) {
+mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file, LinearAlloc* linear_alloc) {
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
@@ -2631,7 +2307,7 @@
   // suspend all threads and another thread may need the dex_lock_ to
   // get to a suspend point.
   StackHandleScope<1> hs(self);
-  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(AllocDexCache(self, dex_file)));
+  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(AllocDexCache(self, dex_file, linear_alloc)));
   WriterMutexLock mu(self, dex_lock_);
   mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
   if (dex_cache != nullptr) {
@@ -3061,17 +2737,13 @@
   for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     const DexFile* dex_file = dex_cache->GetDexFile();
-    // Try binary searching the string/type index.
-    const DexFile::StringId* string_id = dex_file->FindStringId(descriptor);
-    if (string_id != nullptr) {
-      const DexFile::TypeId* type_id =
-          dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
-      if (type_id != nullptr) {
-        uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
-        mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
-        if (klass != nullptr) {
-          return klass;
-        }
+    // Try binary searching the type index by descriptor.
+    const DexFile::TypeId* type_id = dex_file->FindTypeId(descriptor);
+    if (type_id != nullptr) {
+      uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
+      mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
+      if (klass != nullptr) {
+        return klass;
       }
     }
   }
@@ -3428,6 +3100,9 @@
   std::string descriptor(GetDescriptorForProxy(klass.Get()));
   const size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
 
+  // Needs to be before we insert the class so that the allocator field is set.
+  LinearAlloc* const allocator = GetOrCreateAllocatorForClassLoader(klass->GetClassLoader());
+
   // Insert the class before loading the fields as the field roots
   // (ArtField::declaring_class_) are only visited from the class
   // table. There can't be any suspend points between inserting the
@@ -3435,9 +3110,6 @@
   mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), hash);
   CHECK(existing == nullptr);
 
-  // Needs to be after we insert the class so that the allocator field is set.
-  LinearAlloc* const allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
-
   // Instance fields are inherited, but we add a couple of static fields...
   const size_t num_fields = 2;
   LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, allocator, num_fields);
@@ -3676,6 +3348,18 @@
         return false;
       }
     }
+    // If we are a class we need to initialize all interfaces with default methods when we are
+    // initialized. Check all of them.
+    if (!klass->IsInterface()) {
+      size_t num_interfaces = klass->GetIfTableCount();
+      for (size_t i = 0; i < num_interfaces; i++) {
+        mirror::Class* iface = klass->GetIfTable()->GetInterface(i);
+        if (iface->HasDefaultMethods() &&
+            !CanWeInitializeClass(iface, can_init_statics, can_init_parents)) {
+          return false;
+        }
+      }
+    }
   }
   if (klass->IsInterface() || !klass->HasSuperClass()) {
     return true;
@@ -3802,6 +3486,38 @@
     }
   }
 
+  if (!klass->IsInterface()) {
+    // Initialize interfaces with default methods for the JLS.
+    size_t num_direct_interfaces = klass->NumDirectInterfaces();
+    // Only setup the (expensive) handle scope if we actually need to.
+    if (UNLIKELY(num_direct_interfaces > 0)) {
+      StackHandleScope<1> hs_iface(self);
+      MutableHandle<mirror::Class> handle_scope_iface(hs_iface.NewHandle<mirror::Class>(nullptr));
+      for (size_t i = 0; i < num_direct_interfaces; i++) {
+        handle_scope_iface.Assign(mirror::Class::GetDirectInterface(self, klass, i));
+        CHECK(handle_scope_iface.Get() != nullptr);
+        CHECK(handle_scope_iface->IsInterface());
+        if (handle_scope_iface->HasBeenRecursivelyInitialized()) {
+          // We have already done this for this interface. Skip it.
+          continue;
+        }
+        // We cannot just initialize the class directly because we need to ensure that ALL
+        // interfaces with default methods are initialized. Non-default interface initialization
+        // will not affect other non-default super-interfaces.
+        bool iface_initialized = InitializeDefaultInterfaceRecursive(self,
+                                                                     handle_scope_iface,
+                                                                     can_init_statics,
+                                                                     can_init_parents);
+        if (!iface_initialized) {
+          ObjectLock<mirror::Class> lock(self, klass);
+          // Initialization failed because one of our interfaces with default methods is erroneous.
+          mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
+          return false;
+        }
+      }
+    }
+  }
+
   const size_t num_static_fields = klass->NumStaticFields();
   if (num_static_fields > 0) {
     const DexFile::ClassDef* dex_class_def = klass->GetClassDef();
@@ -3891,6 +3607,52 @@
   return success;
 }
 
+// We recursively run down the tree of interfaces. We need to do this in the order they are declared
+// and perform the initialization only on those interfaces that contain default methods.
+bool ClassLinker::InitializeDefaultInterfaceRecursive(Thread* self,
+                                                      Handle<mirror::Class> iface,
+                                                      bool can_init_statics,
+                                                      bool can_init_parents) {
+  CHECK(iface->IsInterface());
+  size_t num_direct_ifaces = iface->NumDirectInterfaces();
+  // Only create the (expensive) handle scope if we need it.
+  if (UNLIKELY(num_direct_ifaces > 0)) {
+    StackHandleScope<1> hs(self);
+    MutableHandle<mirror::Class> handle_super_iface(hs.NewHandle<mirror::Class>(nullptr));
+    // First we initialize all of iface's super-interfaces recursively.
+    for (size_t i = 0; i < num_direct_ifaces; i++) {
+      mirror::Class* super_iface = mirror::Class::GetDirectInterface(self, iface, i);
+      if (!super_iface->HasBeenRecursivelyInitialized()) {
+        // Recursive step
+        handle_super_iface.Assign(super_iface);
+        if (!InitializeDefaultInterfaceRecursive(self,
+                                                 handle_super_iface,
+                                                 can_init_statics,
+                                                 can_init_parents)) {
+          return false;
+        }
+      }
+    }
+  }
+
+  bool result = true;
+  // Then we initialize 'iface' if it has default methods. We do not need to (and in fact must not)
+  // initialize if we don't have default methods.
+  if (iface->HasDefaultMethods()) {
+    result = EnsureInitialized(self, iface, can_init_statics, can_init_parents);
+  }
+
+  // Mark that this interface has undergone recursive default interface initialization, so later
+  // class initializations can skip it. We do this even if the interface declares no default
+  // methods, since marking it still avoids the traversal. This is purely a performance optimization.
+  if (result) {
+    // TODO This should be done in a better way
+    ObjectLock<mirror::Class> lock(self, iface);
+    iface->SetRecursivelyInitialized();
+  }
+  return result;
+}
+
 bool ClassLinker::WaitForInitializeClass(Handle<mirror::Class> klass,
                                          Thread* self,
                                          ObjectLock<mirror::Class>& lock)
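
InitializeDefaultInterfaceRecursive walks super-interfaces depth-first, runs initialization only for interfaces that actually declare default methods, and marks every visited interface so later class initializations can skip the traversal. The shape of that recursion, in a stripped-down sketch using plain structs instead of mirror::Class:

    #include <vector>

    struct Iface {
      std::vector<Iface*> supers;
      bool has_defaults = false;
      bool recursively_initialized = false;
      bool initialized_ok = true;  // Stand-in for the result of initializing this interface.
    };

    bool InitializeDefaultIfaceRecursive(Iface* iface) {
      // Initialize all super-interfaces first, skipping ones already visited.
      for (Iface* super : iface->supers) {
        if (!super->recursively_initialized && !InitializeDefaultIfaceRecursive(super)) {
          return false;
        }
      }
      // Only interfaces with default methods are actually initialized.
      const bool result = iface->has_defaults ? iface->initialized_ok : true;
      if (result) {
        iface->recursively_initialized = true;  // Purely a performance marker.
      }
      return result;
    }
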
@@ -4193,13 +3955,13 @@
     ClassLoaderData data;
     data.weak_root = self->GetJniEnv()->vm->AddWeakGlobalRef(self, class_loader);
     data.class_table = class_table;
-    data.allocator = Runtime::Current()->CreateLinearAlloc();
-    class_loaders_.push_back(data);
     // Don't already have a class table, add it to the class loader.
     CHECK(class_loader->GetClassTable() == nullptr);
-    CHECK(class_loader->GetAllocator() == nullptr);
     class_loader->SetClassTable(data.class_table);
-    class_loader->SetAllocator(data.allocator);
+    // Should have been set when we registered the dex file.
+    data.allocator = class_loader->GetAllocator();
+    CHECK(data.allocator != nullptr);
+    class_loaders_.push_back(data);
   }
   return class_table;
 }
@@ -4623,20 +4385,16 @@
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces,
                               ArtMethod** out_imt) {
   self->AllowThreadSuspension();
-  if (klass->IsInterface()) {
-    // No vtable.
-    size_t count = klass->NumVirtualMethods();
-    if (!IsUint<16>(count)) {
-      ThrowClassFormatError(klass.Get(), "Too many methods on interface: %zd", count);
-      return false;
-    }
-    for (size_t i = 0; i < count; ++i) {
-      klass->GetVirtualMethodDuringLinking(i, image_pointer_size_)->SetMethodIndex(i);
-    }
-  } else if (!LinkVirtualMethods(self, klass)) {  // Link virtual methods first.
-    return false;
-  }
-  return LinkInterfaceMethods(self, klass, interfaces, out_imt);  // Link interface method last.
+  // A map from vtable indexes to the method they need to be updated to point to. Used because we
+  // need to have default methods be in the virtuals array of each class but we don't set that up
+  // until LinkInterfaceMethods.
+  std::unordered_map<size_t, ArtMethod*> default_translations;
+  // Set up the interface lookup table first, then link virtual methods, then interface methods.
+  // We need the interface table before linking virtual methods so that we can determine whether
+  // any vtable entries must be updated with new default method implementations.
+  return SetupInterfaceLookupTable(self, klass, interfaces)
+          && LinkVirtualMethods(self, klass, /*out*/ &default_translations)
+          && LinkInterfaceMethods(self, klass, default_translations, out_imt);
 }
 
 // Comparator for name and signature of a method, used in finding overriding methods. Implementation
@@ -4760,9 +4518,36 @@
 const uint32_t LinkVirtualHashTable::invalid_index_ = std::numeric_limits<uint32_t>::max();
 const uint32_t LinkVirtualHashTable::removed_index_ = std::numeric_limits<uint32_t>::max() - 1;
 
-bool ClassLinker::LinkVirtualMethods(Thread* self, Handle<mirror::Class> klass) {
+bool ClassLinker::LinkVirtualMethods(
+    Thread* self,
+    Handle<mirror::Class> klass,
+    /*out*/std::unordered_map<size_t, ArtMethod*>* default_translations) {
   const size_t num_virtual_methods = klass->NumVirtualMethods();
-  if (klass->HasSuperClass()) {
+  if (klass->IsInterface()) {
+    // No vtable.
+    if (!IsUint<16>(num_virtual_methods)) {
+      ThrowClassFormatError(klass.Get(), "Too many methods on interface: %zu", num_virtual_methods);
+      return false;
+    }
+    bool has_defaults = false;
+    // TODO May need to replace this with real VTable for invoke_super
+    // Assign each method an IMT index and set the default flag.
+    for (size_t i = 0; i < num_virtual_methods; ++i) {
+      ArtMethod* m = klass->GetVirtualMethodDuringLinking(i, image_pointer_size_);
+      m->SetMethodIndex(i);
+      if (!m->IsAbstract()) {
+        m->SetAccessFlags(m->GetAccessFlags() | kAccDefault);
+        has_defaults = true;
+      }
+    }
+    // Mark that we have default methods so that we won't need to scan the virtual_methods_ array
+    // during initialization. This is a performance optimization. We could simply traverse the
+    // virtual_methods_ array again during initialization.
+    if (has_defaults) {
+      klass->SetHasDefaultMethods();
+    }
+    return true;
+  } else if (klass->HasSuperClass()) {
     const size_t super_vtable_length = klass->GetSuperClass()->GetVTableLength();
     const size_t max_count = num_virtual_methods + super_vtable_length;
     StackHandleScope<2> hs(self);
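
For interfaces, LinkVirtualMethods now tags every non-abstract method with a default-method access flag and records a class-level bit so initialization can cheaply test for default methods. The flag-marking pattern in isolation (kAccDefault's value here is illustrative, not ART's):

    #include <cstdint>
    #include <vector>

    constexpr uint32_t kAccAbstract = 0x0400;      // Standard dex access flag.
    constexpr uint32_t kAccDefault  = 0x00400000;  // Illustrative bit for "default method".

    struct Method { uint32_t access_flags; };

    bool MarkDefaults(std::vector<Method>& virtuals) {
      bool has_defaults = false;
      for (Method& m : virtuals) {
        if ((m.access_flags & kAccAbstract) == 0) {
          m.access_flags |= kAccDefault;  // Non-abstract interface method => default method.
          has_defaults = true;
        }
      }
      return has_defaults;  // Caller would set a class-level "has default methods" bit.
    }
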
@@ -4778,14 +4563,22 @@
         vtable->SetElementPtrSize(
             i, super_class->GetEmbeddedVTableEntry(i, image_pointer_size_), image_pointer_size_);
       }
-      if (num_virtual_methods == 0) {
+      // We might need to change the vtable if we have new virtual methods or new interfaces (since
+      // new interfaces might give us new default methods). If there are neither, we can skip the
+      // rest, since the class cannot override any of the super-class's methods. This check is
+      // required for correctness: without it we might fail to update vtable entries that are
+      // overridden by default methods.
+      if (num_virtual_methods == 0 && super_class->GetIfTableCount() == klass->GetIfTableCount()) {
         klass->SetVTable(vtable.Get());
         return true;
       }
     } else {
+      DCHECK(super_class->IsAbstract() && !super_class->IsArrayClass());
       auto* super_vtable = super_class->GetVTable();
       CHECK(super_vtable != nullptr) << PrettyClass(super_class.Get());
-      if (num_virtual_methods == 0) {
+      // We might need to change the vtable if we have new virtual methods or new interfaces (since
+      // new interfaces might give us new default methods). See the comment above.
+      if (num_virtual_methods == 0 && super_class->GetIfTableCount() == klass->GetIfTableCount()) {
         klass->SetVTable(super_vtable);
         return true;
       }
@@ -4806,7 +4599,9 @@
     // the need for the initial vtable which we later shrink back down).
     // 3. Add non overridden methods to the end of the vtable.
     static constexpr size_t kMaxStackHash = 250;
-    const size_t hash_table_size = num_virtual_methods * 3;
+    // + 1 so that even if we only have new default methods we will still be able to use this hash
+    // table (i.e. it will never have 0 size).
+    const size_t hash_table_size = num_virtual_methods * 3 + 1;
     uint32_t* hash_table_ptr;
     std::unique_ptr<uint32_t[]> hash_heap_storage;
     if (hash_table_size <= kMaxStackHash) {
@@ -4823,10 +4618,10 @@
           i, image_pointer_size_)->GetDeclaringClass() != nullptr);
       hash_table.Add(i);
     }
-    // Loop through each super vtable method and see if they are overriden by a method we added to
+    // Loop through each super vtable method and see if they are overridden by a method we added to
     // the hash table.
     for (size_t j = 0; j < super_vtable_length; ++j) {
-      // Search the hash table to see if we are overidden by any method.
+      // Search the hash table to see if we are overridden by any method.
       ArtMethod* super_method = vtable->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
       MethodNameAndSignatureComparator super_method_name_comparator(
           super_method->GetInterfaceMethodIfProxy(image_pointer_size_));
@@ -4849,10 +4644,51 @@
                        << " would have incorrectly overridden the package-private method in "
                        << PrettyDescriptor(super_method->GetDeclaringClassDescriptor());
         }
+      } else if (super_method->IsDefault()) {
+        // We didn't directly override this method but we might through default methods...
+        // Check for default method update.
+        ArtMethod* default_method = nullptr;
+        std::string icce_message;
+        if (!FindDefaultMethodImplementation(self,
+                                             super_method,
+                                             klass,
+                                             /*out*/&default_method,
+                                             /*out*/&icce_message)) {
+          // An error occurred while finding default methods.
+          // TODO This should actually be thrown when we attempt to invoke this method.
+          ThrowIncompatibleClassChangeError(klass.Get(), "%s", icce_message.c_str());
+          return false;
+        }
+        // This should always work because we inherit superclass interfaces. We should either get
+        //  1) An IncompatibleClassChangeError because of conflicting default method
+        //     implementations.
+        //  2) The same default method implementation as the superclass.
+        //  3) A default method that overrides the superclass's.
+        // Therefore this check should never fail.
+        CHECK(default_method != nullptr);
+        if (UNLIKELY(default_method->GetDeclaringClass() != super_method->GetDeclaringClass())) {
+          // TODO: Refactor this; maybe add default methods to virtuals here rather than in
+          //      LinkInterfaceMethods.
+          //      The problem is default methods might override previously present default-method or
+          //      miranda-method vtable entries from the superclass. Unfortunately we need these to
+          //      be entries in this class's virtuals. We do not give these entries there until
+          //      LinkInterfaceMethods so we pass this map around to let it know which vtable
+          //      entries need to be updated.
+          // Make a note that vtable entry j must be updated, store what it needs to be updated to.
+          // We will allocate a virtual method slot in LinkInterfaceMethods and fix it up then.
+          default_translations->insert({j, default_method});
+          VLOG(class_linker) << "Method " << PrettyMethod(super_method) << " overridden by default "
+                             << PrettyMethod(default_method) << " in " << PrettyClass(klass.Get());
+        } else {
+          // They are the same method/no override
+          // Cannot do direct comparison because we had to copy the ArtMethod object into the
+          // superclass's vtable.
+          continue;
+        }
       }
     }
-    // Add the non overridden methods at the end.
     size_t actual_count = super_vtable_length;
+    // Add the non-overridden methods at the end.
     for (size_t i = 0; i < num_virtual_methods; ++i) {
       ArtMethod* local_method = klass->GetVirtualMethodDuringLinking(i, image_pointer_size_);
       size_t method_idx = local_method->GetMethodIndexDuringLinking();
@@ -4900,20 +4736,223 @@
   return true;
 }
 
-bool ClassLinker::LinkInterfaceMethods(Thread* self,
-                                       Handle<mirror::Class> klass,
-                                       Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                                       ArtMethod** out_imt) {
-  StackHandleScope<3> hs(self);
-  Runtime* const runtime = Runtime::Current();
-  const bool has_superclass = klass->HasSuperClass();
-  const size_t super_ifcount = has_superclass ? klass->GetSuperClass()->GetIfTableCount() : 0U;
+// Find the default method implementation for 'target_method' in 'klass'. Stores it into
+// out_default_method and returns true on success. If no default method was found, stores nullptr
+// into out_default_method and returns true. If an error occurs (such as a default method
+// conflict), fills icce_message with an appropriate message for an IncompatibleClassChangeError,
+// which should then be thrown by the caller.
+bool ClassLinker::FindDefaultMethodImplementation(Thread* self,
+                                                  ArtMethod* target_method,
+                                                  Handle<mirror::Class> klass,
+                                                  /*out*/ArtMethod** out_default_method,
+                                                  /*out*/std::string* icce_message) const {
+  DCHECK(self != nullptr);
+  DCHECK(target_method != nullptr);
+  DCHECK(out_default_method != nullptr);
+  DCHECK(icce_message != nullptr);
+
+  *out_default_method = nullptr;
+  mirror::Class* chosen_iface = nullptr;
+
+  // We organize the interface table so that, for interface I, any subinterface J follows it in the
+  // table. This lets us walk the table backwards when searching for default methods. The first one
+  // we encounter is the best candidate since it is the most specific. Once we have found it we keep
+  // track of it and then continue checking all other interfaces, since we need to throw an error if
+  // we encounter conflicting default method implementations (neither masking the other).
+  //
+  // The order of unrelated interfaces does not matter and is not defined.
+  size_t iftable_count = klass->GetIfTableCount();
+  if (iftable_count == 0) {
+    // No interfaces. We have already reset out to null so just return true.
+    return true;
+  }
+
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::IfTable> iftable(hs.NewHandle(klass->GetIfTable()));
+  MethodNameAndSignatureComparator target_name_comparator(
+      target_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+  // Iterate over the klass's iftable in reverse.
+  // The loop ends with an explicit break because size_t is unsigned.
+  for (size_t k = iftable_count - 1; /* break if k == 0 at end */; --k) {
+    DCHECK_LT(k, iftable->Count());
+    mirror::Class* iface = iftable->GetInterface(k);
+    size_t num_instance_methods = iface->NumVirtualMethods();
+    // Iterate through every method on this interface. The order does not matter so we go forwards.
+    for (size_t m = 0; m < num_instance_methods; m++) {
+      ArtMethod* current_method = iface->GetVirtualMethodUnchecked(m, image_pointer_size_);
+      // Skip abstract methods and methods with a different name or signature.
+      if (current_method->IsAbstract() ||
+          !target_name_comparator.HasSameNameAndSignature(
+              current_method->GetInterfaceMethodIfProxy(image_pointer_size_))) {
+        continue;
+      }
+      // The verifier should have caught the non-public method.
+      DCHECK(current_method->IsPublic()) << "Interface method is not public!";
+      if (UNLIKELY(chosen_iface != nullptr)) {
+        // We have multiple default impls of the same method. We need to check they do not
+        // conflict and throw an error if they do. Conflicting means that the current iface is not
+        // masked by the chosen interface.
+        if (!iface->IsAssignableFrom(chosen_iface)) {
+          *icce_message = StringPrintf("Conflicting default method implementations: '%s' and '%s'",
+                                       PrettyMethod(current_method).c_str(),
+                                       PrettyMethod(*out_default_method).c_str());
+          return false;
+        } else {
+          break;  // Continue checking at the next interface.
+        }
+      } else {
+        *out_default_method = current_method;
+        chosen_iface = iface;
+        // We should now finish traversing the graph to find if we have default methods that
+        // conflict.
+        break;
+      }
+    }
+    if (k == 0) {
+      break;
+    }
+  }
+  return true;
+}
+
+// Sets imt_ref appropriately for LinkInterfaceMethods.
+// If there is no method in the IMT slot referenced by imt_ref, the given method is stored there.
+// Otherwise the conflict method is stored, which figures out which method to use at
+// runtime.
+static void SetIMTRef(ArtMethod* unimplemented_method,
+                      ArtMethod* conflict_method,
+                      size_t image_pointer_size,
+                      ArtMethod* current_method,
+                      /*out*/ArtMethod** imt_ref)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Place method in imt if entry is empty, place conflict otherwise.
+  if (*imt_ref == unimplemented_method) {
+    *imt_ref = current_method;
+  } else if (*imt_ref != conflict_method) {
+    // If we are not a conflict and we have the same signature and name as the imt
+    // entry, it must be that we overwrote a superclass vtable entry.
+    MethodNameAndSignatureComparator imt_comparator(
+        (*imt_ref)->GetInterfaceMethodIfProxy(image_pointer_size));
+    if (imt_comparator.HasSameNameAndSignature(
+          current_method->GetInterfaceMethodIfProxy(image_pointer_size))) {
+      *imt_ref = current_method;
+    } else {
+      *imt_ref = conflict_method;
+    }
+  }
+}
+
+// Simple helper function that checks that no subtypes of 'val' are contained within the 'classes'
+// set.
+static bool NotSubinterfaceOfAny(const std::unordered_set<mirror::Class*>& classes,
+                                 mirror::Class* val)
+    REQUIRES(Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(val != nullptr);
+  for (auto c : classes) {
+    if (val->IsAssignableFrom(&*c)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Fills in and flattens the interface inheritance hierarchy.
+//
+// By the end of this function all interfaces in the transitive closure of to_process are added to
+// the iftable and every interface precedes all of its sub-interfaces in this list.
+//
+// all I, J: Interface | I <: J implies J precedes I
+//
+// (note A <: B means that A is a subtype of B)
+//
+// This returns the total number of items in the iftable. The iftable might be resized down after
+// this call.
+//
+// We order this backwards so that we do not need to reorder superclass interfaces when new
+// interfaces are added in a subclass's interface table.
+//
+// Upon entry into this function iftable is a copy of the superclass's iftable with the first
+// super_ifcount entries filled in with the transitive closure of the interfaces of the superclass.
+// The other entries are uninitialized.  We will fill in the remaining entries in this function. The
+// iftable must be large enough to hold all interfaces without changing its size.
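+//
+// Illustrative ordering (hypothetical interfaces): if the class implements C, where C extends B
+// and B extends A, the resulting iftable segment is ordered A, B, C. A reverse scan of the
+// iftable, as done in FindDefaultMethodImplementation, therefore sees the most specific
+// interfaces first.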
+static size_t FillIfTable(mirror::IfTable* iftable,
+                          size_t super_ifcount,
+                          std::vector<mirror::Class*> to_process)
+    REQUIRES(Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // This is the set of all classes already in the iftable. Used to make checking whether a class
+  // has already been added quicker.
+  std::unordered_set<mirror::Class*> classes_in_iftable;
+  // The first super_ifcount elements are from the superclass. We note that they are already added.
+  for (size_t i = 0; i < super_ifcount; i++) {
+    mirror::Class* iface = iftable->GetInterface(i);
+    DCHECK(NotSubinterfaceOfAny(classes_in_iftable, iface)) << "Bad ordering.";
+    classes_in_iftable.insert(iface);
+  }
+  size_t filled_ifcount = super_ifcount;
+  for (mirror::Class* interface : to_process) {
+    // Let us call the first filled_ifcount elements of iftable the current-iface-list.
+    // At this point in the loop current-iface-list has the invariant that:
+    //    for every pair of interfaces I,J within it:
+    //      if index_of(I) < index_of(J) then I is not a subtype of J
+
+    // If we have already seen this element then all of its super-interfaces must already be in the
+    // current-iface-list so we can skip adding it.
+    if (!ContainsElement(classes_in_iftable, interface)) {
+      // We haven't seen this interface so add all of its super-interfaces onto the
+      // current-iface-list, skipping those already on it.
+      int32_t ifcount = interface->GetIfTableCount();
+      for (int32_t j = 0; j < ifcount; j++) {
+        mirror::Class* super_interface = interface->GetIfTable()->GetInterface(j);
+        if (!ContainsElement(classes_in_iftable, super_interface)) {
+          DCHECK(NotSubinterfaceOfAny(classes_in_iftable, super_interface)) << "Bad ordering.";
+          classes_in_iftable.insert(super_interface);
+          iftable->SetInterface(filled_ifcount, super_interface);
+          filled_ifcount++;
+        }
+      }
+      DCHECK(NotSubinterfaceOfAny(classes_in_iftable, interface)) << "Bad ordering";
+      // Place this interface onto the current-iface-list after all of its super-interfaces.
+      classes_in_iftable.insert(interface);
+      iftable->SetInterface(filled_ifcount, interface);
+      filled_ifcount++;
+    } else if (kIsDebugBuild) {
+      // Check all super-interfaces are already in the list.
+      int32_t ifcount = interface->GetIfTableCount();
+      for (int32_t j = 0; j < ifcount; j++) {
+        mirror::Class* super_interface = interface->GetIfTable()->GetInterface(j);
+        DCHECK(ContainsElement(classes_in_iftable, super_interface))
+            << "Iftable does not contain " << PrettyClass(super_interface)
+            << ", a superinterface of " << PrettyClass(interface);
+      }
+    }
+  }
+  if (kIsDebugBuild) {
+    // Check that the iftable is ordered correctly.
+    for (size_t i = 0; i < filled_ifcount; i++) {
+      mirror::Class* if_a = iftable->GetInterface(i);
+      for (size_t j = i + 1; j < filled_ifcount; j++) {
+        mirror::Class* if_b = iftable->GetInterface(j);
+        // !(if_a <: if_b)
+        CHECK(!if_b->IsAssignableFrom(if_a))
+            << "Bad interface order: " << PrettyClass(if_a) << " (index " << i << ") extends "
+            << PrettyClass(if_b) << " (index " << j << ") and so should be after it in the "
+            << "interface list.";
+      }
+    }
+  }
+  return filled_ifcount;
+}
+
+bool ClassLinker::SetupInterfaceLookupTable(Thread* self, Handle<mirror::Class> klass,
+                                            Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
+  StackHandleScope<1> hs(self);
+  const size_t super_ifcount =
+      klass->HasSuperClass() ? klass->GetSuperClass()->GetIfTableCount() : 0U;
   const bool have_interfaces = interfaces.Get() != nullptr;
-  const size_t num_interfaces = have_interfaces
-      ? interfaces->GetLength()
-      : klass->NumDirectInterfaces();
-  const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_);
-  const size_t method_size = ArtMethod::Size(image_pointer_size_);
+  const size_t num_interfaces =
+      have_interfaces ? interfaces->GetLength() : klass->NumDirectInterfaces();
   if (num_interfaces == 0) {
     if (super_ifcount == 0) {
       // Class implements no interfaces.
@@ -4937,6 +4976,7 @@
     }
   }
   size_t ifcount = super_ifcount + num_interfaces;
+  // Check that every class being implemented is an interface.
   for (size_t i = 0; i < num_interfaces; i++) {
     mirror::Class* interface = have_interfaces
         ? interfaces->GetWithoutChecks(i)
@@ -4952,11 +4992,13 @@
     }
     ifcount += interface->GetIfTableCount();
   }
+  // Create the interface function table.
   MutableHandle<mirror::IfTable> iftable(hs.NewHandle(AllocIfTable(self, ifcount)));
   if (UNLIKELY(iftable.Get() == nullptr)) {
     self->AssertPendingOOMException();
     return false;
   }
+  // Fill in table with superclass's iftable.
   if (super_ifcount != 0) {
     mirror::IfTable* super_iftable = klass->GetSuperClass()->GetIfTable();
     for (size_t i = 0; i < super_ifcount; i++) {
@@ -4964,56 +5006,59 @@
       iftable->SetInterface(i, super_interface);
     }
   }
+
+  // Note that AllowThreadSuspension is to thread suspension as pthread_testcancel is to pthread
+  // cancellation. That is, it will suspend if the thread has a pending suspend request but
+  // otherwise doesn't really do anything.
   self->AllowThreadSuspension();
-  // Flatten the interface inheritance hierarchy.
-  size_t idx = super_ifcount;
-  for (size_t i = 0; i < num_interfaces; i++) {
-    mirror::Class* interface = have_interfaces ? interfaces->Get(i) :
-        mirror::Class::GetDirectInterface(self, klass, i);
-    // Check if interface is already in iftable
-    bool duplicate = false;
-    for (size_t j = 0; j < idx; j++) {
-      mirror::Class* existing_interface = iftable->GetInterface(j);
-      if (existing_interface == interface) {
-        duplicate = true;
-        break;
-      }
+
+  size_t new_ifcount;
+  {
+    ScopedAssertNoThreadSuspension nts(self, "Copying mirror::Class*'s for FillIfTable");
+    std::vector<mirror::Class*> to_add;
+    for (size_t i = 0; i < num_interfaces; i++) {
+      mirror::Class* interface = have_interfaces ? interfaces->Get(i) :
+          mirror::Class::GetDirectInterface(self, klass, i);
+      to_add.push_back(interface);
     }
-    if (!duplicate) {
-      // Add this non-duplicate interface.
-      iftable->SetInterface(idx++, interface);
-      // Add this interface's non-duplicate super-interfaces.
-      for (int32_t j = 0; j < interface->GetIfTableCount(); j++) {
-        mirror::Class* super_interface = interface->GetIfTable()->GetInterface(j);
-        bool super_duplicate = false;
-        for (size_t k = 0; k < idx; k++) {
-          mirror::Class* existing_interface = iftable->GetInterface(k);
-          if (existing_interface == super_interface) {
-            super_duplicate = true;
-            break;
-          }
-        }
-        if (!super_duplicate) {
-          iftable->SetInterface(idx++, super_interface);
-        }
-      }
-    }
+
+    new_ifcount = FillIfTable(iftable.Get(), super_ifcount, std::move(to_add));
   }
+
   self->AllowThreadSuspension();
+
   // Shrink iftable in case duplicates were found
-  if (idx < ifcount) {
+  if (new_ifcount < ifcount) {
     DCHECK_NE(num_interfaces, 0U);
     iftable.Assign(down_cast<mirror::IfTable*>(
-        iftable->CopyOf(self, idx * mirror::IfTable::kMax)));
+        iftable->CopyOf(self, new_ifcount * mirror::IfTable::kMax)));
     if (UNLIKELY(iftable.Get() == nullptr)) {
       self->AssertPendingOOMException();
       return false;
     }
-    ifcount = idx;
+    ifcount = new_ifcount;
   } else {
-    DCHECK_EQ(idx, ifcount);
+    DCHECK_EQ(new_ifcount, ifcount);
   }
   klass->SetIfTable(iftable.Get());
+  return true;
+}
+
+bool ClassLinker::LinkInterfaceMethods(
+    Thread* self,
+    Handle<mirror::Class> klass,
+    const std::unordered_map<size_t, ArtMethod*>& default_translations,
+    ArtMethod** out_imt) {
+  StackHandleScope<3> hs(self);
+  Runtime* const runtime = Runtime::Current();
+  const bool has_superclass = klass->HasSuperClass();
+  const size_t super_ifcount = has_superclass ? klass->GetSuperClass()->GetIfTableCount() : 0U;
+  const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_);
+  const size_t method_size = ArtMethod::Size(image_pointer_size_);
+  const size_t ifcount = klass->GetIfTableCount();
+
+  MutableHandle<mirror::IfTable> iftable(hs.NewHandle(klass->GetIfTable()));
+
   // If we're an interface, we don't need the vtable pointers, so we're done.
   if (klass->IsInterface()) {
     return true;
@@ -5026,6 +5071,7 @@
   ArenaStack stack(runtime->GetLinearAlloc()->GetArenaPool());
   ScopedArenaAllocator allocator(&stack);
   ScopedArenaVector<ArtMethod*> miranda_methods(allocator.Adapter());
+  ScopedArenaVector<ArtMethod*> default_methods(allocator.Adapter());
 
   MutableHandle<mirror::PointerArray> vtable(hs.NewHandle(klass->GetVTableDuringLinking()));
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
@@ -5055,7 +5101,9 @@
         for (size_t j = 0; j < num_virtuals; ++j) {
           auto method = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
           DCHECK(method != nullptr) << PrettyClass(super_class);
-          if (method->IsMiranda()) {
+          // Miranda methods cannot be used to implement an interface method and default methods
+          // should be skipped in case we override them.
+          if (method->IsDefault() || method->IsMiranda()) {
             continue;
           }
           ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
@@ -5076,6 +5124,8 @@
     size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
     if (num_methods > 0) {
       const bool is_super = i < super_ifcount;
+      // This is an interface implemented by a super-class. Therefore we can just copy the method
+      // array from the superclass.
       const bool super_interface = is_super && extend_super_iftable;
       mirror::PointerArray* method_array;
       if (super_interface) {
@@ -5119,16 +5169,13 @@
         input_vtable_array = vtable;
         input_array_length = input_vtable_array->GetLength();
       }
-      if (input_array_length == 0) {
-        // If the added virtual methods is empty, do nothing.
-        DCHECK(super_interface);
-        continue;
-      }
+      // For each method in interface
       for (size_t j = 0; j < num_methods; ++j) {
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        int32_t k;
+        uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+        ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
         // subclass over the superclass, which just requires walking
@@ -5137,7 +5184,12 @@
         // it -- otherwise it would use the same vtable slot.  In .dex files
         // those don't end up in the virtual method table, so it shouldn't
         // matter which direction we go.  We walk it backward anyway.)
-        for (k = input_array_length - 1; k >= 0; --k) {
+        //
+        // To find defaults we need to do the same but also go over interfaces.
+        bool found_impl = false;
+        ArtMethod* default_impl = nullptr;
+        bool found_default_impl = false;
+        for (int32_t k = input_array_length - 1; k >= 0; --k) {
           ArtMethod* vtable_method = input_virtual_methods != nullptr ?
               &input_virtual_methods->At(k, method_size, method_alignment) :
               input_vtable_array->GetElementPtrSize<ArtMethod*>(k, image_pointer_size_);
@@ -5153,25 +5205,69 @@
                   "Method '%s' implementing interface method '%s' is not public",
                   PrettyMethod(vtable_method).c_str(), PrettyMethod(interface_method).c_str());
               return false;
+            } else if (vtable_method->IsDefault()) {
+              // We might have a newer, better default method for this, so we just skip it. If we
+              // are still using this one we will select it again when scanning for default
+              // methods. To avoid copying the method again we note here that we already found a
+              // default.
+              // TODO This should be much cleaner.
+              found_default_impl = true;
+              default_impl = vtable_method;
+              break;
+            } else {
+              found_impl = true;
             }
             method_array->SetElementPtrSize(j, vtable_method, image_pointer_size_);
             // Place method in imt if entry is empty, place conflict otherwise.
-            uint32_t imt_index = interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
-            auto** imt_ref = &out_imt[imt_index];
-            if (*imt_ref == unimplemented_method) {
-              *imt_ref = vtable_method;
-            } else if (*imt_ref != conflict_method) {
-              // If we are not a conflict and we have the same signature and name as the imt entry,
-              // it must be that we overwrote a superclass vtable entry.
-              MethodNameAndSignatureComparator imt_comparator(
-                  (*imt_ref)->GetInterfaceMethodIfProxy(image_pointer_size_));
-              *imt_ref = imt_comparator.HasSameNameAndSignature(vtable_method_for_name_comparison) ?
-                  vtable_method : conflict_method;
-            }
+            SetIMTRef(unimplemented_method,
+                      conflict_method,
+                      image_pointer_size_,
+                      vtable_method,
+                      /*out*/imt_ptr);
             break;
           }
         }
-        if (k < 0 && !super_interface) {
+        // We should only search for default implementations when the class does not implement the
+        // method directly and either (1) the interface is newly implemented on this class and not
+        // on any of its superclasses, (2) the superclass's implementation is a default method, or
+        // (3) the superclass does not have an implementation.
+        if (!found_impl && (!super_interface ||
+                            method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_)
+                                ->IsOverridableByDefaultMethod())) {
+          ArtMethod* current_method = nullptr;
+          std::string icce_message;
+          if (!FindDefaultMethodImplementation(self,
+                                               interface_method,
+                                               klass,
+                                               /*out*/&current_method,
+                                               /*out*/&icce_message)) {
+            // There was a conflict with default method implementations.
+            self->EndAssertNoThreadSuspension(old_cause);
+            // TODO This should actually be thrown when we attempt to invoke this method.
+            ThrowIncompatibleClassChangeError(klass.Get(), "%s", icce_message.c_str());
+            return false;
+          } else if (current_method != nullptr) {
+            if (found_default_impl &&
+                current_method->GetDeclaringClass() == default_impl->GetDeclaringClass()) {
+              // We found a default method but it was the same one we already have from our
+              // superclass. Don't bother adding it to our vtable again.
+              current_method = default_impl;
+            } else {
+              // We found a default method implementation and there were no conflicts.
+              // Save the default method. We need to add it to the vtable.
+              default_methods.push_back(current_method);
+            }
+            method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
+            SetIMTRef(unimplemented_method,
+                      conflict_method,
+                      image_pointer_size_,
+                      current_method,
+                      /*out*/imt_ptr);
+            found_impl = true;
+          }
+        }
+        if (!found_impl && !super_interface) {
+          // The method was not implemented by this class or any of its superclasses, so create a
+          // miranda method for it.
           ArtMethod* miranda_method = nullptr;
           for (auto& mir_method : miranda_methods) {
             if (interface_name_comparator.HasSameNameAndSignature(mir_method)) {
@@ -5183,7 +5279,7 @@
             miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
             CHECK(miranda_method != nullptr);
             // Point the interface table at a phantom slot.
-            new(miranda_method) ArtMethod(*interface_method, image_pointer_size_);
+            new(miranda_method) ArtMethod(interface_method, image_pointer_size_);
             miranda_methods.push_back(miranda_method);
           }
           method_array->SetElementPtrSize(j, miranda_method, image_pointer_size_);
@@ -5191,9 +5287,10 @@
       }
     }
   }
-  if (!miranda_methods.empty()) {
+  if (!miranda_methods.empty() || !default_methods.empty()) {
     const size_t old_method_count = klass->NumVirtualMethods();
-    const size_t new_method_count = old_method_count + miranda_methods.size();
+    const size_t new_method_count =
+        old_method_count + miranda_methods.size() + default_methods.size();
     // Attempt to realloc to save RAM if possible.
     LengthPrefixedArray<ArtMethod>* old_virtuals = klass->GetVirtualMethodsPtr();
     // The Realloced virtual methods aren't visible from the class roots, so there is no issue
@@ -5218,7 +5315,7 @@
     ScopedArenaUnorderedMap<ArtMethod*, ArtMethod*> move_table(allocator.Adapter());
     if (virtuals != old_virtuals) {
       // Maps from heap allocated miranda method to linear alloc miranda method.
-      StrideIterator<ArtMethod> out = virtuals->Begin(method_size, method_alignment);
+      StrideIterator<ArtMethod> out = virtuals->begin(method_size, method_alignment);
       // Copy over the old methods + miranda methods.
       for (auto& m : klass->GetVirtualMethods(image_pointer_size_)) {
         move_table.emplace(&m, &*out);
@@ -5228,30 +5325,54 @@
         ++out;
       }
     }
-    StrideIterator<ArtMethod> out(virtuals->Begin(method_size, method_alignment) + old_method_count);
+    StrideIterator<ArtMethod> out(virtuals->begin(method_size, method_alignment)
+                                      + old_method_count);
     // Copy over miranda methods before copying vtable since CopyOf may cause thread suspension and
     // we want the roots of the miranda methods to get visited.
     for (ArtMethod* mir_method : miranda_methods) {
-      out->CopyFrom(mir_method, image_pointer_size_);
-      out->SetAccessFlags(out->GetAccessFlags() | kAccMiranda);
-      move_table.emplace(mir_method, &*out);
+      ArtMethod& new_method = *out;
+      new_method.CopyFrom(mir_method, image_pointer_size_);
+      new_method.SetAccessFlags(new_method.GetAccessFlags() | kAccMiranda);
+      DCHECK_NE(new_method.GetAccessFlags() & kAccAbstract, 0u)
+          << "Miranda method should be abstract!";
+      move_table.emplace(mir_method, &new_method);
       ++out;
     }
-    virtuals->SetLength(new_method_count);
+    // We need to copy the default methods into our own virtual method table since the runtime
+    // requires that every method on a class's vtable be in that respective class's virtual method
+    // table.
+    // NOTE This means that two classes might have the same implementation of a method from the same
+    // interface but will have different ArtMethod*s for them. This also means we cannot compare a
+    // default method found on a class with one found on the declaring interface directly and must
+    // look at the declaring class to determine if they are the same.
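+    // For example (hypothetical classes): if X and Y both inherit I.foo() as a default method,
+    // each ends up with its own copy in its virtual method table. The two copies compare unequal
+    // as ArtMethod*s, but both still have I as their declaring class, which is the comparison
+    // used earlier when deciding whether a found default is the one already inherited from the
+    // superclass.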
+    for (ArtMethod* def_method : default_methods) {
+      ArtMethod& new_method = *out;
+      new_method.CopyFrom(def_method, image_pointer_size_);
+      new_method.SetAccessFlags(new_method.GetAccessFlags() | kAccDefault);
+      // Clear the preverified flag if it is present. Since this class hasn't been verified yet it
+      // shouldn't have methods that are preverified.
+      // TODO This is rather arbitrary. We should maybe support classes where only some of their
+      // methods are preverified.
+      new_method.SetAccessFlags(new_method.GetAccessFlags() & ~kAccPreverified);
+      move_table.emplace(def_method, &new_method);
+      ++out;
+    }
+    virtuals->SetSize(new_method_count);
     UpdateClassVirtualMethods(klass.Get(), virtuals);
     // Done copying methods, they are all roots in the class now, so we can end the no thread
     // suspension assert.
     self->EndAssertNoThreadSuspension(old_cause);
 
     const size_t old_vtable_count = vtable->GetLength();
-    const size_t new_vtable_count = old_vtable_count + miranda_methods.size();
+    const size_t new_vtable_count =
+        old_vtable_count + miranda_methods.size() + default_methods.size();
     miranda_methods.clear();
     vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
     if (UNLIKELY(vtable.Get() == nullptr)) {
       self->AssertPendingOOMException();
       return false;
     }
-    out = virtuals->Begin(method_size, method_alignment) + old_method_count;
+    out = virtuals->begin(method_size, method_alignment) + old_method_count;
     size_t vtable_pos = old_vtable_count;
     for (size_t i = old_method_count; i < new_method_count; ++i) {
       // Leave the declaring class alone as type indices are relative to it
@@ -5261,15 +5382,29 @@
       ++vtable_pos;
     }
     CHECK_EQ(vtable_pos, new_vtable_count);
-    // Update old vtable methods.
+    // Update old vtable methods. We use the default_translations map to figure out what each
+    // vtable entry should be updated to, if it needs to be updated at all.
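+    // For example (hypothetical slot): if vtable slot 3 inherited a default I.foo() from the
+    // superclass but this class picked up an overriding default J.foo(), default_translations
+    // maps 3 to J.foo(), and the move_table lookup below redirects the slot to this class's
+    // freshly copied version of that method.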
     for (size_t i = 0; i < old_vtable_count; ++i) {
-      auto* m = vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_);
-      DCHECK(m != nullptr) << PrettyClass(klass.Get());
-      auto it = move_table.find(m);
+      ArtMethod* translated_method = vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_);
+      // Try and find what we need to change this method to.
+      auto translation_it = default_translations.find(i);
+      bool found_translation = false;
+      if (translation_it != default_translations.end()) {
+        size_t vtable_index;
+        std::tie(vtable_index, translated_method) = *translation_it;
+        DCHECK_EQ(vtable_index, i);
+        found_translation = true;
+      }
+      DCHECK(translated_method != nullptr);
+      auto it = move_table.find(translated_method);
       if (it != move_table.end()) {
-        auto* new_m = it->second;
-        DCHECK(new_m != nullptr) << PrettyClass(klass.Get());
-        vtable->SetElementPtrSize(i, new_m, image_pointer_size_);
+        auto* new_method = it->second;
+        DCHECK(new_method != nullptr);
+        vtable->SetElementPtrSize(i, new_method, image_pointer_size_);
+      } else {
+        // If it was not going to be updated we wouldn't have put it into the default_translations
+        // map.
+        CHECK(!found_translation) << "We were asked to update this vtable entry. Must not fail.";
       }
     }
 
@@ -5300,7 +5435,11 @@
       auto* resolved_methods = klass->GetDexCache()->GetResolvedMethods();
       for (size_t i = 0, count = klass->GetDexCache()->NumResolvedMethods(); i < count; ++i) {
         auto* m = mirror::DexCache::GetElementPtrSize(resolved_methods, i, image_pointer_size_);
-        CHECK(move_table.find(m) == move_table.end()) << PrettyMethod(m);
+        // We don't remove default methods from the move table since we need them to update the
+        // vtable. Therefore just skip them for this check.
+        if (!m->IsDefault()) {
+          CHECK(move_table.find(m) == move_table.end()) << PrettyMethod(m);
+        }
       }
     }
     // Put some random garbage in old virtuals to help find stale pointers.
@@ -6075,7 +6214,8 @@
 }
 
 bool ClassLinker::MayBeCalledWithDirectCodePointer(ArtMethod* m) {
-  if (Runtime::Current()->UseJit()) {
+  Runtime* const runtime = Runtime::Current();
+  if (runtime->UseJit()) {
     // JIT can have direct code pointers from any method to any other method.
     return true;
   }
@@ -6097,13 +6237,7 @@
   } else {
     // The method can be called outside its own oat file. Therefore it won't be called using its
     // direct code pointer only if all loaded oat files have been compiled in PIC mode.
-    ReaderMutexLock mu(Thread::Current(), dex_lock_);
-    for (const OatFile* oat_file : oat_files_) {
-      if (!oat_file->IsPic()) {
-        return true;
-      }
-    }
-    return false;
+    return runtime->GetOatFileManager().HaveNonPicOatFile();
   }
 }
 
@@ -6138,9 +6272,13 @@
   for (const DexFile* dex_file : dex_files) {
     StackHandleScope<3> hs2(self);
 
-    Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc(self, 1));
+    // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the
+    // oat file but we can leave it null.
+    Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc(
+        self,
+        kDexFileIndexStart + 1));
     DCHECK(h_long_array.Get() != nullptr);
-    h_long_array->Set(0, reinterpret_cast<intptr_t>(dex_file));
+    h_long_array->Set(kDexFileIndexStart, reinterpret_cast<intptr_t>(dex_file));
 
     Handle<mirror::Object> h_dex_file = hs2.NewHandle(
         cookie_field->GetDeclaringClass()->AllocObject(self));
@@ -6221,10 +6359,24 @@
   }
 }
 
+void ClassLinker::InsertDexFileInToClassLoader(mirror::Object* dex_file,
+                                               mirror::ClassLoader* class_loader) {
+  DCHECK(dex_file != nullptr);
+  DCHECK(class_loader != nullptr);
+  Thread* const self = Thread::Current();
+  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  ClassTable* const table = class_loader->GetClassTable();
+  DCHECK(table != nullptr);
+  if (table->InsertDexFile(dex_file)) {
+    // It was not already inserted, so perform the write barrier to let the GC know the class
+    // loader's class table was modified.
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+  }
+}
+
 void ClassLinker::CleanupClassLoaders() {
   Thread* const self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
   for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
     const ClassLoaderData& data = *it;
     // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
@@ -6232,10 +6384,7 @@
     if (class_loader != nullptr) {
       ++it;
     } else {
-      // Weak reference was cleared, delete the data associated with this class loader.
-      delete data.class_table;
-      delete data.allocator;
-      vm->DeleteWeakGlobalRef(self, data.weak_root);
+      DeleteClassLoader(self, data);
       it = class_loaders_.erase(it);
     }
   }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 7f3e938..392efd2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_CLASS_LINKER_H_
 
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -318,24 +319,17 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
-  mirror::DexCache* RegisterDexFile(const DexFile& dex_file)
+  mirror::DexCache* RegisterDexFile(const DexFile& dex_file, LinearAlloc* linear_alloc)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  const OatFile* RegisterOatFile(const OatFile* oat_file)
-      REQUIRES(!dex_lock_);
-
   const std::vector<const DexFile*>& GetBootClassPath() {
     return boot_class_path_;
   }
 
-  // Returns the first non-image oat file in the class path.
-  const OatFile* GetPrimaryOatFile()
-      REQUIRES(!dex_lock_);
-
   void VisitClasses(ClassVisitor* visitor)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -364,26 +358,6 @@
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Finds or creates the oat file holding dex_location. Then loads and returns
-  // all corresponding dex files (there may be more than one dex file loaded
-  // in the case of multidex).
-  // This may return the original, unquickened dex files if the oat file could
-  // not be generated.
-  //
-  // Returns an empty vector if the dex files could not be loaded. In this
-  // case, there will be at least one error message returned describing why no
-  // dex files could not be loaded. The 'error_msgs' argument must not be
-  // null, regardless of whether there is an error or not.
-  //
-  // This method should not be called with the mutator_lock_ held, because it
-  // could end up starving GC if we need to generate or relocate any oat
-  // files.
-  std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
-      const char* dex_location,
-      const char* oat_location,
-      std::vector<std::string>* error_msgs)
-      REQUIRES(!dex_lock_, !Locks::mutator_lock_);
-
   // Allocate an instance of a java.lang.Object.
   mirror::Object* AllocObject(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -552,12 +526,24 @@
 
   // Clean up class loaders, this needs to happen after JNI weak globals are cleared.
   void CleanupClassLoaders()
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!Locks::classlinker_classes_lock_);
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Unlike GetOrCreateAllocatorForClassLoader, GetAllocatorForClassLoader asserts that the
+  // allocator for this class loader is already created.
   static LinearAlloc* GetAllocatorForClassLoader(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Return the linear alloc for a class loader if it is already allocated, otherwise allocate and
+  // set it. TODO: Consider using a lock other than classlinker_classes_lock_.
+  static LinearAlloc* GetOrCreateAllocatorForClassLoader(mirror::ClassLoader* class_loader)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void InsertDexFileInToClassLoader(mirror::Object* dex_file, mirror::ClassLoader* class_loader)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   struct ClassLoaderData {
     jweak weak_root;  // Weak root to enable class unloading.
@@ -565,6 +551,10 @@
     LinearAlloc* allocator;
   };
 
+  static void DeleteClassLoader(Thread* self, const ClassLoaderData& data)
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void VisitClassLoaders(ClassLoaderVisitor* visitor) const
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
@@ -581,10 +571,6 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  OatFile& GetImageOatFile(gc::space::ImageSpace* space)
-      REQUIRES(!dex_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   void FinishInit(Thread* self)
   SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
@@ -600,7 +586,9 @@
   mirror::Class* AllocClass(Thread* self, uint32_t class_size)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
-  mirror::DexCache* AllocDexCache(Thread* self, const DexFile& dex_file)
+  mirror::DexCache* AllocDexCache(Thread* self,
+                                  const DexFile& dex_file,
+                                  LinearAlloc* linear_alloc)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
@@ -679,6 +667,12 @@
                        bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
+  bool InitializeDefaultInterfaceRecursive(Thread* self,
+                                           Handle<mirror::Class> klass,
+                                           bool can_run_clinit,
+                                           bool can_init_parents)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   bool WaitForInitializeClass(Handle<mirror::Class> klass,
                               Thread* self,
                               ObjectLock<mirror::Class>& lock);
@@ -718,12 +712,65 @@
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool LinkVirtualMethods(Thread* self, Handle<mirror::Class> klass)
+  // Links the virtual methods for the given class and records any default methods that will need to
+  // be updated later.
+  //
+  // Arguments:
+  // * self - The current thread.
+  // * klass - the class whose vtable will be filled in.
+  // * default_translations - Vtable index to new method map.
+  //                          Any vtable entries that need to be updated with new default methods
+  //                          are stored into the default_translations map. The default_translations
+  //                          map is keyed on the vtable index that needs to be updated. We use this
+  //                          map because if we override a default method with another default
+  //                          method we need to update the vtable to point to the new method.
+  //                          Unfortunately, since we copy the ArtMethod*s, we cannot just do a
+  //                          simple scan; we therefore store the vtable indices that might need
+  //                          to be updated together with the method they will turn into.
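+  //                          For example (hypothetical entry): {3 -> J.foo()} records that vtable
+  //                          slot 3, currently an inherited default, must later be redirected by
+  //                          LinkInterfaceMethods to the copy of J.foo() placed in this class.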
+  // TODO This whole default_translations thing is very dirty. There should be a better way.
+  bool LinkVirtualMethods(Thread* self,
+                          Handle<mirror::Class> klass,
+                          /*out*/std::unordered_map<size_t, ArtMethod*>* default_translations)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Sets up the interface lookup table (IFTable) in the correct order to allow searching for
+  // default methods.
+  bool SetupInterfaceLookupTable(Thread* self,
+                                 Handle<mirror::Class> klass,
+                                 Handle<mirror::ObjectArray<mirror::Class>> interfaces)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Find the default method implementation for 'target_method' in 'klass', if one exists.
+  //
+  // Arguments:
+  // * self - The current thread.
+  // * target_method - The method we are trying to find a default implementation for.
+  // * klass - The class we are searching for a definition of target_method.
+  // * out_default_method - The pointer we will store the found default method to on success.
+  // * icce_message - A string we will store an appropriate IncompatibleClassChangeError message
+  //                  into in case of failure. Note we must do it this way since we do not know
+  //                  whether we can allocate the exception object, which could cause us to go to
+  //                  sleep.
+  //
+  // Return value:
+  // * True - There were no conflicting method implementations found in the class while searching
+  //          for target_method. The default method implementation is stored into out_default_method
+  //          if it was found.  Otherwise *out_default_method will be set to nullptr.
+  // * False - Conflicting method implementations were found when searching for target_method. The
+  //           value of *out_default_method is undefined and *icce_message is a string that should
+  //           be used to create an IncompatibleClassChangeError as soon as possible.
+  bool FindDefaultMethodImplementation(Thread* self,
+                                       ArtMethod* target_method,
+                                       Handle<mirror::Class> klass,
+                                       /*out*/ArtMethod** out_default_method,
+                                       /*out*/std::string* icce_message) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Sets the imt entries and fixes up the vtable for the given class by linking all the interface
+  // methods. See LinkVirtualMethods for an explanation of what default_translations is.
   bool LinkInterfaceMethods(Thread* self,
                             Handle<mirror::Class> klass,
-                            Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                            const std::unordered_map<size_t, ArtMethod*>& default_translations,
                             ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -758,12 +805,6 @@
     return dex_caches_;
   }
 
-  const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location)
-      REQUIRES(!dex_lock_);
-
-  // Returns the boot image oat file.
-  const OatFile* GetBootOatFile() SHARED_REQUIRES(dex_lock_);
-
   void CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype, ArtMethod* out)
@@ -813,9 +854,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  // Check for duplicate class definitions of the given oat file against all open oat files.
-  bool HasCollisions(const OatFile* oat_file, std::string* error_msg) REQUIRES(!dex_lock_);
-
   bool HasInitWithString(Thread* self, const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
 
@@ -834,7 +872,6 @@
   // JNI weak globals to allow dex caches to get unloaded. We lazily delete weak globals when we
   // register new dex files.
   std::list<jweak> dex_caches_ GUARDED_BY(dex_lock_);
-  std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
 
   // This contains the class loaders which have class tables. It is populated by
   // InsertClassTableForClassLoader.
@@ -880,8 +917,8 @@
   // Image pointer size.
   size_t image_pointer_size_;
 
+  friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
-  friend class ImageDumper;  // for FindOpenedOatFileFromOatLocation
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
   friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
   ART_FRIEND_TEST(mirror::DexCacheTest, Open);  // for AllocDexCache
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 0926ce3..04b8900 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -1032,9 +1032,7 @@
   mirror::Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", class_loader);
   ArtMethod* clinit = klass->FindClassInitializer(sizeof(void*));
   ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;", sizeof(void*));
-  const DexFile::StringId* string_id = dex_file->FindStringId("LStaticsFromCode;");
-  ASSERT_TRUE(string_id != nullptr);
-  const DexFile::TypeId* type_id = dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
+  const DexFile::TypeId* type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(type_id != nullptr);
   uint32_t type_idx = dex_file->GetIndexForTypeId(*type_id);
   mirror::Class* uninit = ResolveVerifyAndClinit(type_idx, clinit, soa.Self(), true, false);
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index dc60a2c..aef02b6 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -37,6 +37,9 @@
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
+  for (GcRoot<mirror::Object>& root : dex_files_) {
+    visitor.VisitRoot(root.AddressWithoutBarrier());
+  }
 }
 
 }  // namespace art
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index fc8e6c4..3ed1c95 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -21,7 +21,9 @@
 namespace art {
 
 ClassTable::ClassTable() {
-  classes_.push_back(ClassSet());
+  Runtime* const runtime = Runtime::Current();
+  classes_.push_back(ClassSet(runtime->GetHashTableMinLoadFactor(),
+                              runtime->GetHashTableMaxLoadFactor()));
 }
 
 void ClassTable::FreezeSnapshot() {
@@ -135,4 +137,15 @@
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
+bool ClassTable::InsertDexFile(mirror::Object* dex_file) {
+  DCHECK(dex_file != nullptr);
+  for (GcRoot<mirror::Object>& root : dex_files_) {
+    if (root.Read() == dex_file) {
+      return false;
+    }
+  }
+  dex_files_.push_back(GcRoot<mirror::Object>(dex_file));
+  return true;
+}
+
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 727392e..002bb56 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -50,12 +50,14 @@
 
   // Used by image writer for checking.
   bool Contains(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Freeze the current class tables by allocating a new table and never updating or modifying the
   // existing table. This helps prevent dirty pages caused by inserting after the zygote fork.
   void FreezeSnapshot()
-      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the number of classes in previous snapshots.
   size_t NumZygoteClasses() const SHARED_REQUIRES(Locks::classlinker_classes_lock_);
@@ -65,17 +67,18 @@
 
   // Update a class in the table with the new class. Returns the existing class which was replaced.
   mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock.
   template<class Visitor>
   void VisitRoots(Visitor& visitor)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
+      NO_THREAD_SAFETY_ANALYSIS
+      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
   template<class Visitor>
   void VisitRoots(const Visitor& visitor)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
+      NO_THREAD_SAFETY_ANALYSIS
+      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
   // Return false if the callback told us to exit.
   bool Visit(ClassVisitor* visitor)
@@ -85,13 +88,21 @@
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
   void Insert(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void InsertWithHash(mirror::Class* klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if the class was found and removed, false otherwise.
   bool Remove(const char* descriptor)
-      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return true if we inserted the dex file, false if it already exists.
+  bool InsertDexFile(mirror::Object* dex_file)
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
   class ClassDescriptorHashEquals {
@@ -123,6 +134,9 @@
   // TODO: shard lock to have one per class loader.
   // We have a vector to help prevent dirty pages after the zygote forks by calling FreezeSnapshot.
   std::vector<ClassSet> classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  // Dex files used by the class loader which may not be owned by the class loader. We keep these
+  // live so that we do not have issues closing any of the dex files.
+  std::vector<GcRoot<mirror::Object>> dex_files_ GUARDED_BY(Locks::classlinker_classes_lock_);
 };
 
 }  // namespace art
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 56c5d1a..b6b5141 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -42,6 +42,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mem_map.h"
+#include "native/dalvik_system_DexFile.h"
 #include "noop_compiler_callbacks.h"
 #include "os.h"
 #include "primitive.h"
@@ -516,7 +517,7 @@
           mirror::LongArray* long_array = cookie_field->GetObject(dex_file)->AsLongArray();
           DCHECK(long_array != nullptr);
           int32_t long_array_size = long_array->GetLength();
-          for (int32_t j = 0; j < long_array_size; ++j) {
+          for (int32_t j = kDexFileIndexStart; j < long_array_size; ++j) {
             const DexFile* cp_dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(
                 long_array->GetWithoutChecks(j)));
             if (cp_dex_file == nullptr) {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index b19381d..7117be9 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -69,29 +69,26 @@
   return alloc_record_count;
 }
 
-class Breakpoint {
+class Breakpoint : public ValueObject {
  public:
-  Breakpoint(ArtMethod* method, uint32_t dex_pc,
-             DeoptimizationRequest::Kind deoptimization_kind)
-    SHARED_REQUIRES(Locks::mutator_lock_)
-    : method_(nullptr), dex_pc_(dex_pc), deoptimization_kind_(deoptimization_kind) {
+  Breakpoint(ArtMethod* method, uint32_t dex_pc, DeoptimizationRequest::Kind deoptimization_kind)
+    : method_(method),
+      dex_pc_(dex_pc),
+      deoptimization_kind_(deoptimization_kind) {
     CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
           deoptimization_kind_ == DeoptimizationRequest::kSelectiveDeoptimization ||
           deoptimization_kind_ == DeoptimizationRequest::kFullDeoptimization);
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    method_ = soa.EncodeMethod(method);
   }
 
   Breakpoint(const Breakpoint& other) SHARED_REQUIRES(Locks::mutator_lock_)
-    : method_(nullptr), dex_pc_(other.dex_pc_),
-      deoptimization_kind_(other.deoptimization_kind_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    method_ = soa.EncodeMethod(other.Method());
-  }
+    : method_(other.method_),
+      dex_pc_(other.dex_pc_),
+      deoptimization_kind_(other.deoptimization_kind_) {}
 
-  ArtMethod* Method() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    return soa.DecodeMethod(method_);
+  // Method() is called from root visiting; do not use ScopedObjectAccess here or it can cause the
+  // GC to deadlock if another thread tries to call SuspendAll while the GC is in a runnable state.
+  ArtMethod* Method() const {
+    return method_;
   }
 
   uint32_t DexPc() const {
@@ -104,7 +101,7 @@
 
  private:
   // The location of this breakpoint.
-  jmethodID method_;
+  ArtMethod* method_;
   uint32_t dex_pc_;
 
   // Indicates whether breakpoint needs full deoptimization or selective deoptimization.
@@ -612,7 +609,7 @@
         // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
         // This prevents us from having any pending deoptimization request when the debugger attaches
         // to us again while no event has been requested yet.
-        MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+        MutexLock mu(self, *Locks::deoptimization_lock_);
         deoptimization_requests_.clear();
         full_deoptimization_event_count_ = 0U;
       }
@@ -1903,8 +1900,7 @@
 JDWP::JdwpError Dbg::GetThreadName(JDWP::ObjectId thread_id, std::string* name) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
   JDWP::JdwpError error;
-  Thread* thread = DecodeThread(soa, thread_id, &error);
-  UNUSED(thread);
+  DecodeThread(soa, thread_id, &error);
   if (error != JDWP::ERR_NONE && error != JDWP::ERR_THREAD_NOT_ALIVE) {
     return error;
   }
@@ -1931,8 +1927,7 @@
   }
   ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroup");
   // Okay, so it's an object, but is it actually a thread?
-  Thread* thread = DecodeThread(soa, thread_id, &error);
-  UNUSED(thread);
+  DecodeThread(soa, thread_id, &error);
   if (error == JDWP::ERR_THREAD_NOT_ALIVE) {
     // Zombie threads are in the null group.
     expandBufAddObjectId(pReply, JDWP::ObjectId(0));
@@ -5043,4 +5038,13 @@
   method_ = soa.EncodeMethod(m);
 }
 
+void Dbg::VisitRoots(RootVisitor* visitor) {
+  // Visit breakpoint roots, used to prevent unloading of methods with breakpoints.
+  ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
+  BufferedRootVisitor<128> root_visitor(visitor, RootInfo(kRootVMInternal));
+  for (Breakpoint& breakpoint : gBreakpoints) {
+    breakpoint.Method()->VisitRoots(root_visitor, sizeof(void*));
+  }
+}
+
 }  // namespace art
diff --git a/runtime/debugger.h b/runtime/debugger.h
index b3617e4..e908304 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -646,6 +646,7 @@
   static void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Visit breakpoint roots, used to prevent unloading of methods with breakpoints.
   static void VisitRoots(RootVisitor* visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index ae62e2b..3a93aac 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -37,6 +37,7 @@
 #include "dex_file-inl.h"
 #include "dex_file_verifier.h"
 #include "globals.h"
+#include "handle_scope-inl.h"
 #include "leb128.h"
 #include "mirror/field.h"
 #include "mirror/method.h"
@@ -44,8 +45,8 @@
 #include "os.h"
 #include "reflection.h"
 #include "safe_map.h"
-#include "handle_scope-inl.h"
 #include "thread.h"
+#include "type_lookup_table.h"
 #include "utf-inl.h"
 #include "utils.h"
 #include "well_known_classes.h"
@@ -414,11 +415,19 @@
       method_ids_(reinterpret_cast<const MethodId*>(base + header_->method_ids_off_)),
       proto_ids_(reinterpret_cast<const ProtoId*>(base + header_->proto_ids_off_)),
       class_defs_(reinterpret_cast<const ClassDef*>(base + header_->class_defs_off_)),
-      find_class_def_misses_(0),
-      class_def_index_(nullptr),
       oat_dex_file_(oat_dex_file) {
   CHECK(begin_ != nullptr) << GetLocation();
   CHECK_GT(size_, 0U) << GetLocation();
+  const uint8_t* lookup_data = (oat_dex_file != nullptr)
+      ? oat_dex_file->GetLookupTableData()
+      : nullptr;
+  if (lookup_data != nullptr) {
+    if (lookup_data + TypeLookupTable::RawDataLength(*this) > oat_dex_file->GetOatFile()->End()) {
+      LOG(WARNING) << "found truncated lookup table in " << GetLocation();
+    } else {
+      lookup_table_.reset(TypeLookupTable::Open(lookup_data, *this));
+    }
+  }
 }
 
 DexFile::~DexFile() {
@@ -426,8 +435,6 @@
   // that's only called after DetachCurrentThread, which means there's no JNIEnv. We could
   // re-attach, but cleaning up these global references is not obviously useful. It's not as if
   // the global reference table is otherwise empty!
-  // Remove the index if one were created.
-  delete class_def_index_.LoadRelaxed();
 }
 
 bool DexFile::Init(std::string* error_msg) {
@@ -477,51 +484,26 @@
 
 const DexFile::ClassDef* DexFile::FindClassDef(const char* descriptor, size_t hash) const {
   DCHECK_EQ(ComputeModifiedUtf8Hash(descriptor), hash);
-  // If we have an index lookup the descriptor via that as its constant time to search.
-  Index* index = class_def_index_.LoadSequentiallyConsistent();
-  if (index != nullptr) {
-    auto it = index->FindWithHash(descriptor, hash);
-    return (it == index->end()) ? nullptr : it->second;
+  if (LIKELY(lookup_table_ != nullptr)) {
+    const uint32_t class_def_idx = lookup_table_->Lookup(descriptor, hash);
+    return (class_def_idx != DexFile::kDexNoIndex) ? &GetClassDef(class_def_idx) : nullptr;
   }
+
   // Fast path for the rare no-class-defs case.
-  uint32_t num_class_defs = NumClassDefs();
+  const uint32_t num_class_defs = NumClassDefs();
   if (num_class_defs == 0) {
     return nullptr;
   }
-  // Search for class def with 2 binary searches and then a linear search.
-  const StringId* string_id = FindStringId(descriptor);
-  if (string_id != nullptr) {
-    const TypeId* type_id = FindTypeId(GetIndexForStringId(*string_id));
-    if (type_id != nullptr) {
-      uint16_t type_idx = GetIndexForTypeId(*type_id);
-      for (size_t i = 0; i < num_class_defs; ++i) {
-        const ClassDef& class_def = GetClassDef(i);
-        if (class_def.class_idx_ == type_idx) {
-          return &class_def;
-        }
+  const TypeId* type_id = FindTypeId(descriptor);
+  if (type_id != nullptr) {
+    uint16_t type_idx = GetIndexForTypeId(*type_id);
+    for (size_t i = 0; i < num_class_defs; ++i) {
+      const ClassDef& class_def = GetClassDef(i);
+      if (class_def.class_idx_ == type_idx) {
+        return &class_def;
       }
     }
   }
-  // A miss. If we've had kMaxFailedDexClassDefLookups misses then build an index to speed things
-  // up. This isn't done eagerly at construction as construction is not performed in multi-threaded
-  // sections of tools like dex2oat. If we're lazy we hopefully increase the chance of balancing
-  // out which thread builds the index.
-  const uint32_t kMaxFailedDexClassDefLookups = 100;
-  uint32_t old_misses = find_class_def_misses_.FetchAndAddSequentiallyConsistent(1);
-  if (old_misses == kMaxFailedDexClassDefLookups) {
-    // Are we the ones moving the miss count past the max? Sanity check the index doesn't exist.
-    CHECK(class_def_index_.LoadSequentiallyConsistent() == nullptr);
-    // Build the index.
-    index = new Index();
-    for (uint32_t i = 0; i < num_class_defs;  ++i) {
-      const ClassDef& class_def = GetClassDef(i);
-      const char* class_descriptor = GetClassDescriptor(class_def);
-      index->Insert(std::make_pair(class_descriptor, &class_def));
-    }
-    // Sanity check the index still doesn't exist, only 1 thread should build it.
-    CHECK(class_def_index_.LoadSequentiallyConsistent() == nullptr);
-    class_def_index_.StoreSequentiallyConsistent(index);
-  }
   return nullptr;
 }
 
@@ -625,6 +607,26 @@
   return nullptr;
 }
 
+const DexFile::TypeId* DexFile::FindTypeId(const char* string) const {
+  int32_t lo = 0;
+  int32_t hi = NumTypeIds() - 1;
+  while (hi >= lo) {
+    int32_t mid = (hi + lo) / 2;
+    const TypeId& type_id = GetTypeId(mid);
+    const DexFile::StringId& str_id = GetStringId(type_id.descriptor_idx_);
+    const char* str = GetStringData(str_id);
+    int compare = CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(string, str);
+    if (compare > 0) {
+      lo = mid + 1;
+    } else if (compare < 0) {
+      hi = mid - 1;
+    } else {
+      return &type_id;
+    }
+  }
+  return nullptr;
+}
+
 const DexFile::StringId* DexFile::FindStringId(const uint16_t* string, size_t length) const {
   int32_t lo = 0;
   int32_t hi = NumStringIds() - 1;
@@ -697,6 +699,10 @@
   return nullptr;
 }
 
+void DexFile::CreateTypeLookupTable() const {
+  lookup_table_.reset(TypeLookupTable::Create(*this));
+}
+
 // Given a signature place the type ids into the given vector
 bool DexFile::CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
                              std::vector<uint16_t>* param_type_idxs) const {
@@ -732,11 +738,7 @@
     }
     // TODO: avoid creating a std::string just to get a 0-terminated char array
     std::string descriptor(signature.data() + start_offset, offset - start_offset);
-    const DexFile::StringId* string_id = FindStringId(descriptor.c_str());
-    if (string_id == nullptr) {
-      return false;
-    }
-    const DexFile::TypeId* type_id = FindTypeId(GetIndexForStringId(*string_id));
+    const DexFile::TypeId* type_id = FindTypeId(descriptor.c_str());
     if (type_id == nullptr) {
       return false;
     }
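
The new FindTypeId(const char*) relies on the type_ids_ section being sorted by descriptor, so a single lo/hi binary search replaces the old FindStringId-then-FindTypeId double lookup. A minimal standalone sketch of that search pattern, with std::strcmp standing in for the modified-UTF-8 comparator and a sorted std::vector standing in for the type_ids_ section (names are illustrative):

#include <cstring>
#include <string>
#include <vector>

// Returns the index of `descriptor` in `sorted_descriptors`, or -1 if absent.
// Mirrors the lo/hi/mid loop of DexFile::FindTypeId; the real code compares
// modified UTF-8 as UTF-16 code point values rather than using strcmp.
int FindDescriptorIndex(const std::vector<std::string>& sorted_descriptors,
                        const char* descriptor) {
  int lo = 0;
  int hi = static_cast<int>(sorted_descriptors.size()) - 1;
  while (hi >= lo) {
    const int mid = lo + (hi - lo) / 2;
    const int compare = std::strcmp(descriptor, sorted_descriptors[mid].c_str());
    if (compare > 0) {
      lo = mid + 1;
    } else if (compare < 0) {
      hi = mid - 1;
    } else {
      return mid;
    }
  }
  return -1;
}
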
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 47e5c12..e7877b2 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -51,6 +51,7 @@
 class Signature;
 template<class T> class Handle;
 class StringPiece;
+class TypeLookupTable;
 class ZipArchive;
 
 // TODO: move all of the macro functionality into the DexCache class.
@@ -532,6 +533,8 @@
   // Looks up a string id for a given modified utf8 string.
   const StringId* FindStringId(const char* string) const;
 
+  const TypeId* FindTypeId(const char* string) const;
+
   // Looks up a string id for a given utf16 string.
   const StringId* FindStringId(const uint16_t* string, size_t length) const;
 
@@ -1139,6 +1142,12 @@
     return oat_dex_file_;
   }
 
+  TypeLookupTable* GetTypeLookupTable() const {
+    return lookup_table_.get();
+  }
+
+  void CreateTypeLookupTable() const;
+
  private:
   // Opens a .dex file
   static std::unique_ptr<const DexFile> OpenFile(int fd, const char* location,
@@ -1237,44 +1246,11 @@
   // Points to the base of the class definition list.
   const ClassDef* const class_defs_;
 
-  // Number of misses finding a class def from a descriptor.
-  mutable Atomic<uint32_t> find_class_def_misses_;
-
-  struct UTF16EmptyFn {
-    void MakeEmpty(std::pair<const char*, const ClassDef*>& pair) const {
-      pair.first = nullptr;
-      pair.second = nullptr;
-    }
-    bool IsEmpty(const std::pair<const char*, const ClassDef*>& pair) const {
-      if (pair.first == nullptr) {
-        DCHECK(pair.second == nullptr);
-        return true;
-      }
-      return false;
-    }
-  };
-  struct UTF16HashCmp {
-    // Hash function.
-    size_t operator()(const char* key) const {
-      return ComputeModifiedUtf8Hash(key);
-    }
-    // std::equal function.
-    bool operator()(const char* a, const char* b) const {
-      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(a, b) == 0;
-    }
-  };
-  using Index = HashMap<const char*,
-                        const ClassDef*,
-                        UTF16EmptyFn,
-                        UTF16HashCmp,
-                        UTF16HashCmp,
-                        std::allocator<std::pair<const char*, const ClassDef*>>>;
-  mutable Atomic<Index*> class_def_index_;
-
   // If this dex file was loaded from an oat file, oat_dex_file_ contains a
   // pointer to the OatDexFile it was loaded from. Otherwise oat_dex_file_ is
   // null.
   const OatDexFile* oat_dex_file_;
+  mutable std::unique_ptr<TypeLookupTable> lookup_table_;
 
   friend class DexFileVerifierTest;
 };
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 90b35a3..0a167bb 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -297,6 +297,7 @@
     ASSERT_TRUE(type_str_id != nullptr);
     uint32_t type_str_idx = java_lang_dex_file_->GetIndexForStringId(*type_str_id);
     const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId(type_str_idx);
+    ASSERT_EQ(type_id, java_lang_dex_file_->FindTypeId(type_str));
     ASSERT_TRUE(type_id != nullptr);
     EXPECT_EQ(java_lang_dex_file_->GetIndexForTypeId(*type_id), i);
   }
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 09416cc..440d696 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -23,7 +23,9 @@
 
 #include "base/stringprintf.h"
 #include "dex_file-inl.h"
+#include "experimental_flags.h"
 #include "leb128.h"
+#include "runtime.h"
 #include "safe_map.h"
 #include "utf-inl.h"
 #include "utils.h"
@@ -1414,7 +1416,12 @@
     }
 
     if (IsDataSectionType(type)) {
-      offset_to_type_map_.Put(aligned_offset, type);
+      if (aligned_offset == 0u) {
+        ErrorStringPrintf("Item %d offset is 0", i);
+        return false;
+      }
+      DCHECK(offset_to_type_map_.Find(aligned_offset) == offset_to_type_map_.end());
+      offset_to_type_map_.Insert(std::pair<uint32_t, uint16_t>(aligned_offset, type));
     }
 
     aligned_offset = ptr_ - begin_;
@@ -1587,7 +1594,8 @@
 }
 
 bool DexFileVerifier::CheckOffsetToTypeMap(size_t offset, uint16_t type) {
-  auto it = offset_to_type_map_.find(offset);
+  DCHECK_NE(offset, 0u);
+  auto it = offset_to_type_map_.Find(offset);
   if (UNLIKELY(it == offset_to_type_map_.end())) {
     ErrorStringPrintf("No data map entry found @ %zx; expected %x", offset, type);
     return false;
@@ -2530,7 +2538,14 @@
   }
 
   // Only the static initializer may have code in an interface.
-  if (((class_access_flags & kAccInterface) != 0) && !is_clinit_by_name) {
+  // TODO: We should have some way to determine whether to allow this experimental flag without
+  // the runtime being started.
+  // We assume experimental flags are enabled when running without a runtime so that tools like
+  // dexdump can handle dex files that use these features.
+  if (((class_access_flags & kAccInterface) != 0)
+      && !is_clinit_by_name
+      && Runtime::Current() != nullptr
+      && !Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
     *error_msg = StringPrintf("Non-clinit interface method %" PRIu32 " should not have code",
                               method_index);
     return false;
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 4f15357..6c63749 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -175,7 +175,35 @@
   const char* const location_;
   const DexFile::Header* const header_;
 
-  AllocationTrackingSafeMap<uint32_t, uint16_t, kAllocatorTagDexFileVerifier> offset_to_type_map_;
+  struct OffsetTypeMapEmptyFn {
+    // Make a hash map slot empty by setting its offset to 0. Offset 0 is a valid dex file offset
+    // (it is the offset of the dex file header), but we only store data section items in the
+    // map, and those all come after the header.
+    void MakeEmpty(std::pair<uint32_t, uint16_t>& pair) const {
+      pair.first = 0u;
+    }
+    // Check if a hash map slot is empty.
+    bool IsEmpty(const std::pair<uint32_t, uint16_t>& pair) const {
+      return pair.first == 0;
+    }
+  };
+  struct OffsetTypeMapHashCompareFn {
+    // Hash function for offset.
+    size_t operator()(const uint32_t key) const {
+      return key;
+    }
+    // std::equal function for offset.
+    bool operator()(const uint32_t a, const uint32_t b) const {
+      return a == b;
+    }
+  };
+  // Map from offset to dex file type, using a HashMap for performance reasons.
+  AllocationTrackingHashMap<uint32_t,
+                            uint16_t,
+                            OffsetTypeMapEmptyFn,
+                            kAllocatorTagDexFileVerifier,
+                            OffsetTypeMapHashCompareFn,
+                            OffsetTypeMapHashCompareFn> offset_to_type_map_;
   const uint8_t* ptr_;
   const void* previous_item_;
 
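
The verifier's offset_to_type_map_ now uses ART's HashMap, whose open-addressing slots have no separate occupancy flag: the EmptyFn encodes emptiness in the key itself (offset 0, which never occurs for data-section items). A toy linear-probing table built on the same idea, purely illustrative and not ART's HashMap:

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Toy fixed-capacity map from offset to type. Emptiness is encoded in the key:
// offset 0 doubles as the "empty slot" sentinel, as in OffsetTypeMapEmptyFn.
class ToyOffsetTypeMap {
 public:
  explicit ToyOffsetTypeMap(size_t capacity)
      : slots_(capacity, {0u, uint16_t{0}}) {}

  bool Insert(uint32_t offset, uint16_t type) {
    for (size_t i = 0; i < slots_.size(); ++i) {
      auto& slot = slots_[(Hash(offset) + i) % slots_.size()];
      if (slot.first == 0u) {      // Empty slot: claim it.
        slot = {offset, type};
        return true;
      }
      if (slot.first == offset) {  // Key already present.
        return false;
      }
    }
    return false;  // Table full; a real table would grow and rehash.
  }

  const uint16_t* Find(uint32_t offset) const {
    for (size_t i = 0; i < slots_.size(); ++i) {
      const auto& slot = slots_[(Hash(offset) + i) % slots_.size()];
      if (slot.first == offset) {
        return &slot.second;
      }
      if (slot.first == 0u) {      // Reached an empty slot: not present.
        return nullptr;
      }
    }
    return nullptr;
  }

 private:
  static size_t Hash(uint32_t offset) { return offset; }  // Identity, as above.

  std::vector<std::pair<uint32_t, uint16_t>> slots_;
};
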
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 48a12e5..035230e 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -185,6 +185,7 @@
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
   static constexpr uint32_t kMaxVarArgRegs25x = 6;  // lambdas are 2 registers.
+  static constexpr uint32_t kLambdaVirtualRegisterWidth = 2;
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -248,7 +249,7 @@
 
   // VRegA
   bool HasVRegA() const;
-  int32_t VRegA() const;
+  ALWAYS_INLINE int32_t VRegA() const;
 
   int8_t VRegA_10t() const {
     return VRegA_10t(Fetch16(0));
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index f66628d..21e4e44 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -34,6 +34,7 @@
 #include "mirror/throwable.h"
 #include "nth_caller_visitor.h"
 #include "runtime.h"
+#include "stack_map.h"
 #include "thread.h"
 
 namespace art {
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index f193999..e57569e 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -30,6 +30,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "nth_caller_visitor.h"
+#include "oat_quick_method_header.h"
 #include "reflection.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -358,32 +359,31 @@
   const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
   auto** caller_sp = reinterpret_cast<ArtMethod**>(
       reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
+  const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type);
+  uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(
+      (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
   ArtMethod* outer_method = *caller_sp;
   ArtMethod* caller = outer_method;
 
-  if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) {
-    const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type);
-    uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(
-        (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
-    if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
-      uintptr_t native_pc_offset = outer_method->NativeQuickPcOffset(caller_pc);
-      CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
-      StackMapEncoding encoding = code_info.ExtractEncoding();
-      StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-      DCHECK(stack_map.IsValid());
-      if (stack_map.HasInlineInfo(encoding)) {
-        InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-        caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-      }
-    } else {
-      // We're instrumenting, just use the StackVisitor which knows how to
-      // handle instrumented frames.
-      NthCallerVisitor visitor(Thread::Current(), 1, true);
-      visitor.WalkStack();
-      caller = visitor.caller;
-      if (kIsDebugBuild) {
-        // Avoid doing the check below.
-        do_caller_check = false;
+  if (outer_method != nullptr) {
+    const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
+    if (current_code->IsOptimized()) {
+      if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
+        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+        StackMapEncoding encoding = code_info.ExtractEncoding();
+        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+        DCHECK(stack_map.IsValid());
+        if (stack_map.HasInlineInfo(encoding)) {
+          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+        }
+      } else {
+        // We're instrumenting, just use the StackVisitor which knows how to
+        // handle instrumented frames.
+        NthCallerVisitor visitor(Thread::Current(), 1, true);
+        visitor.WalkStack();
+        caller = visitor.caller;
       }
     }
   }
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 4217cab..0469ee6 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -39,6 +39,7 @@
 
 class ArtField;
 class ArtMethod;
+class OatQuickMethodHeader;
 class ScopedObjectAccessAlreadyRunnable;
 class Thread;
 
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
deleted file mode 100644
index 72c2e0a..0000000
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method-inl.h"
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "interpreter/interpreter.h"
-#include "mirror/object-inl.h"
-#include "reflection.h"
-#include "runtime.h"
-#include "stack.h"
-
-namespace art {
-
-extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                   ShadowFrame* shadow_frame, JValue* result) {
-  ArtMethod* method = shadow_frame->GetMethod();
-  // Ensure static methods are initialized.
-  if (method->IsStatic()) {
-    mirror::Class* declaringClass = method->GetDeclaringClass();
-    if (UNLIKELY(!declaringClass->IsInitialized())) {
-      self->PushShadowFrame(shadow_frame);
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> h_class(hs.NewHandle(declaringClass));
-      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h_class, true,
-                                                                            true))) {
-        self->PopShadowFrame();
-        DCHECK(self->IsExceptionPending());
-        return;
-      }
-      self->PopShadowFrame();
-      CHECK(h_class->IsInitializing());
-      // Reload from shadow frame in case the method moved, this is faster than adding a handle.
-      method = shadow_frame->GetMethod();
-    }
-  }
-  uint16_t arg_offset = (code_item == nullptr) ? 0 : code_item->registers_size_ - code_item->ins_size_;
-  method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
-                 (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                 result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
-}
-
-}  // namespace art
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.h b/runtime/entrypoints/interpreter/interpreter_entrypoints.h
deleted file mode 100644
index 0952214..0000000
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_ENTRYPOINTS_INTERPRETER_INTERPRETER_ENTRYPOINTS_H_
-#define ART_RUNTIME_ENTRYPOINTS_INTERPRETER_INTERPRETER_ENTRYPOINTS_H_
-
-#include "base/macros.h"
-#include "dex_file.h"
-#include "offsets.h"
-
-#define INTERPRETER_ENTRYPOINT_OFFSET(ptr_size, x) \
-    Thread::InterpreterEntryPointOffset<ptr_size>(OFFSETOF_MEMBER(InterpreterEntryPoints, x))
-
-namespace art {
-
-union JValue;
-class ShadowFrame;
-class Thread;
-
-// Pointers to functions that are called by interpreter trampolines via thread-local storage.
-struct PACKED(4) InterpreterEntryPoints {
-  void (*pInterpreterToInterpreterBridge)(Thread* self, const DexFile::CodeItem* code_item,
-                                          ShadowFrame* shadow_frame, JValue* result);
-  void (*pInterpreterToCompiledCodeBridge)(Thread* self, const DexFile::CodeItem* code_item,
-                                           ShadowFrame* shadow_frame, JValue* result);
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_ENTRYPOINTS_INTERPRETER_INTERPRETER_ENTRYPOINTS_H_
diff --git a/runtime/entrypoints/jni/jni_entrypoints.h b/runtime/entrypoints/jni/jni_entrypoints.h
index 6fb0560..9c1b0dc 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.h
+++ b/runtime/entrypoints/jni/jni_entrypoints.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_ENTRYPOINTS_JNI_JNI_ENTRYPOINTS_H_
 #define ART_RUNTIME_ENTRYPOINTS_JNI_JNI_ENTRYPOINTS_H_
 
+#include "jni.h"
+
 #include "base/macros.h"
 #include "offsets.h"
 
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 28c62a8..4e4f851 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -56,9 +56,8 @@
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
-    mirror::Class* klass, ArtMethod* method, Thread* self) \
+    mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
-  UNUSED(method); \
   ScopedQuickEntrypointChecks sqec(self); \
   if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
     if (LIKELY(klass->IsInitialized())) { \
@@ -83,9 +82,8 @@
   return AllocObjectFromCodeResolved<instrumented_bool>(klass, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \
-    mirror::Class* klass, ArtMethod* method, Thread* self) \
+    mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
-  UNUSED(method); \
   ScopedQuickEntrypointChecks sqec(self); \
   if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
     size_t byte_count = klass->GetObjectSize(); \
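
Several hunks in this change drop the UNUSED(x) macro from function bodies and annotate the parameters with ATTRIBUTE_UNUSED instead. A minimal sketch of the pattern; the macro definition below is a stand-in for the one in ART's base/macros.h, and the callback itself is illustrative:

#include <cstddef>

// Stand-in for ART's base/macros.h definition (GCC/Clang attribute).
#define ATTRIBUTE_UNUSED __attribute__((__unused__))

// Annotating the parameters replaces "UNUSED(start); UNUSED(end);" statements
// in the body and keeps -Wunused-parameter quiet without extra code.
void BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED,
                            void* end ATTRIBUTE_UNUSED,
                            size_t used_bytes,
                            void* arg) {
  if (used_bytes == 0) {
    return;
  }
  *reinterpret_cast<size_t*>(arg) += used_bytes;
}
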
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 3bf001e..4adb39b 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -21,8 +21,9 @@
 namespace art {
 
 extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self)
-    SHARED_REQUIRES(Locks::mutator_lock_)
-    NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
   ScopedQuickEntrypointChecks sqec(self);
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerException("Null reference used for synchronization (monitor-enter)");
@@ -41,8 +42,9 @@
 }
 
 extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self)
-    SHARED_REQUIRES(Locks::mutator_lock_)
-    NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_)
+    SHARED_REQUIRES(Locks::mutator_lock_) /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
   ScopedQuickEntrypointChecks sqec(self);
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerException("Null reference used for synchronization (monitor-exit)");
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5d3ac73..5eda6d6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -29,6 +29,7 @@
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_quick_method_header.h"
 #include "quick_exception_handler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
@@ -294,7 +295,6 @@
   static mirror::Object* GetProxyThisObject(ArtMethod** sp)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     CHECK((*sp)->IsProxyMethod());
-    CHECK_EQ(kQuickCalleeSaveFrame_RefAndArgs_FrameSize, (*sp)->GetFrameSizeInBytes());
     CHECK_GT(kNumQuickGprArgs, 0u);
     constexpr uint32_t kThisGprIndex = 0u;  // 'this' is in the 1st GPR.
     size_t this_arg_offset = kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset +
@@ -320,12 +320,12 @@
     const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs);
     ArtMethod** caller_sp = reinterpret_cast<ArtMethod**>(
         reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
-    ArtMethod* outer_method = *caller_sp;
     uintptr_t outer_pc = QuickArgumentVisitor::GetCallingPc(sp);
-    uintptr_t outer_pc_offset = outer_method->NativeQuickPcOffset(outer_pc);
+    const OatQuickMethodHeader* current_code = (*caller_sp)->GetOatQuickMethodHeader(outer_pc);
+    uintptr_t outer_pc_offset = current_code->NativeQuickPcOffset(outer_pc);
 
-    if (outer_method->IsOptimized(sizeof(void*))) {
-      CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+    if (current_code->IsOptimized()) {
+      CodeInfo code_info = current_code->GetOptimizedCodeInfo();
       StackMapEncoding encoding = code_info.ExtractEncoding();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(outer_pc_offset, encoding);
       DCHECK(stack_map.IsValid());
@@ -336,7 +336,7 @@
         return stack_map.GetDexPc(encoding);
       }
     } else {
-      return outer_method->ToDexPc(outer_pc);
+      return current_code->ToDexPc(*caller_sp, outer_pc);
     }
   }
 
@@ -719,7 +719,7 @@
     uint16_t num_regs = code_item->registers_size_;
     // No last shadow coming from quick.
     ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-        CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0);
+        CREATE_SHADOW_FRAME(num_regs, /* link */ nullptr, method, /* dex pc */ 0);
     ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
     BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
@@ -841,9 +841,6 @@
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
   DCHECK_EQ((*sp), proxy_method) << PrettyMethod(proxy_method);
-  DCHECK_EQ(proxy_method->GetFrameSizeInBytes(),
-            Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes())
-      << PrettyMethod(proxy_method);
   self->VerifyStack();
   // Start new JNI local reference state.
   JNIEnvExt* env = self->GetJniEnv();
@@ -1522,9 +1519,9 @@
     return sp8;
   }
 
-  virtual void WalkHeader(BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>* sm)
+  virtual void WalkHeader(
+      BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>* sm ATTRIBUTE_UNUSED)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    UNUSED(sm);
   }
 
   void Walk(const char* shorty, uint32_t shorty_len) SHARED_REQUIRES(Locks::mutator_lock_) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 0b36694..4e85913 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -49,7 +49,7 @@
   static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
       NO_THREAD_SAFETY_ANALYSIS {
     ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
-    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+    QuickMethodFrameInfo frame_info = Runtime::Current()->GetRuntimeMethodFrameInfo(save_method);
     EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real size differs for "
         << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
         << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
@@ -58,8 +58,8 @@
   static void CheckPCOffset(InstructionSet isa, Runtime::CalleeSaveType type, size_t pc_offset)
       NO_THREAD_SAFETY_ANALYSIS {
     ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
-    QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
-    EXPECT_EQ(save_method->GetReturnPcOffset().SizeValue(), pc_offset)
+    QuickMethodFrameInfo frame_info = Runtime::Current()->GetRuntimeMethodFrameInfo(save_method);
+    EXPECT_EQ(frame_info.GetReturnPcOffset(), pc_offset)
         << "Expected and real pc offset differs for " << type
         << " core spills=" << std::hex << frame_info.CoreSpillMask()
         << " fp spills=" << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c37d159..78f56ee 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -114,7 +114,7 @@
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_functions,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, interpreter_entrypoints,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, jni_entrypoints,
                         sizeof(void*) * 6);
 
     // Skip across the entrypoints structures.
@@ -137,15 +137,6 @@
                        thread_tlsptr_end);
   }
 
-  void CheckInterpreterEntryPoints() {
-    CHECKED(OFFSETOF_MEMBER(InterpreterEntryPoints, pInterpreterToInterpreterBridge) == 0,
-            InterpreterEntryPoints_start_with_i2i);
-    EXPECT_OFFSET_DIFFNP(InterpreterEntryPoints, pInterpreterToInterpreterBridge,
-                         pInterpreterToCompiledCodeBridge, sizeof(void*));
-    CHECKED(OFFSETOF_MEMBER(InterpreterEntryPoints, pInterpreterToCompiledCodeBridge)
-            + sizeof(void*) == sizeof(InterpreterEntryPoints), InterpreterEntryPoints_all);
-  }
-
   void CheckJniEntryPoints() {
     CHECKED(OFFSETOF_MEMBER(JniEntryPoints, pDlsymLookup) == 0,
             JniEntryPoints_start_with_dlsymlookup);
@@ -321,10 +312,6 @@
   CheckThreadOffsets();
 }
 
-TEST_F(EntrypointsOrderTest, InterpreterEntryPoints) {
-  CheckInterpreterEntryPoints();
-}
-
 TEST_F(EntrypointsOrderTest, JniEntryPoints) {
   CheckJniEntryPoints();
 }
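
The entrypoints-order test verifies struct layout by differencing member offsets; with the interpreter entrypoints removed, checkpoint_functions is now expected to be followed directly by the JNI entrypoints. A minimal sketch of the same style of check using standard offsetof on an illustrative struct (not ART's Thread/tlsPtr_):

#include <cstddef>

// Illustrative struct only; the real members live inside Thread::tlsPtr_ and
// the spacing here is chosen just to make the assertion hold.
struct TlsPtrSketch {
  void* checkpoint_functions[6];
  void* jni_entrypoints;
};

// Same style of check as EXPECT_OFFSET_DIFFP: assert on the distance between
// two member offsets so any layout change is caught immediately.
static_assert(offsetof(TlsPtrSketch, jni_entrypoints) -
                  offsetof(TlsPtrSketch, checkpoint_functions) ==
              sizeof(void*) * 6,
              "jni_entrypoints must directly follow the checkpoint slots");
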
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 9f84bd2..18ccd08 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -26,6 +26,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/stack_trace_element.h"
+#include "oat_quick_method_header.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
@@ -91,10 +92,25 @@
     fake_header_code_and_maps_.insert(fake_header_code_and_maps_.end(),
                                       fake_code_.begin(), fake_code_.end());
 
-    // NOTE: Don't align the code (it will not be executed) but check that the Thumb2
-    // adjustment will be a NOP, see ArtMethod::EntryPointToCodePointer().
-    CHECK_ALIGNED(mapping_table_offset, 2);
-    const uint8_t* code_ptr = &fake_header_code_and_maps_[gc_map_offset];
+    // Align the code.
+    const size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+    fake_header_code_and_maps_.reserve(fake_header_code_and_maps_.size() + alignment);
+    const void* unaligned_code_ptr =
+        fake_header_code_and_maps_.data() + (fake_header_code_and_maps_.size() - code_size);
+    size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr));
+    size_t padding = RoundUp(offset, alignment) - offset;
+    // Make sure no resizing takes place.
+    CHECK_GE(fake_header_code_and_maps_.capacity(), fake_header_code_and_maps_.size() + padding);
+    fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(), padding, 0);
+    const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding;
+    CHECK_EQ(code_ptr,
+             static_cast<const void*>(fake_header_code_and_maps_.data() +
+                                          (fake_header_code_and_maps_.size() - code_size)));
+
+    if (kRuntimeISA == kArm) {
+      // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer().
+      CHECK_ALIGNED(mapping_table_offset, 2);
+    }
 
     method_f_ = my_klass_->FindVirtualMethod("f", "()I", sizeof(void*));
     ASSERT_TRUE(method_f_ != nullptr);
@@ -169,7 +185,7 @@
   r->SetInstructionSet(kRuntimeISA);
   ArtMethod* save_method = r->CreateCalleeSaveMethod();
   r->SetCalleeSaveMethod(save_method, Runtime::kSaveAll);
-  QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+  QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method);
 
   ASSERT_EQ(kStackAlignment, 16U);
   // ASSERT_EQ(sizeof(uintptr_t), sizeof(uint32_t));
@@ -186,15 +202,15 @@
     fake_stack.push_back(0);
   }
 
-  fake_stack.push_back(
-      method_g_->ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
+  fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
+      method_g_, dex_pc, /* is_catch_handler */ false));  // return pc
 
   // Create/push fake 16byte stack frame for method g
   fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
   fake_stack.push_back(0);
   fake_stack.push_back(0);
-  fake_stack.push_back(
-      method_g_->ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
+  fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc(
+      method_g_, dex_pc, /* is_catch_handler */ false));  // return pc
 
   // Create/push fake 16byte stack frame for method f
   fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
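
The test now front-pads its fake method header so the code lands on the ISA's instruction alignment. A minimal sketch of that padding arithmetic, with a self-contained RoundUpTo in place of ART's RoundUp helper (the function names here are illustrative):

#include <cstddef>
#include <cstdint>

// Round `x` up to the next multiple of `n` (n must be non-zero).
constexpr size_t RoundUpTo(size_t x, size_t n) {
  return ((x + n - 1) / n) * n;
}

// Number of bytes to insert in front of `unaligned_ptr` so that the code that
// follows it starts on an `alignment` boundary, as computed in the test above.
inline size_t PaddingFor(const void* unaligned_ptr, size_t alignment) {
  const size_t offset = static_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_ptr));
  return RoundUpTo(offset, alignment) - offset;
}
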
diff --git a/runtime/experimental_flags.h b/runtime/experimental_flags.h
new file mode 100644
index 0000000..2e674e9
--- /dev/null
+++ b/runtime/experimental_flags.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_EXPERIMENTAL_FLAGS_H_
+#define ART_RUNTIME_EXPERIMENTAL_FLAGS_H_
+
+#include <ostream>
+
+namespace art {
+
+// Possible experimental features that might be enabled.
+struct ExperimentalFlags {
+  // The actual flag values.
+  enum {
+    kNone           = 0x0000,
+    kLambdas        = 0x0001,
+    kDefaultMethods = 0x0002,
+  };
+
+  constexpr ExperimentalFlags() : value_(0x0000) {}
+  constexpr ExperimentalFlags(decltype(kNone) t) : value_(static_cast<uint32_t>(t)) {}
+
+  constexpr operator decltype(kNone)() const {
+    return static_cast<decltype(kNone)>(value_);
+  }
+
+  constexpr explicit operator bool() const {
+    return value_ != kNone;
+  }
+
+  constexpr ExperimentalFlags operator|(const decltype(kNone)& b) const {
+    return static_cast<decltype(kNone)>(value_ | static_cast<uint32_t>(b));
+  }
+  constexpr ExperimentalFlags operator|(const ExperimentalFlags& b) const {
+    return static_cast<decltype(kNone)>(value_ | b.value_);
+  }
+
+  constexpr ExperimentalFlags operator&(const ExperimentalFlags& b) const {
+    return static_cast<decltype(kNone)>(value_ & b.value_);
+  }
+  constexpr ExperimentalFlags operator&(const decltype(kNone)& b) const {
+    return static_cast<decltype(kNone)>(value_ & static_cast<uint32_t>(b));
+  }
+
+  constexpr bool operator==(const ExperimentalFlags& b) const {
+    return value_ == b.value_;
+  }
+
+ private:
+  uint32_t value_;
+};
+
+inline std::ostream& operator<<(std::ostream& stream, const ExperimentalFlags& e) {
+  bool started = false;
+  if (e & ExperimentalFlags::kLambdas) {
+    stream << (started ? "|" : "") << "kLambdas";
+    started = true;
+  }
+  if (e & ExperimentalFlags::kDefaultMethods) {
+    stream << (started ? "|" : "") << "kDefaultMethods";
+    started = true;
+  }
+  if (!started) {
+    stream << "kNone";
+  }
+  return stream;
+}
+
+inline std::ostream& operator<<(std::ostream& stream, const decltype(ExperimentalFlags::kNone)& e) {
+  return stream << ExperimentalFlags(e);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_EXPERIMENTAL_FLAGS_H_
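
The new ExperimentalFlags is a small constexpr bit-set wrapper around the enum above. A minimal usage sketch, assuming the header is on the include path (the main() harness is illustrative, not part of the change):

#include <iostream>

#include "experimental_flags.h"  // Assumes the new header is on the include path.

int main() {
  // Start with lambdas enabled, then add default methods via the member '|'.
  art::ExperimentalFlags flags(art::ExperimentalFlags::kLambdas);
  flags = flags | art::ExperimentalFlags::kDefaultMethods;

  // operator& yields an ExperimentalFlags; the explicit bool conversion makes
  // it usable directly as a condition.
  if (flags & art::ExperimentalFlags::kDefaultMethods) {
    std::cout << flags << std::endl;  // Prints "kLambdas|kDefaultMethods".
  }
  return 0;
}
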
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index c3a9627..52ccbee 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "base/stl_util.h"
 #include "mirror/class.h"
+#include "oat_quick_method_header.h"
 #include "sigchain.h"
 #include "thread-inl.h"
 #include "verify_object-inl.h"
@@ -359,16 +360,17 @@
     return false;
   }
 
+  const OatQuickMethodHeader* method_header = method_obj->GetOatQuickMethodHeader(return_pc);
+
   // We can be certain that this is a method now.  Check if we have a GC map
   // at the return PC address.
   if (true || kIsDebugBuild) {
     VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
-    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj,
-                                                                                 sizeof(void*));
-    uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
+    uint32_t sought_offset = return_pc -
+        reinterpret_cast<uintptr_t>(method_header->GetEntryPoint());
     VLOG(signals) << "pc offset: " << std::hex << sought_offset;
   }
-  uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
+  uint32_t dexpc = method_header->ToDexPc(method_obj, return_pc, false);
   VLOG(signals) << "dexpc: " << dexpc;
   return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
 }
@@ -404,9 +406,8 @@
   manager_->AddHandler(this, false);
 }
 
-bool JavaStackTraceHandler::Action(int sig, siginfo_t* siginfo, void* context) {
+bool JavaStackTraceHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-  UNUSED(sig);
 #ifdef TEST_NESTED_SIGNAL
   bool in_generated_code = true;
 #else
diff --git a/runtime/gc/accounting/bitmap.cc b/runtime/gc/accounting/bitmap.cc
index fdded02..380cb8e 100644
--- a/runtime/gc/accounting/bitmap.cc
+++ b/runtime/gc/accounting/bitmap.cc
@@ -18,6 +18,7 @@
 
 #include "base/bit_utils.h"
 #include "card_table.h"
+#include "jit/jit_code_cache.h"
 #include "mem_map.h"
 
 namespace art {
@@ -91,6 +92,7 @@
 }
 
 template class MemoryRangeBitmap<CardTable::kCardSize>;
+template class MemoryRangeBitmap<jit::kJitCodeAlignment>;
 
 }  // namespace accounting
 }  // namespace gc
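
bitmap.cc defines MemoryRangeBitmap's members out of line and then explicitly instantiates each required specialization, so the new JIT-alignment user needs its own "template class" line. A minimal sketch of that pattern with a made-up template (not ART's MemoryRangeBitmap):

#include <cstddef>

// Illustrative template whose member is defined out of line in a .cc file,
// like MemoryRangeBitmap in bitmap.cc.
template <size_t kAlignment>
class RangeCounter {
 public:
  size_t CountAlignedIn(size_t begin, size_t end) const;
};

template <size_t kAlignment>
size_t RangeCounter<kAlignment>::CountAlignedIn(size_t begin, size_t end) const {
  size_t count = 0;
  for (size_t i = begin; i < end; ++i) {
    if (i % kAlignment == 0) {
      ++count;
    }
  }
  return count;
}

// Because the definition lives in one translation unit, every specialization
// used elsewhere must be explicitly instantiated there, or callers fail to
// link. That is why a new user (the JIT code alignment) adds a line above.
template class RangeCounter<32>;
template class RangeCounter<64>;
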
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 277d319..eb0852a 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -41,8 +41,7 @@
   explicit RememberedSetCardVisitor(RememberedSet::CardSet* const dirty_cards)
       : dirty_cards_(dirty_cards) {}
 
-  void operator()(uint8_t* card, uint8_t expected_value, uint8_t new_value) const {
-    UNUSED(new_value);
+  void operator()(uint8_t* card, uint8_t expected_value, uint8_t new_value ATTRIBUTE_UNUSED) const {
     if (expected_value == CardTable::kCardDirty) {
       dirty_cards_->insert(card);
     }
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 006d2c7..3be7181 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -46,7 +46,7 @@
       DCHECK(Test(obj));
       return true;
     }
-  } while (!atomic_entry->CompareExchangeWeakSequentiallyConsistent(old_word, old_word | mask));
+  } while (!atomic_entry->CompareExchangeWeakRelaxed(old_word, old_word | mask));
   DCHECK(Test(obj));
   return false;
 }
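
AtomicTestAndSet's retry loop now uses a relaxed weak compare-exchange rather than a sequentially consistent one. A minimal sketch of the same pattern on std::atomic; the assumption (mirroring the change above) is that only the bit value matters here, not ordering with surrounding memory accesses:

#include <atomic>
#include <cstdint>

// Weak-CAS retry loop: set the bits in `mask`, returning true if they were
// already set. Relaxed ordering is assumed to be sufficient for this use.
inline bool TestAndSetBits(std::atomic<uintptr_t>* word, uintptr_t mask) {
  uintptr_t old_word = word->load(std::memory_order_relaxed);
  do {
    if ((old_word & mask) != 0) {
      return true;  // Already set; nothing to do.
    }
    // compare_exchange_weak may fail spuriously or because another thread
    // raced us; either way old_word is reloaded and the loop retries.
  } while (!word->compare_exchange_weak(old_word, old_word | mask,
                                        std::memory_order_relaxed));
  return false;
}
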
diff --git a/runtime/gc/allocator/dlmalloc.cc b/runtime/gc/allocator/dlmalloc.cc
index 3d85395..e747f00 100644
--- a/runtime/gc/allocator/dlmalloc.cc
+++ b/runtime/gc/allocator/dlmalloc.cc
@@ -77,7 +77,8 @@
 }
 
 extern "C" void DlmallocBytesAllocatedCallback(void* start ATTRIBUTE_UNUSED,
-                                               void* end ATTRIBUTE_UNUSED, size_t used_bytes,
+                                               void* end ATTRIBUTE_UNUSED,
+                                               size_t used_bytes,
                                                void* arg) {
   if (used_bytes == 0) {
     return;
@@ -86,10 +87,10 @@
   *bytes_allocated += used_bytes + sizeof(size_t);
 }
 
-extern "C" void DlmallocObjectsAllocatedCallback(void* start, void* end, size_t used_bytes,
+extern "C" void DlmallocObjectsAllocatedCallback(void* start ATTRIBUTE_UNUSED,
+                                                 void* end ATTRIBUTE_UNUSED,
+                                                 size_t used_bytes,
                                                  void* arg) {
-  UNUSED(start);
-  UNUSED(end);
   if (used_bytes == 0) {
     return;
   }
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 87f1392..3ce3d63 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -131,6 +131,7 @@
 
    private:
     Slot* next_;  // Next slot in the list.
+    friend class RosAlloc;
   };
 
   // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
@@ -302,6 +303,7 @@
     // free without traversing the whole free list.
     uint32_t size_;
     uint32_t padding_ ATTRIBUTE_UNUSED;
+    friend class RosAlloc;
   };
 
   // Represents a run of memory slots of the same size.
@@ -482,7 +484,7 @@
   static constexpr uint8_t kMagicNumFree = 43;
   // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
   static constexpr size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets;
-  // The number of smaller size brackets that are 16 bytes apart.
+  // The number of smaller size brackets that are the quantum size apart.
   static constexpr size_t kNumOfQuantumSizeBrackets = 32;
   // The sizes (the slot sizes, in bytes) of the size brackets.
   static size_t bracketSizes[kNumOfSizeBrackets];
@@ -520,9 +522,7 @@
   }
   // Returns true if the given allocation size is for a thread local allocation.
   static bool IsSizeForThreadLocal(size_t size) {
-    DCHECK_GT(kNumThreadLocalSizeBrackets, 0U);
-    size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1;
-    bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx];
+    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
     DCHECK(size > kLargeSizeThreshold ||
            (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
     return is_size_for_thread_local;
@@ -634,6 +634,16 @@
   // are less than this index. We use shared (current) runs for the rest.
   static const size_t kNumThreadLocalSizeBrackets = 8;
 
+  // The size of the largest bracket we use thread-local runs for.
+  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
+  static const size_t kMaxThreadLocalBracketSize = 128;
+
+  // The bracket size increment for the brackets of size <= 512 bytes.
+  static constexpr size_t kBracketQuantumSize = 16;
+
+  // Equal to Log2(kBracketQuantumSize).
+  static constexpr size_t kBracketQuantumSizeShift = 4;
+
  private:
   // The base address of the memory region that's managed by this allocator.
   uint8_t* base_;
@@ -770,6 +780,19 @@
            size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
   ~RosAlloc();
 
+  static size_t RunFreeListOffset() {
+    return OFFSETOF_MEMBER(Run, free_list_);
+  }
+  static size_t RunFreeListHeadOffset() {
+    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
+  }
+  static size_t RunFreeListSizeOffset() {
+    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
+  }
+  static size_t RunSlotNextOffset() {
+    return OFFSETOF_MEMBER(Slot, next_);
+  }
+
   // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization.
   // If used, this may cause race conditions if multiple threads are allocating at the same time.
   template<bool kThreadSafe = true>
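
The new RosAlloc constants encode the quantum bracket spacing (16 bytes, shift 4) and the largest thread-local bracket (128 bytes, i.e. bracketSizes[kNumThreadLocalSizeBrackets - 1]). A compile-time sketch of that arithmetic; the QuantumBracketIndex helper and the ceil(size / 16) - 1 formula are illustrative of the quantum brackets only, not RosAlloc's full SizeToIndex():

#include <cstddef>

// Values copied from the header above.
constexpr size_t kBracketQuantumSize = 16;
constexpr size_t kBracketQuantumSizeShift = 4;
constexpr size_t kNumThreadLocalSizeBrackets = 8;
constexpr size_t kMaxThreadLocalBracketSize = 128;

static_assert(kBracketQuantumSize == (size_t{1} << kBracketQuantumSizeShift),
              "shift must be Log2 of the quantum size");
static_assert(kMaxThreadLocalBracketSize ==
                  kNumThreadLocalSizeBrackets * kBracketQuantumSize,
              "largest thread-local bracket matches bracketSizes[kNumThreadLocalSizeBrackets - 1]");

// For the quantum brackets (sizes up to 512), the bracket index is
// ceil(size / 16) - 1.
constexpr size_t QuantumBracketIndex(size_t size) {
  return ((size + kBracketQuantumSize - 1) >> kBracketQuantumSizeShift) - 1;
}

static_assert(QuantumBracketIndex(1) == 0, "sizes 1..16 share bracket 0");
static_assert(QuantumBracketIndex(16) == 0, "sizes 1..16 share bracket 0");
static_assert(QuantumBracketIndex(17) == 1, "sizes 17..32 map to bracket 1");
static_assert(QuantumBracketIndex(kMaxThreadLocalBracketSize) ==
                  kNumThreadLocalSizeBrackets - 1,
              "128 is the last thread-local bracket");
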
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
new file mode 100644
index 0000000..26f5ad3
--- /dev/null
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
+#define ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
+
+#include "concurrent_copying.h"
+
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "gc/space/region_space.h"
+#include "lock_word.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
+  if (from_ref == nullptr) {
+    return nullptr;
+  }
+  DCHECK(heap_->collector_type_ == kCollectorTypeCC);
+  if (UNLIKELY(kUseBakerReadBarrier && !is_active_)) {
+    // In the lock word forward address state, the read barrier bits
+    // in the lock word are part of the stored forwarding address and
+    // invalid. This is usually OK as the from-space copies of objects
+    // aren't accessed by mutators due to the to-space
+    // invariant. However, during the dex2oat image writing relocation
+    // and the zygote compaction, objects can be in the forward
+    // address state (to store the forward/relocation addresses) and
+    // they can still be accessed and the invalid read barrier bits
+    // are consulted. If they happen to look gray but aren't really, the
+    // read barrier slow path can trigger when it shouldn't. To guard
+    // against this, return here if the CC collector isn't running.
+    return from_ref;
+  }
+  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
+  space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
+  switch (rtype) {
+    case space::RegionSpace::RegionType::kRegionTypeToSpace:
+      // It's already marked.
+      return from_ref;
+    case space::RegionSpace::RegionType::kRegionTypeFromSpace: {
+      mirror::Object* to_ref = GetFwdPtr(from_ref);
+      if (kUseBakerReadBarrier) {
+        DCHECK_NE(to_ref, ReadBarrier::GrayPtr())
+            << "from_ref=" << from_ref << " to_ref=" << to_ref;
+      }
+      if (to_ref == nullptr) {
+        // It isn't marked yet. Mark it by copying it to the to-space.
+        to_ref = Copy(from_ref);
+      }
+      DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
+          << "from_ref=" << from_ref << " to_ref=" << to_ref;
+      return to_ref;
+    }
+    case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: {
+      // This may or may not succeed, which is ok.
+      if (kUseBakerReadBarrier) {
+        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+      }
+      mirror::Object* to_ref = from_ref;
+      if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
+        // Already marked.
+      } else {
+        // Newly marked.
+        if (kUseBakerReadBarrier) {
+          DCHECK_EQ(to_ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+        }
+        PushOntoMarkStack(to_ref);
+      }
+      return to_ref;
+    }
+    case space::RegionSpace::RegionType::kRegionTypeNone:
+      return MarkNonMoving(from_ref);
+    default:
+      UNREACHABLE();
+  }
+}
+
+inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
+  DCHECK(region_space_->IsInFromSpace(from_ref));
+  LockWord lw = from_ref->GetLockWord(false);
+  if (lw.GetState() == LockWord::kForwardingAddress) {
+    mirror::Object* fwd_ptr = reinterpret_cast<mirror::Object*>(lw.ForwardingAddress());
+    DCHECK(fwd_ptr != nullptr);
+    return fwd_ptr;
+  } else {
+    return nullptr;
+  }
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 0a7a69f..4a49712 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "base/stl_util.h"
+#include "debugger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/reference_processor.h"
@@ -35,13 +36,16 @@
 namespace gc {
 namespace collector {
 
+static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
+
 ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix)
     : GarbageCollector(heap,
                        name_prefix + (name_prefix.empty() ? "" : " ") +
                        "concurrent copying + mark sweep"),
       region_space_(nullptr), gc_barrier_(new Barrier(0)),
       gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
-                                                     2 * MB, 2 * MB)),
+                                                     kDefaultGcMarkStackSize,
+                                                     kDefaultGcMarkStackSize)),
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
       is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
@@ -335,9 +339,7 @@
         << thread->GetState() << " thread " << thread << " self " << self;
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      concurrent_copying_->GetBarrier().Pass(self);
-    }
+    concurrent_copying_->GetBarrier().Pass(self);
   }
 
  private:
@@ -366,26 +368,15 @@
       }
     }
   }
-  // TODO: Other garbage collectors uses Runtime::VisitConcurrentRoots(), refactor this part
-  // to also use the same function.
   {
-    TimingLogger::ScopedTiming split2("VisitConstantRoots", GetTimings());
-    Runtime::Current()->VisitConstantRoots(this);
-  }
-  {
-    TimingLogger::ScopedTiming split3("VisitInternTableRoots", GetTimings());
-    Runtime::Current()->GetInternTable()->VisitRoots(this, kVisitRootFlagAllRoots);
-  }
-  {
-    TimingLogger::ScopedTiming split4("VisitClassLinkerRoots", GetTimings());
-    Runtime::Current()->GetClassLinker()->VisitRoots(this, kVisitRootFlagAllRoots);
+    TimingLogger::ScopedTiming split2("VisitConcurrentRoots", GetTimings());
+    Runtime::Current()->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
   }
   {
     // TODO: don't visit the transaction roots if it's not active.
     TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
     Runtime::Current()->VisitNonThreadRoots(this);
   }
-  Runtime::Current()->GetHeap()->VisitAllocationRecords(this);
 
   // Immune spaces.
   for (auto& space : heap_->GetContinuousSpaces()) {
@@ -401,7 +392,7 @@
 
   Thread* self = Thread::Current();
   {
-    TimingLogger::ScopedTiming split6("ProcessMarkStack", GetTimings());
+    TimingLogger::ScopedTiming split7("ProcessMarkStack", GetTimings());
     // We transition through three mark stack modes (thread-local, shared, GC-exclusive). The
     // primary reasons are the fact that we need to use a checkpoint to process thread-local mark
     // stacks, but after we disable weak refs accesses, we can't use a checkpoint due to a deadlock
@@ -506,9 +497,7 @@
     thread->SetIsGcMarking(false);
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      concurrent_copying_->GetBarrier().Pass(self);
-    }
+    concurrent_copying_->GetBarrier().Pass(self);
   }
 
  private:
@@ -572,17 +561,31 @@
   Locks::mutator_lock_->SharedLock(self);
 }
 
+void ConcurrentCopying::ExpandGcMarkStack() {
+  DCHECK(gc_mark_stack_->IsFull());
+  const size_t new_size = gc_mark_stack_->Capacity() * 2;
+  std::vector<StackReference<mirror::Object>> temp(gc_mark_stack_->Begin(),
+                                                   gc_mark_stack_->End());
+  gc_mark_stack_->Resize(new_size);
+  for (auto& ref : temp) {
+    gc_mark_stack_->PushBack(ref.AsMirrorPtr());
+  }
+  DCHECK(!gc_mark_stack_->IsFull());
+}
+
 void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) {
   CHECK_EQ(is_mark_stack_push_disallowed_.LoadRelaxed(), 0)
       << " " << to_ref << " " << PrettyTypeOf(to_ref);
   Thread* self = Thread::Current();  // TODO: pass self as an argument from call sites?
   CHECK(thread_running_gc_ != nullptr);
   MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
-  if (mark_stack_mode == kMarkStackModeThreadLocal) {
-    if (self == thread_running_gc_) {
+  if (LIKELY(mark_stack_mode == kMarkStackModeThreadLocal)) {
+    if (LIKELY(self == thread_running_gc_)) {
       // If GC-running thread, use the GC mark stack instead of a thread-local mark stack.
       CHECK(self->GetThreadLocalMarkStack() == nullptr);
-      CHECK(!gc_mark_stack_->IsFull());
+      if (UNLIKELY(gc_mark_stack_->IsFull())) {
+        ExpandGcMarkStack();
+      }
       gc_mark_stack_->PushBack(to_ref);
     } else {
       // Otherwise, use a thread-local mark stack.
@@ -616,7 +619,9 @@
   } else if (mark_stack_mode == kMarkStackModeShared) {
     // Access the shared GC mark stack with a lock.
     MutexLock mu(self, mark_stack_lock_);
-    CHECK(!gc_mark_stack_->IsFull());
+    if (UNLIKELY(gc_mark_stack_->IsFull())) {
+      ExpandGcMarkStack();
+    }
     gc_mark_stack_->PushBack(to_ref);
   } else {
     CHECK_EQ(static_cast<uint32_t>(mark_stack_mode),
@@ -628,7 +633,9 @@
         << "Only GC-running thread should access the mark stack "
         << "in the GC exclusive mark stack mode";
     // Access the GC mark stack without a lock.
-    CHECK(!gc_mark_stack_->IsFull());
+    if (UNLIKELY(gc_mark_stack_->IsFull())) {
+      ExpandGcMarkStack();
+    }
     gc_mark_stack_->PushBack(to_ref);
   }
 }
@@ -641,18 +648,6 @@
   return heap_->live_stack_.get();
 }
 
-inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
-  DCHECK(region_space_->IsInFromSpace(from_ref));
-  LockWord lw = from_ref->GetLockWord(false);
-  if (lw.GetState() == LockWord::kForwardingAddress) {
-    mirror::Object* fwd_ptr = reinterpret_cast<mirror::Object*>(lw.ForwardingAddress());
-    CHECK(fwd_ptr != nullptr);
-    return fwd_ptr;
-  } else {
-    return nullptr;
-  }
-}
-
 // The following visitors are that used to verify that there's no
 // references to the from-space left after marking.
 class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
@@ -911,9 +906,7 @@
     }
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      concurrent_copying_->GetBarrier().Pass(self);
-    }
+    concurrent_copying_->GetBarrier().Pass(self);
   }
 
  private:
@@ -1060,7 +1053,7 @@
   return count;
 }
 
-void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
+inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   if (kUseBakerReadBarrier) {
     DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
@@ -1075,9 +1068,10 @@
         << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
         << " is_marked=" << IsMarked(to_ref);
   }
-  if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
-      to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
-      !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) {
+#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+  if (UNLIKELY((to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
+                to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
+                !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
     CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
@@ -1086,14 +1080,13 @@
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
     // the above IsInToSpace() evaluates to true and we change the color from gray to black or white
     // here in this else block.
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
     if (kUseBakerReadBarrier) {
       if (region_space_->IsInToSpace(to_ref)) {
         // If to-space, change from gray to white.
         bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
                                                            ReadBarrier::WhitePtr());
         CHECK(success) << "Must succeed as we won the race.";
-        CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       } else {
         // If non-moving space/unevac from space, change from gray
         // to black. We can't change gray to white because it's not
@@ -1105,13 +1098,13 @@
         bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
                                                            ReadBarrier::BlackPtr());
         CHECK(success) << "Must succeed as we won the race.";
-        CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     }
-#else
-    DCHECK(!kUseBakerReadBarrier);
-#endif
   }
+#else
+  DCHECK(!kUseBakerReadBarrier);
+#endif
   if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
     ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
     visitor(to_ref);
@@ -1602,6 +1595,7 @@
   }
 
   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!root->IsNull()) {
       VisitRoot(root);
@@ -1609,6 +1603,7 @@
   }
 
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
     collector_->MarkRoot(root);
   }
@@ -1618,7 +1613,7 @@
 };
 
 // Scan ref fields of an object.
-void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   ConcurrentCopyingRefFieldsVisitor visitor(this);
   to_ref->VisitReferences(visitor, visitor);
@@ -1628,9 +1623,6 @@
 inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) {
   mirror::Object* ref = obj->GetFieldObject<
       mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset);
-  if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-    return;
-  }
   mirror::Object* to_ref = Mark(ref);
   if (to_ref == ref) {
     return;
@@ -1644,19 +1636,16 @@
       // It was updated by the mutator.
       break;
     }
-  } while (!obj->CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier<
+  } while (!obj->CasFieldWeakRelaxedObjectWithoutWriteBarrier<
       false, false, kVerifyNone>(offset, expected_ref, new_ref));
 }
 
 // Process some roots.
-void ConcurrentCopying::VisitRoots(
+inline void ConcurrentCopying::VisitRoots(
     mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
     mirror::Object** root = roots[i];
     mirror::Object* ref = *root;
-    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-      continue;
-    }
     mirror::Object* to_ref = Mark(ref);
     if (to_ref == ref) {
       continue;
@@ -1669,16 +1658,13 @@
         // It was updated by the mutator.
         break;
       }
-    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+    } while (!addr->CompareExchangeWeakRelaxed(expected_ref, new_ref));
   }
 }
 
-void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
+inline void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
   DCHECK(!root->IsNull());
   mirror::Object* const ref = root->AsMirrorPtr();
-  if (region_space_->IsInToSpace(ref)) {
-    return;
-  }
   mirror::Object* to_ref = Mark(ref);
   if (to_ref != ref) {
     auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
@@ -1690,11 +1676,11 @@
         // It was updated by the mutator.
         break;
       }
-    } while (!addr->CompareExchangeWeakSequentiallyConsistent(expected_ref, new_ref));
+    } while (!addr->CompareExchangeWeakRelaxed(expected_ref, new_ref));
   }
 }
 
-void ConcurrentCopying::VisitRoots(
+inline void ConcurrentCopying::VisitRoots(
     mirror::CompressedReference<mirror::Object>** roots, size_t count,
     const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
@@ -1993,148 +1979,85 @@
   return alloc_stack->Contains(ref);
 }
 
-mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
-  if (from_ref == nullptr) {
-    return nullptr;
-  }
-  DCHECK(from_ref != nullptr);
-  DCHECK(heap_->collector_type_ == kCollectorTypeCC);
-  if (kUseBakerReadBarrier && !is_active_) {
-    // In the lock word forward address state, the read barrier bits
-    // in the lock word are part of the stored forwarding address and
-    // invalid. This is usually OK as the from-space copy of objects
-    // aren't accessed by mutators due to the to-space
-    // invariant. However, during the dex2oat image writing relocation
-    // and the zygote compaction, objects can be in the forward
-    // address state (to store the forward/relocation addresses) and
-    // they can still be accessed and the invalid read barrier bits
-    // are consulted. If they look like gray but aren't really, the
-    // read barriers slow path can trigger when it shouldn't. To guard
-    // against this, return here if the CC collector isn't running.
-    return from_ref;
-  }
-  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
-  space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
-  if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) {
-    // It's already marked.
-    return from_ref;
-  }
-  mirror::Object* to_ref;
-  if (rtype == space::RegionSpace::RegionType::kRegionTypeFromSpace) {
-    to_ref = GetFwdPtr(from_ref);
-    if (kUseBakerReadBarrier) {
-      DCHECK(to_ref != ReadBarrier::GrayPtr()) << "from_ref=" << from_ref << " to_ref=" << to_ref;
+mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
+  // ref is in a non-moving space (from_ref == to_ref).
+  DCHECK(!region_space_->HasAddress(ref)) << ref;
+  if (immune_region_.ContainsObject(ref)) {
+    accounting::ContinuousSpaceBitmap* cc_bitmap =
+        cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
+    DCHECK(cc_bitmap != nullptr)
+        << "An immune space object must have a bitmap";
+    if (kIsDebugBuild) {
+      DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(ref)->Test(ref))
+          << "Immune space object must be already marked";
     }
-    if (to_ref == nullptr) {
-      // It isn't marked yet. Mark it by copying it to the to-space.
-      to_ref = Copy(from_ref);
-    }
-    DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
-        << "from_ref=" << from_ref << " to_ref=" << to_ref;
-  } else if (rtype == space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace) {
     // This may or may not succeed, which is ok.
     if (kUseBakerReadBarrier) {
-      from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
     }
-    if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
+    if (cc_bitmap->AtomicTestAndSet(ref)) {
       // Already marked.
-      to_ref = from_ref;
     } else {
       // Newly marked.
-      to_ref = from_ref;
       if (kUseBakerReadBarrier) {
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
       }
-      PushOntoMarkStack(to_ref);
+      PushOntoMarkStack(ref);
     }
   } else {
-    // from_ref is in a non-moving space.
-    DCHECK(!region_space_->HasAddress(from_ref)) << from_ref;
-    if (immune_region_.ContainsObject(from_ref)) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      DCHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap";
-      if (kIsDebugBuild) {
-        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
-            << "Immune space object must be already marked";
-      }
-      // This may or may not succeed, which is ok.
+    // Use the mark bitmap.
+    accounting::ContinuousSpaceBitmap* mark_bitmap =
+        heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
+    accounting::LargeObjectBitmap* los_bitmap =
+        heap_mark_bitmap_->GetLargeObjectBitmap(ref);
+    CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+    bool is_los = mark_bitmap == nullptr;
+    if (!is_los && mark_bitmap->Test(ref)) {
+      // Already marked.
       if (kUseBakerReadBarrier) {
-        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
-      if (cc_bitmap->AtomicTestAndSet(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-      } else {
-        // Newly marked.
-        to_ref = from_ref;
-        if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-        }
-        PushOntoMarkStack(to_ref);
+    } else if (is_los && los_bitmap->Test(ref)) {
+      // Already marked in LOS.
+      if (kUseBakerReadBarrier) {
+        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     } else {
-      // Use the mark bitmap.
-      accounting::ContinuousSpaceBitmap* mark_bitmap =
-          heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      accounting::LargeObjectBitmap* los_bitmap =
-          heap_mark_bitmap_->GetLargeObjectBitmap(from_ref);
-      CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
-      bool is_los = mark_bitmap == nullptr;
-      if (!is_los && mark_bitmap->Test(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-        if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+      // Not marked.
+      if (IsOnAllocStack(ref)) {
+        // If it's on the allocation stack, it's considered marked. Keep it white.
+        // Objects on the allocation stack need not be marked.
+        if (!is_los) {
+          DCHECK(!mark_bitmap->Test(ref));
+        } else {
+          DCHECK(!los_bitmap->Test(ref));
         }
-      } else if (is_los && los_bitmap->Test(from_ref)) {
-        // Already marked in LOS.
-        to_ref = from_ref;
         if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
         }
       } else {
-        // Not marked.
-        if (IsOnAllocStack(from_ref)) {
-          // If it's on the allocation stack, it's considered marked. Keep it white.
-          to_ref = from_ref;
-          // Objects on the allocation stack need not be marked.
-          if (!is_los) {
-            DCHECK(!mark_bitmap->Test(to_ref));
-          } else {
-            DCHECK(!los_bitmap->Test(to_ref));
-          }
-          if (kUseBakerReadBarrier) {
-            DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
-          }
+        // Not marked or on the allocation stack. Try to mark it.
+        // This may or may not succeed, which is ok.
+        if (kUseBakerReadBarrier) {
+          ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+        }
+        if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
+          // Already marked.
+        } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
+          // Already marked in LOS.
         } else {
-          // Not marked or on the allocation stack. Try to mark it.
-          // This may or may not succeed, which is ok.
+          // Newly marked.
           if (kUseBakerReadBarrier) {
-            from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+            DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
           }
-          if (!is_los && mark_bitmap->AtomicTestAndSet(from_ref)) {
-            // Already marked.
-            to_ref = from_ref;
-          } else if (is_los && los_bitmap->AtomicTestAndSet(from_ref)) {
-            // Already marked in LOS.
-            to_ref = from_ref;
-          } else {
-            // Newly marked.
-            to_ref = from_ref;
-            if (kUseBakerReadBarrier) {
-              DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-            }
-            PushOntoMarkStack(to_ref);
-          }
+          PushOntoMarkStack(ref);
         }
       }
     }
   }
-  return to_ref;
+  return ref;
 }
 
 void ConcurrentCopying::FinishPhase() {
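
The Process()/VisitRoots()/MarkRoot() hunks above switch the reference-slot updates from weak sequentially-consistent to weak relaxed compare-and-swap. Below is a minimal standalone sketch of that retry pattern using std::atomic; the function and parameter names (UpdateFieldRelaxed, slot) are illustrative only and are not part of the ART API.

#include <atomic>

// Keep attempting a relaxed weak CAS until either new_ref is installed or
// another thread has already changed the slot away from expected_ref.
// compare_exchange_weak may also fail spuriously, which is why a loop is used.
void UpdateFieldRelaxed(std::atomic<void*>* slot, void* expected_ref, void* new_ref) {
  void* expected = expected_ref;
  do {
    if (expected != expected_ref) {
      return;  // Updated concurrently, e.g. by a mutator; nothing left to do.
    }
  } while (!slot->compare_exchange_weak(expected, new_ref, std::memory_order_relaxed));
}
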
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 8efad73..27726e2 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -93,7 +93,7 @@
     DCHECK(ref != nullptr);
     return IsMarked(ref) == ref;
   }
-  mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
   bool IsMarking() const {
     return is_marking_;
@@ -182,6 +182,9 @@
   void ReenableWeakRefAccess(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
   void DisableMarking() SHARED_REQUIRES(Locks::mutator_lock_);
   void IssueDisableMarkingCheckpoint() SHARED_REQUIRES(Locks::mutator_lock_);
+  void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
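
The mark-stack hunks replace the hard CHECK(!gc_mark_stack_->IsFull()) with a call to the new ExpandGcMarkStack() before pushing. The sketch below shows only that push-or-grow call-site pattern; GrowableStack is a hypothetical stand-in and does not model how the real GC mark stack is reallocated.

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical bounded stack: instead of aborting when a push would overflow,
// grow the backing storage first and then push.
template <typename T>
class GrowableStack {
 public:
  explicit GrowableStack(size_t capacity) { storage_.reserve(capacity); }

  bool IsFull() const { return storage_.size() == storage_.capacity(); }

  void Expand() { storage_.reserve(std::max<size_t>(1, 2 * storage_.capacity())); }

  void PushBack(const T& value) {
    if (IsFull()) {
      Expand();
    }
    storage_.push_back(value);
  }

 private:
  std::vector<T> storage_;
};
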
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 77a288b..db516a0 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -1146,9 +1146,7 @@
     }
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      mark_sweep_->GetBarrier().Pass(self);
-    }
+    mark_sweep_->GetBarrier().Pass(self);
   }
 
  private:
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index 6c32658..bb7e854 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -56,8 +56,7 @@
   RecursiveMarkDirtyObjects(false, accounting::CardTable::kCardDirty - 1);
 }
 
-void StickyMarkSweep::Sweep(bool swap_bitmaps) {
-  UNUSED(swap_bitmaps);
+void StickyMarkSweep::Sweep(bool swap_bitmaps ATTRIBUTE_UNUSED) {
   SweepArray(GetHeap()->GetLiveStack(), false);
 }
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 657fcb5..ab93142 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -26,6 +26,7 @@
 
 #include "art_field-inl.h"
 #include "base/allocator.h"
+#include "base/arena_allocator.h"
 #include "base/dumpable.h"
 #include "base/histogram-inl.h"
 #include "base/stl_util.h"
@@ -1258,11 +1259,11 @@
 }
 
 void Heap::Trim(Thread* self) {
+  Runtime* const runtime = Runtime::Current();
   if (!CareAboutPauseTimes()) {
     ATRACE_BEGIN("Deflating monitors");
     // Deflate the monitors; this can cause a pause but shouldn't matter since we don't care
     // about pauses.
-    Runtime* runtime = Runtime::Current();
     {
       ScopedSuspendAll ssa(__FUNCTION__);
       uint64_t start_time = NanoTime();
@@ -1274,6 +1275,10 @@
   }
   TrimIndirectReferenceTables(self);
   TrimSpaces(self);
+  // Trim arenas that may have been used by JIT or verifier.
+  ATRACE_BEGIN("Trimming arena maps");
+  runtime->GetArenaPool()->TrimMaps();
+  ATRACE_END();
 }
 
 class TrimIndirectReferenceTableClosure : public Closure {
@@ -1286,9 +1291,7 @@
     ATRACE_END();
     // If thread is a running mutator, then act on behalf of the trim thread.
     // See the code in ThreadList::RunCheckpoint.
-    if (thread->GetState() == kRunnable) {
-      barrier_->Pass(Thread::Current());
-    }
+    barrier_->Pass(Thread::Current());
   }
 
  private:
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1923d24..ce64b10 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -789,10 +789,13 @@
 
   CHECK(image_header.GetOatDataBegin() != nullptr);
 
-  OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, image_header.GetOatDataBegin(),
+  OatFile* oat_file = OatFile::Open(oat_filename,
+                                    oat_filename,
+                                    image_header.GetOatDataBegin(),
                                     image_header.GetOatFileBegin(),
                                     !Runtime::Current()->IsAotCompiler(),
-                                    nullptr, error_msg);
+                                    nullptr,
+                                    error_msg);
   if (oat_file == nullptr) {
     *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
                               oat_filename.c_str(), GetName(), error_msg->c_str());
@@ -839,15 +842,13 @@
   return true;
 }
 
-
 const OatFile* ImageSpace::GetOatFile() const {
   return oat_file_non_owned_;
 }
 
-
-OatFile* ImageSpace::ReleaseOatFile() {
-  CHECK(oat_file_.get() != nullptr);
-  return oat_file_.release();
+std::unique_ptr<const OatFile> ImageSpace::ReleaseOatFile() {
+  CHECK(oat_file_ != nullptr);
+  return std::move(oat_file_);
 }
 
 void ImageSpace::Dump(std::ostream& os) const {
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 215c18b..9920742 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -62,9 +62,8 @@
   const OatFile* GetOatFile() const;
 
   // Releases the OatFile from the ImageSpace so it can be transferred to
-  // the caller, presumably the ClassLinker.
-  OatFile* ReleaseOatFile()
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  // the caller, presumably the OatFileManager.
+  std::unique_ptr<const OatFile> ReleaseOatFile();
 
   void VerifyImageAllocations()
       SHARED_REQUIRES(Locks::mutator_lock_);
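
ReleaseOatFile() now returns a std::unique_ptr<const OatFile> instead of a raw pointer obtained via release(). The fragment below illustrates the move-based ownership transfer in isolation; OatFile here is an empty stand-in type and ImageSpaceLike is a hypothetical owner, not the real classes.

#include <memory>
#include <utility>

struct OatFile {};  // Empty stand-in for the real oat file type.

class ImageSpaceLike {
 public:
  // Ownership moves to the caller and the member is left holding nullptr; the
  // old raw-pointer release() did the same, but now the transfer is visible in
  // the return type.
  std::unique_ptr<const OatFile> ReleaseOatFile() {
    return std::move(oat_file_);
  }

 private:
  std::unique_ptr<const OatFile> oat_file_{new OatFile()};
};
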
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 3a0d814..b1572cc 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -56,7 +56,7 @@
     mark_bitmap_.reset(accounting::ContinuousSpaceBitmap::Create(
         StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
         Begin(), NonGrowthLimitCapacity()));
-    CHECK(live_bitmap_.get() != nullptr) << "could not create allocspace mark bitmap #"
+    CHECK(mark_bitmap_.get() != nullptr) << "could not create allocspace mark bitmap #"
         << bitmap_index;
   }
   for (auto& freed : recent_freed_objects_) {
diff --git a/runtime/gc/space/memory_tool_malloc_space.h b/runtime/gc/space/memory_tool_malloc_space.h
index a5dbad9..c081011 100644
--- a/runtime/gc/space/memory_tool_malloc_space.h
+++ b/runtime/gc/space/memory_tool_malloc_space.h
@@ -48,9 +48,7 @@
   size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void RegisterRecentFree(mirror::Object* ptr) OVERRIDE {
-    UNUSED(ptr);
-  }
+  void RegisterRecentFree(mirror::Object* ptr ATTRIBUTE_UNUSED) OVERRIDE {}
 
   size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE;
 
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index 9e882a8..bbfcb31 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -31,8 +31,7 @@
   explicit CountObjectsAllocated(size_t* objects_allocated)
       : objects_allocated_(objects_allocated) {}
 
-  void operator()(mirror::Object* obj) const {
-    UNUSED(obj);
+  void operator()(mirror::Object* obj ATTRIBUTE_UNUSED) const {
     ++*objects_allocated_;
   }
 
diff --git a/runtime/gc/task_processor_test.cc b/runtime/gc/task_processor_test.cc
index 2c44da2..f1d26d9 100644
--- a/runtime/gc/task_processor_test.cc
+++ b/runtime/gc/task_processor_test.cc
@@ -105,8 +105,7 @@
   TestOrderTask(uint64_t expected_time, size_t expected_counter, size_t* counter)
      : HeapTask(expected_time), expected_counter_(expected_counter), counter_(counter) {
   }
-  virtual void Run(Thread* thread) OVERRIDE {
-    UNUSED(thread);  // Fix cppling bug.
+  virtual void Run(Thread* thread ATTRIBUTE_UNUSED) OVERRIDE {
     ASSERT_EQ(*counter_, expected_counter_);
     ++*counter_;
   }
diff --git a/runtime/image.cc b/runtime/image.cc
index 42b348a..1bc19ff 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '1', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '2', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
@@ -150,10 +150,10 @@
 void ImageSection::VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const {
   for (size_t pos = 0; pos < Size(); ) {
     auto* array = reinterpret_cast<LengthPrefixedArray<ArtField>*>(base + Offset() + pos);
-    for (size_t i = 0; i < array->Length(); ++i) {
+    for (size_t i = 0; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, sizeof(ArtField)));
     }
-    pos += array->ComputeSize(array->Length());
+    pos += array->ComputeSize(array->size());
   }
 }
 
@@ -164,10 +164,10 @@
   const size_t method_size = ArtMethod::Size(pointer_size);
   for (size_t pos = 0; pos < Size(); ) {
     auto* array = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(base + Offset() + pos);
-    for (size_t i = 0; i < array->Length(); ++i) {
+    for (size_t i = 0; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, method_size, method_alignment));
     }
-    pos += array->ComputeSize(array->Length(), method_size, method_alignment);
+    pos += array->ComputeSize(array->size(), method_size, method_alignment);
   }
 }
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index deada4c..e937397 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -36,6 +36,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "nth_caller_visitor.h"
+#include "oat_quick_method_header.h"
 #include "thread.h"
 #include "thread_list.h"
 
@@ -96,16 +97,6 @@
 
 static void UpdateEntrypoints(ArtMethod* method, const void* quick_code)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  Runtime* const runtime = Runtime::Current();
-  jit::Jit* jit = runtime->GetJit();
-  if (jit != nullptr) {
-    const void* old_code_ptr = method->GetEntryPointFromQuickCompiledCode();
-    jit::JitCodeCache* code_cache = jit->GetCodeCache();
-    if (code_cache->ContainsCodePtr(old_code_ptr)) {
-      // Save the old compiled code since we need it to implement ClassLinker::GetQuickOatCodeFor.
-      code_cache->SaveCompiledCode(method, old_code_ptr);
-    }
-  }
   method->SetEntryPointFromQuickCompiledCode(quick_code);
 }
 
@@ -251,7 +242,9 @@
         instrumentation_stack_->insert(it, instrumentation_frame);
         SetReturnPc(instrumentation_exit_pc_);
       }
-      dex_pcs_.push_back(m->ToDexPc(last_return_pc_));
+      dex_pcs_.push_back((GetCurrentOatQuickMethodHeader() == nullptr)
+          ? DexFile::kDexNoIndex
+          : GetCurrentOatQuickMethodHeader()->ToDexPc(m, last_return_pc_));
       last_return_pc_ = return_pc;
       ++instrumentation_stack_depth_;
       return true;  // Continue.
@@ -394,146 +387,151 @@
   return (events & expected) != 0;
 }
 
+static void PotentiallyAddListenerTo(Instrumentation::InstrumentationEvent event,
+                                     uint32_t events,
+                                     std::list<InstrumentationListener*>& list,
+                                     InstrumentationListener* listener,
+                                     bool* has_listener)
+    REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_) {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  if (!HasEvent(event, events)) {
+    return;
+  }
+  // If there is a free slot in the list, we insert the listener in that slot.
+  // Otherwise we add it to the end of the list.
+  auto it = std::find(list.begin(), list.end(), nullptr);
+  if (it != list.end()) {
+    *it = listener;
+  } else {
+    list.push_back(listener);
+  }
+  *has_listener = true;
+}
+
 void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  if (HasEvent(kMethodEntered, events)) {
-    method_entry_listeners_.push_back(listener);
-    have_method_entry_listeners_ = true;
-  }
-  if (HasEvent(kMethodExited, events)) {
-    method_exit_listeners_.push_back(listener);
-    have_method_exit_listeners_ = true;
-  }
-  if (HasEvent(kMethodUnwind, events)) {
-    method_unwind_listeners_.push_back(listener);
-    have_method_unwind_listeners_ = true;
-  }
-  if (HasEvent(kBackwardBranch, events)) {
-    backward_branch_listeners_.push_back(listener);
-    have_backward_branch_listeners_ = true;
-  }
-  if (HasEvent(kInvokeVirtualOrInterface, events)) {
-    invoke_virtual_or_interface_listeners_.push_back(listener);
-    have_invoke_virtual_or_interface_listeners_ = true;
-  }
-  if (HasEvent(kDexPcMoved, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_dex_pc_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    dex_pc_listeners_.reset(modified);
-    have_dex_pc_listeners_ = true;
-  }
-  if (HasEvent(kFieldRead, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_field_read_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*field_read_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    field_read_listeners_.reset(modified);
-    have_field_read_listeners_ = true;
-  }
-  if (HasEvent(kFieldWritten, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_field_write_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*field_write_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    field_write_listeners_.reset(modified);
-    have_field_write_listeners_ = true;
-  }
-  if (HasEvent(kExceptionCaught, events)) {
-    std::list<InstrumentationListener*>* modified;
-    if (have_exception_caught_listeners_) {
-      modified = new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
-    } else {
-      modified = new std::list<InstrumentationListener*>();
-    }
-    modified->push_back(listener);
-    exception_caught_listeners_.reset(modified);
-    have_exception_caught_listeners_ = true;
-  }
+  PotentiallyAddListenerTo(kMethodEntered,
+                           events,
+                           method_entry_listeners_,
+                           listener,
+                           &have_method_entry_listeners_);
+  PotentiallyAddListenerTo(kMethodExited,
+                           events,
+                           method_exit_listeners_,
+                           listener,
+                           &have_method_exit_listeners_);
+  PotentiallyAddListenerTo(kMethodUnwind,
+                           events,
+                           method_unwind_listeners_,
+                           listener,
+                           &have_method_unwind_listeners_);
+  PotentiallyAddListenerTo(kBackwardBranch,
+                           events,
+                           backward_branch_listeners_,
+                           listener,
+                           &have_backward_branch_listeners_);
+  PotentiallyAddListenerTo(kInvokeVirtualOrInterface,
+                           events,
+                           invoke_virtual_or_interface_listeners_,
+                           listener,
+                           &have_invoke_virtual_or_interface_listeners_);
+  PotentiallyAddListenerTo(kDexPcMoved,
+                           events,
+                           dex_pc_listeners_,
+                           listener,
+                           &have_dex_pc_listeners_);
+  PotentiallyAddListenerTo(kFieldRead,
+                           events,
+                           field_read_listeners_,
+                           listener,
+                           &have_field_read_listeners_);
+  PotentiallyAddListenerTo(kFieldWritten,
+                           events,
+                           field_write_listeners_,
+                           listener,
+                           &have_field_write_listeners_);
+  PotentiallyAddListenerTo(kExceptionCaught,
+                           events,
+                           exception_caught_listeners_,
+                           listener,
+                           &have_exception_caught_listeners_);
   UpdateInterpreterHandlerTable();
 }
 
+static void PotentiallyRemoveListenerFrom(Instrumentation::InstrumentationEvent event,
+                                          uint32_t events,
+                                          std::list<InstrumentationListener*>& list,
+                                          InstrumentationListener* listener,
+                                          bool* has_listener)
+    REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_) {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  if (!HasEvent(event, events)) {
+    return;
+  }
+  auto it = std::find(list.begin(), list.end(), listener);
+  if (it != list.end()) {
+    // Just update the entry, do not remove from the list. Removing entries in the list
+    // is unsafe when mutators are iterating over it.
+    *it = nullptr;
+  }
+
+  // Check if the list contains any non-null listener, and update 'has_listener'.
+  for (InstrumentationListener* l : list) {
+    if (l != nullptr) {
+      *has_listener = true;
+      return;
+    }
+  }
+  *has_listener = false;
+}
+
 void Instrumentation::RemoveListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-
-  if (HasEvent(kMethodEntered, events) && have_method_entry_listeners_) {
-    method_entry_listeners_.remove(listener);
-    have_method_entry_listeners_ = !method_entry_listeners_.empty();
-  }
-  if (HasEvent(kMethodExited, events) && have_method_exit_listeners_) {
-    method_exit_listeners_.remove(listener);
-    have_method_exit_listeners_ = !method_exit_listeners_.empty();
-  }
-  if (HasEvent(kMethodUnwind, events) && have_method_unwind_listeners_) {
-    method_unwind_listeners_.remove(listener);
-    have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
-  }
-  if (HasEvent(kBackwardBranch, events) && have_backward_branch_listeners_) {
-    backward_branch_listeners_.remove(listener);
-    have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
-  }
-  if (HasEvent(kInvokeVirtualOrInterface, events) && have_invoke_virtual_or_interface_listeners_) {
-    invoke_virtual_or_interface_listeners_.remove(listener);
-    have_invoke_virtual_or_interface_listeners_ = !invoke_virtual_or_interface_listeners_.empty();
-  }
-  if (HasEvent(kDexPcMoved, events) && have_dex_pc_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
-    modified->remove(listener);
-    have_dex_pc_listeners_ = !modified->empty();
-    if (have_dex_pc_listeners_) {
-      dex_pc_listeners_.reset(modified);
-    } else {
-      dex_pc_listeners_.reset();
-      delete modified;
-    }
-  }
-  if (HasEvent(kFieldRead, events) && have_field_read_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*field_read_listeners_.get());
-    modified->remove(listener);
-    have_field_read_listeners_ = !modified->empty();
-    if (have_field_read_listeners_) {
-      field_read_listeners_.reset(modified);
-    } else {
-      field_read_listeners_.reset();
-      delete modified;
-    }
-  }
-  if (HasEvent(kFieldWritten, events) && have_field_write_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*field_write_listeners_.get());
-    modified->remove(listener);
-    have_field_write_listeners_ = !modified->empty();
-    if (have_field_write_listeners_) {
-      field_write_listeners_.reset(modified);
-    } else {
-      field_write_listeners_.reset();
-      delete modified;
-    }
-  }
-  if (HasEvent(kExceptionCaught, events) && have_exception_caught_listeners_) {
-    std::list<InstrumentationListener*>* modified =
-        new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
-    modified->remove(listener);
-    have_exception_caught_listeners_ = !modified->empty();
-    if (have_exception_caught_listeners_) {
-      exception_caught_listeners_.reset(modified);
-    } else {
-      exception_caught_listeners_.reset();
-      delete modified;
-    }
-  }
+  PotentiallyRemoveListenerFrom(kMethodEntered,
+                                events,
+                                method_entry_listeners_,
+                                listener,
+                                &have_method_entry_listeners_);
+  PotentiallyRemoveListenerFrom(kMethodExited,
+                                events,
+                                method_exit_listeners_,
+                                listener,
+                                &have_method_exit_listeners_);
+  PotentiallyRemoveListenerFrom(kMethodUnwind,
+                                events,
+                                method_unwind_listeners_,
+                                listener,
+                                &have_method_unwind_listeners_);
+  PotentiallyRemoveListenerFrom(kBackwardBranch,
+                                events,
+                                backward_branch_listeners_,
+                                listener,
+                                &have_backward_branch_listeners_);
+  PotentiallyRemoveListenerFrom(kInvokeVirtualOrInterface,
+                                events,
+                                invoke_virtual_or_interface_listeners_,
+                                listener,
+                                &have_invoke_virtual_or_interface_listeners_);
+  PotentiallyRemoveListenerFrom(kDexPcMoved,
+                                events,
+                                dex_pc_listeners_,
+                                listener,
+                                &have_dex_pc_listeners_);
+  PotentiallyRemoveListenerFrom(kFieldRead,
+                                events,
+                                field_read_listeners_,
+                                listener,
+                                &have_field_read_listeners_);
+  PotentiallyRemoveListenerFrom(kFieldWritten,
+                                events,
+                                field_write_listeners_,
+                                listener,
+                                &have_field_write_listeners_);
+  PotentiallyRemoveListenerFrom(kExceptionCaught,
+                                events,
+                                exception_caught_listeners_,
+                                listener,
+                                &have_exception_caught_listeners_);
   UpdateInterpreterHandlerTable();
 }
 
@@ -868,28 +866,24 @@
 void Instrumentation::MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
                                            ArtMethod* method,
                                            uint32_t dex_pc) const {
-  auto it = method_entry_listeners_.begin();
-  bool is_end = (it == method_entry_listeners_.end());
-  // Implemented this way to prevent problems caused by modification of the list while iterating.
-  while (!is_end) {
-    InstrumentationListener* cur = *it;
-    ++it;
-    is_end = (it == method_entry_listeners_.end());
-    cur->MethodEntered(thread, this_object, method, dex_pc);
+  if (HasMethodEntryListeners()) {
+    for (InstrumentationListener* listener : method_entry_listeners_) {
+      if (listener != nullptr) {
+        listener->MethodEntered(thread, this_object, method, dex_pc);
+      }
+    }
   }
 }
 
 void Instrumentation::MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
                                           ArtMethod* method,
                                           uint32_t dex_pc, const JValue& return_value) const {
-  auto it = method_exit_listeners_.begin();
-  bool is_end = (it == method_exit_listeners_.end());
-  // Implemented this way to prevent problems caused by modification of the list while iterating.
-  while (!is_end) {
-    InstrumentationListener* cur = *it;
-    ++it;
-    is_end = (it == method_exit_listeners_.end());
-    cur->MethodExited(thread, this_object, method, dex_pc, return_value);
+  if (HasMethodExitListeners()) {
+    for (InstrumentationListener* listener : method_exit_listeners_) {
+      if (listener != nullptr) {
+        listener->MethodExited(thread, this_object, method, dex_pc, return_value);
+      }
+    }
   }
 }
 
@@ -898,7 +892,9 @@
                                         uint32_t dex_pc) const {
   if (HasMethodUnwindListeners()) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
-      listener->MethodUnwind(thread, this_object, method, dex_pc);
+      if (listener != nullptr) {
+        listener->MethodUnwind(thread, this_object, method, dex_pc);
+      }
     }
   }
 }
@@ -906,16 +902,19 @@
 void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                                           ArtMethod* method,
                                           uint32_t dex_pc) const {
-  std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_);
-  for (InstrumentationListener* listener : *original.get()) {
-    listener->DexPcMoved(thread, this_object, method, dex_pc);
+  for (InstrumentationListener* listener : dex_pc_listeners_) {
+    if (listener != nullptr) {
+      listener->DexPcMoved(thread, this_object, method, dex_pc);
+    }
   }
 }
 
 void Instrumentation::BackwardBranchImpl(Thread* thread, ArtMethod* method,
                                          int32_t offset) const {
   for (InstrumentationListener* listener : backward_branch_listeners_) {
-    listener->BackwardBranch(thread, method, offset);
+    if (listener != nullptr) {
+      listener->BackwardBranch(thread, method, offset);
+    }
   }
 }
 
@@ -925,25 +924,29 @@
                                                    uint32_t dex_pc,
                                                    ArtMethod* callee) const {
   for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
-    listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
+    if (listener != nullptr) {
+      listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
+    }
   }
 }
 
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field) const {
-  std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_);
-  for (InstrumentationListener* listener : *original.get()) {
-    listener->FieldRead(thread, this_object, method, dex_pc, field);
+  for (InstrumentationListener* listener : field_read_listeners_) {
+    if (listener != nullptr) {
+      listener->FieldRead(thread, this_object, method, dex_pc, field);
+    }
   }
 }
 
 void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                                          ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field, const JValue& field_value) const {
-  std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_);
-  for (InstrumentationListener* listener : *original.get()) {
-    listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
+  for (InstrumentationListener* listener : field_write_listeners_) {
+    if (listener != nullptr) {
+      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
+    }
   }
 }
 
@@ -952,14 +955,24 @@
   if (HasExceptionCaughtListeners()) {
     DCHECK_EQ(thread->GetException(), exception_object);
     thread->ClearException();
-    std::shared_ptr<std::list<InstrumentationListener*>> original(exception_caught_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->ExceptionCaught(thread, exception_object);
+    for (InstrumentationListener* listener : exception_caught_listeners_) {
+      if (listener != nullptr) {
+        listener->ExceptionCaught(thread, exception_object);
+      }
     }
     thread->SetException(exception_object);
   }
 }
 
+// Computes a frame ID by ignoring inlined frames.
+size_t Instrumentation::ComputeFrameId(Thread* self,
+                                       size_t frame_depth,
+                                       size_t inlined_frames_before_frame) {
+  CHECK_GE(frame_depth, inlined_frames_before_frame);
+  size_t no_inline_depth = frame_depth - inlined_frames_before_frame;
+  return StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk) - no_inline_depth;
+}
+
 static void CheckStackDepth(Thread* self, const InstrumentationStackFrame& instrumentation_frame,
                             int delta)
     SHARED_REQUIRES(Locks::mutator_lock_) {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 612ca14..726cf1b 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -397,6 +397,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::thread_list_lock_);
 
+  static size_t ComputeFrameId(Thread* self,
+                               size_t frame_depth,
+                               size_t inlined_frames_before_frame)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   InstrumentationLevel GetCurrentInstrumentationLevel() const;
 
@@ -512,20 +517,25 @@
   InstrumentationLevelTable requested_instrumentation_levels_ GUARDED_BY(Locks::mutator_lock_);
 
   // The event listeners, written to with the mutator_lock_ exclusively held.
+  // Mutators must be able to iterate over these lists concurrently, that is, with listeners being
+  // added or removed while iterating. The modifying thread holds the mutator lock exclusively,
+  // so other threads cannot iterate (i.e. read the data of the list) at the same time, but they
+  // do keep iterators that need to remain valid. This is the reason these listeners are std::list
+  // and not, for example, std::vector: the existing storage of a std::list does not move.
+  // Note that mutators cannot make a copy of these lists before iterating, as the instrumentation
+  // listeners can also be deleted concurrently.
+  // As a result, these lists are never trimmed. That's acceptable given the low number of
+  // listeners we have.
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
       GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> dex_pc_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> field_read_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> field_write_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
-  std::shared_ptr<std::list<InstrumentationListener*>> exception_caught_listeners_
-      GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_read_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_write_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // The set of methods being deoptimized (by the debugger) which must be executed with interpreter
   // only.
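
The new PotentiallyAddListenerTo/PotentiallyRemoveListenerFrom helpers and the nullptr checks added to the *EventImpl loops implement a slot-reuse scheme on std::list. The fragment below restates that scheme on a generic listener type to show why removal nulls out a slot instead of erasing the node; Listener and the function names are hypothetical, not the ART API.

#include <algorithm>
#include <list>

struct Listener {};

// Fill the first freed (nullptr) slot if one exists, otherwise append. Because
// std::list nodes never move, iterators held by concurrent readers stay valid.
void AddListener(std::list<Listener*>& listeners, Listener* listener) {
  auto it = std::find(listeners.begin(), listeners.end(), nullptr);
  if (it != listeners.end()) {
    *it = listener;
  } else {
    listeners.push_back(listener);
  }
}

// Remove by nulling the slot rather than erasing the node; erasing could
// invalidate an iterator that a reader is currently holding on that node.
void RemoveListener(std::list<Listener*>& listeners, Listener* listener) {
  auto it = std::find(listeners.begin(), listeners.end(), listener);
  if (it != listeners.end()) {
    *it = nullptr;
  }
}

// Readers therefore skip the freed slots, as the *EventImpl loops above do.
void NotifyAll(const std::list<Listener*>& listeners) {
  for (Listener* listener : listeners) {
    if (listener != nullptr) {
      // listener->SomeEvent(...);
    }
  }
}
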
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 179353e..f4658d5 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -459,4 +459,12 @@
   }
 }
 
+InternTable::Table::Table() {
+  Runtime* const runtime = Runtime::Current();
+  pre_zygote_table_.SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
+                                  runtime->GetHashTableMaxLoadFactor());
+  post_zygote_table_.SetLoadFactor(runtime->GetHashTableMinLoadFactor(),
+                                   runtime->GetHashTableMaxLoadFactor());
+}
+
 }  // namespace art
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 24c5af9..3a4e8d8 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -146,6 +146,7 @@
   // weak interns and strong interns.
   class Table {
    public:
+    Table();
     mirror::String* Find(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
     void Insert(mirror::String* s) SHARED_REQUIRES(Locks::mutator_lock_)
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index f783b04..7c0594a 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -14,10 +14,11 @@
  * limitations under the License.
  */
 
-#include "interpreter_common.h"
+#include "interpreter.h"
 
 #include <limits>
 
+#include "interpreter_common.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -332,7 +333,7 @@
   // Set up shadow frame with matching number of reference slots to vregs.
   ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame();
   ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0);
+      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, /* dex pc */ 0);
   ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
   self->PushShadowFrame(shadow_frame);
 
@@ -448,8 +449,8 @@
   return Execute(self, code_item, *shadow_frame, JValue());
 }
 
-extern "C" void artInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                  ShadowFrame* shadow_frame, JValue* result) {
+void ArtInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
+                                       ShadowFrame* shadow_frame, JValue* result) {
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
     ThrowStackOverflowError(self);
@@ -457,10 +458,11 @@
   }
 
   self->PushShadowFrame(shadow_frame);
+  ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
-  const bool is_static = shadow_frame->GetMethod()->IsStatic();
+  const bool is_static = method->IsStatic();
   if (is_static) {
-    mirror::Class* declaring_class = shadow_frame->GetMethod()->GetDeclaringClass();
+    mirror::Class* declaring_class = method->GetDeclaringClass();
     if (UNLIKELY(!declaring_class->IsInitialized())) {
       StackHandleScope<1> hs(self);
       HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 61140a2..b21ea84 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -45,17 +45,12 @@
                                              ShadowFrame* shadow_frame)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+void ArtInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
+                                       ShadowFrame* shadow_frame, JValue* result)
+    SHARED_REQUIRES(Locks::mutator_lock_);
 
 }  // namespace interpreter
 
-extern "C" void artInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                  ShadowFrame* shadow_frame, JValue* result)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                   ShadowFrame* shadow_frame, JValue* result)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_H_
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index ad34c9a..18fb0d8 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -40,8 +40,9 @@
                 uint16_t inst_data) {
   const bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
   const uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
-                                                              Primitive::ComponentSize(field_type));
+  ArtField* f =
+      FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
+                                                    Primitive::ComponentSize(field_type));
   if (UNLIKELY(f == nullptr)) {
     CHECK(self->IsExceptionPending());
     return false;
@@ -234,8 +235,9 @@
   bool do_assignability_check = do_access_check;
   bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
   uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f = FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
-                                                              Primitive::ComponentSize(field_type));
+  ArtField* f =
+      FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
+                                                    Primitive::ComponentSize(field_type));
   if (UNLIKELY(f == nullptr)) {
     CHECK(self->IsExceptionPending());
     return false;
@@ -516,6 +518,39 @@
         Dbg::IsForcedInterpreterNeededForCalling(self, target);
 }
 
+static void ArtInterpreterToCompiledCodeBridge(Thread* self,
+                                               const DexFile::CodeItem* code_item,
+                                               ShadowFrame* shadow_frame,
+                                               JValue* result)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  // Ensure static methods are initialized.
+  if (method->IsStatic()) {
+    mirror::Class* declaringClass = method->GetDeclaringClass();
+    if (UNLIKELY(!declaringClass->IsInitialized())) {
+      self->PushShadowFrame(shadow_frame);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(declaringClass));
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h_class, true,
+                                                                            true))) {
+        self->PopShadowFrame();
+        DCHECK(self->IsExceptionPending());
+        return;
+      }
+      self->PopShadowFrame();
+      CHECK(h_class->IsInitializing());
+      // Reload from shadow frame in case the method moved; this is faster than adding a handle.
+      method = shadow_frame->GetMethod();
+    }
+  }
+  uint16_t arg_offset = (code_item == nullptr)
+                            ? 0
+                            : code_item->registers_size_ - code_item->ins_size_;
+  method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
+                 (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
+                 result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
+}
+
 template <bool is_range,
           bool do_assignability_check,
           size_t kVarArgMax>
@@ -586,7 +621,7 @@
   // Allocate shadow frame on the stack.
   const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
   ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0);
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, /* dex pc */ 0);
   ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
 
   // Initialize new shadow frame by copying the registers from the callee shadow frame.
@@ -690,9 +725,9 @@
   // Do the call now.
   if (LIKELY(Runtime::Current()->IsStarted())) {
     if (NeedsInterpreter(self, new_shadow_frame)) {
-      artInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
+      ArtInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
     } else {
-      artInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
+      ArtInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
     }
   } else {
     UnstartedRuntime::Invoke(self, code_item, new_shadow_frame, result, first_dest_reg);
@@ -742,7 +777,7 @@
 
 template<bool is_range, bool do_assignability_check>
 bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-                  const Instruction* inst, uint16_t inst_data, JValue* result) {
+                  const Instruction* inst, uint16_t inst_data ATTRIBUTE_UNUSED, JValue* result) {
   const uint4_t num_additional_registers = inst->VRegB_25x();
   // Argument word count.
   const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth;
@@ -757,7 +792,6 @@
     vregC = inst->VRegC_3rc();
   } else {
     // TODO(iam): See if it's possible to remove inst_data dependency from 35x to avoid this path
-    UNUSED(inst_data);
     inst->GetAllArgs25x(arg);
   }
 
@@ -773,7 +807,8 @@
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
   // Argument word count.
-  const uint16_t number_of_inputs = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
+  const uint16_t number_of_inputs =
+      (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
 
   // TODO: find a cleaner way to separate non-range and range information without duplicating
   //       code.
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 5db8cf7..c8650c4 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -45,6 +45,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "stack.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
@@ -79,12 +80,28 @@
 void ThrowNullPointerExceptionFromInterpreter()
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-static inline void DoMonitorEnter(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
-  ref->MonitorEnter(self);
+template <bool kMonitorCounting>
+static inline void DoMonitorEnter(Thread* self,
+                                  ShadowFrame* frame,
+                                  Object* ref)
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_) {
+  StackHandleScope<1> hs(self);
+  Handle<Object> h_ref(hs.NewHandle(ref));
+  h_ref->MonitorEnter(self);
+  frame->GetLockCountData().AddMonitor<kMonitorCounting>(self, h_ref.Get());
 }
 
-static inline void DoMonitorExit(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
-  ref->MonitorExit(self);
+template <bool kMonitorCounting>
+static inline void DoMonitorExit(Thread* self,
+                                 ShadowFrame* frame,
+                                 Object* ref)
+    NO_THREAD_SAFETY_ANALYSIS
+    REQUIRES(!Roles::uninterruptible_) {
+  StackHandleScope<1> hs(self);
+  Handle<Object> h_ref(hs.NewHandle(ref));
+  h_ref->MonitorExit(self);
+  frame->GetLockCountData().RemoveMonitorOrThrow<kMonitorCounting>(self, h_ref.Get());
 }
 
 void AbortTransactionF(Thread* self, const char* fmt, ...)
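DoMonitorEnter/DoMonitorExit above now record every monitor acquired or released in the frame's lock count data, so unbalanced (unstructured) locking can be detected when the frame is left. A standalone sketch of that bookkeeping idea (FrameLockCounter is invented here; it is not ART's LockCountData):

#include <algorithm>
#include <iostream>
#include <vector>

// Minimal sketch of per-frame lock bookkeeping: every monitor-enter records
// the object, every monitor-exit removes it, and at method exit any leftover
// entry is a structured-locking violation. Not ART code.
class FrameLockCounter {
 public:
  void AddMonitor(const void* obj) { held_.push_back(obj); }

  bool RemoveMonitor(const void* obj) {
    auto it = std::find(held_.begin(), held_.end(), obj);
    if (it == held_.end()) {
      return false;  // Unlocking something this frame never locked.
    }
    held_.erase(it);
    return true;
  }

  bool AllMonitorsReleased() const { return held_.empty(); }

 private:
  std::vector<const void*> held_;  // Objects locked but not yet unlocked.
};

int main() {
  int a = 0, b = 0;
  FrameLockCounter frame;
  frame.AddMonitor(&a);
  frame.AddMonitor(&b);
  frame.RemoveMonitor(&a);
  // &b is still held, so returning here would violate structured locking.
  std::cout << std::boolalpha << frame.AllMonitorsReleased() << "\n";  // false
  return 0;
}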
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9677d79..9766299 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -19,6 +19,7 @@
 
 
 #include "base/stl_util.h"  // MakeUnique
+#include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "safe_math.h"
 
@@ -83,12 +84,17 @@
 #define HANDLE_EXPERIMENTAL_INSTRUCTION_START(opcode)                                             \
   HANDLE_INSTRUCTION_START(opcode);                                                               \
   DCHECK(inst->IsExperimental());                                                                 \
-  if (Runtime::Current()->AreExperimentalLambdasEnabled()) {
+  if (Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas)) {
 #define HANDLE_EXPERIMENTAL_INSTRUCTION_END()                                                     \
   } else {                                                                                        \
       UnexpectedOpcode(inst, shadow_frame);                                                       \
   } HANDLE_INSTRUCTION_END();
 
+#define HANDLE_MONITOR_CHECKS()                                                                   \
+  if (!shadow_frame.GetLockCountData().                                                           \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+    HANDLE_PENDING_EXCEPTION();                                                                   \
+  }
 
 /**
  * Interpreter based on computed goto tables.
@@ -275,6 +281,7 @@
   HANDLE_INSTRUCTION_START(RETURN_VOID_NO_BARRIER) {
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -289,6 +296,7 @@
     QuasiAtomic::ThreadFenceForConstructor();
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -304,6 +312,7 @@
     result.SetJ(0);
     result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -318,6 +327,7 @@
     JValue result;
     result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -331,6 +341,7 @@
   HANDLE_INSTRUCTION_START(RETURN_OBJECT) {
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     const uint8_t vreg_index = inst->VRegA_11x(inst_data);
     Object* obj_result = shadow_frame.GetVRegReference(vreg_index);
     if (do_assignability_check && obj_result != nullptr) {
@@ -468,7 +479,7 @@
       ThrowNullPointerExceptionFromInterpreter();
       HANDLE_PENDING_EXCEPTION();
     } else {
-      DoMonitorEnter(self, obj);
+      DoMonitorEnter<do_access_check>(self, &shadow_frame, obj);
       POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
     }
   }
@@ -480,7 +491,7 @@
       ThrowNullPointerExceptionFromInterpreter();
       HANDLE_PENDING_EXCEPTION();
     } else {
-      DoMonitorExit(self, obj);
+      DoMonitorExit<do_access_check>(self, &shadow_frame, obj);
       POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
     }
   }
@@ -2544,6 +2555,8 @@
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
+      // Structured locking is to be enforced for abnormal termination, too.
+      shadow_frame.GetLockCountData().CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);
       return JValue(); /* Handled in caller. */
     } else {
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 083dfb5..bf95a0e 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -15,6 +15,7 @@
  */
 
 #include "base/stl_util.h"  // MakeUnique
+#include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "safe_math.h"
 
@@ -31,6 +32,9 @@
                                                                   inst->GetDexPc(insns),        \
                                                                   instrumentation);             \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
+      /* Structured locking is to be enforced for abnormal termination, too. */                 \
+      shadow_frame.GetLockCountData().                                                          \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
       return JValue(); /* Handled in caller. */                                                 \
     } else {                                                                                    \
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc); \
@@ -47,6 +51,12 @@
     }                                                                             \
   } while (false)
 
+#define HANDLE_MONITOR_CHECKS()                                                                   \
+  if (!shadow_frame.GetLockCountData().                                                           \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+    HANDLE_PENDING_EXCEPTION();                                                                   \
+  }
+
 // Code to run before each dex instruction.
 #define PREAMBLE()                                                                              \
   do {                                                                                          \
@@ -58,7 +68,7 @@
 
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
-  return Runtime::Current()->AreExperimentalLambdasEnabled();
+  return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
 }
 
 template<bool do_access_check, bool transaction_active>
@@ -182,6 +192,7 @@
         PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -194,6 +205,7 @@
         QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -207,6 +219,7 @@
         result.SetJ(0);
         result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -219,6 +232,7 @@
         JValue result;
         result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -230,6 +244,7 @@
         PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         const size_t ref_idx = inst->VRegA_11x(inst_data);
         Object* obj_result = shadow_frame.GetVRegReference(ref_idx);
         if (do_assignability_check && obj_result != nullptr) {
@@ -366,7 +381,7 @@
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
         } else {
-          DoMonitorEnter(self, obj);
+          DoMonitorEnter<do_assignability_check>(self, &shadow_frame, obj);
           POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
         }
         break;
@@ -378,7 +393,7 @@
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
         } else {
-          DoMonitorExit(self, obj);
+          DoMonitorExit<do_assignability_check>(self, &shadow_frame, obj);
           POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
         }
         break;
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index c559389..92b6e4f 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1099,7 +1099,7 @@
     (*iter->second)(self, shadow_frame, result, arg_offset);
   } else {
     // Not special, continue with regular interpreter execution.
-    artInterpreterToInterpreterBridge(self, code_item, shadow_frame, result);
+    ArtInterpreterToInterpreterBridge(self, code_item, shadow_frame, result);
   }
 }
 
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 0a4d6e3..df6936b 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -1072,9 +1072,8 @@
   return WriteTaggedObject(reply, contended_monitor);
 }
 
-static JdwpError TR_Interrupt(JdwpState*, Request* request, ExpandBuf* reply)
+static JdwpError TR_Interrupt(JdwpState*, Request* request, ExpandBuf* reply ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  UNUSED(reply);
   ObjectId thread_id = request->ReadThreadId();
   return Dbg::Interrupt(thread_id);
 }
@@ -1172,6 +1171,13 @@
   return VM_AllClassesImpl(pReply, false, false);
 }
 
+// Deleter functor so std::unique_ptr can be used with JdwpEvent.
+struct JdwpEventDeleter {
+  void operator()(JdwpEvent* event) {
+    EventFree(event);
+  }
+};
+
 /*
  * Set an event trigger.
  *
@@ -1185,7 +1191,7 @@
 
   CHECK_LT(modifier_count, 256);    /* reasonableness check */
 
-  JdwpEvent* pEvent = EventAlloc(modifier_count);
+  std::unique_ptr<JDWP::JdwpEvent, JdwpEventDeleter> pEvent(EventAlloc(modifier_count));
   pEvent->eventKind = event_kind;
   pEvent->suspend_policy = suspend_policy;
   pEvent->modCount = modifier_count;
@@ -1294,8 +1300,6 @@
       break;
     default:
       LOG(WARNING) << "Unsupported modifier " << mod.modKind << " for event " << pEvent->eventKind;
-      // Free allocated event to avoid leak before leaving.
-      EventFree(pEvent);
       return JDWP::ERR_NOT_IMPLEMENTED;
     }
   }
@@ -1311,13 +1315,14 @@
   VLOG(jdwp) << StringPrintf("    --> event requestId=%#x", requestId);
 
   /* add it to the list */
-  JdwpError err = state->RegisterEvent(pEvent);
+  JdwpError err = state->RegisterEvent(pEvent.get());
   if (err != ERR_NONE) {
     /* registration failed, probably because event is bogus */
-    EventFree(pEvent);
     LOG(WARNING) << "WARNING: event request rejected";
+    return err;
   }
-  return err;
+  pEvent.release();
+  return ERR_NONE;
 }
 
 static JdwpError ER_Clear(JdwpState* state, Request* request, ExpandBuf*)
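The JdwpEventDeleter change above is the usual custom-deleter pattern for a C-style alloc/free API: the unique_ptr frees the event on every early-return path, and release() hands ownership over once registration succeeds. A standalone sketch of the pattern (Event, EventAlloc/EventFree, and Register are made up for the example, not the JDWP API):

#include <cstdio>
#include <cstdlib>
#include <memory>

// Made-up C-style API standing in for EventAlloc/EventFree.
struct Event { int kind; };

Event* EventAlloc() { return static_cast<Event*>(std::calloc(1, sizeof(Event))); }
void EventFree(Event* e) { std::free(e); }

// Custom deleter so std::unique_ptr knows how to release the resource.
struct EventDeleter {
  void operator()(Event* e) const { EventFree(e); }
};

bool Register(Event* e) { return e->kind != 0; }  // Pretend registration.

int Set(int kind) {
  std::unique_ptr<Event, EventDeleter> event(EventAlloc());
  event->kind = kind;
  if (kind < 0) {
    return -1;  // Early return: the deleter frees the event, no leak.
  }
  if (!Register(event.get())) {
    return -2;  // Same here.
  }
  event.release();  // The registry now owns the event.
  return 0;
}

int main() {
  std::printf("%d %d %d\n", Set(1), Set(0), Set(-5));
  return 0;
}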
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 0607493..5afd28e 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -49,7 +49,7 @@
 void Jit::DumpInfo(std::ostream& os) {
   os << "Code cache size=" << PrettySize(code_cache_->CodeCacheSize())
      << " data cache size=" << PrettySize(code_cache_->DataCacheSize())
-     << " num methods=" << code_cache_->NumMethods()
+     << " number of compiled code=" << code_cache_->NumberOfCompiledCode()
      << "\n";
   cumulative_timings_.Dump(os);
 }
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e73ba82..1f89f9b 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -43,7 +43,7 @@
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
-  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 1000;
+  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 500;
   static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
 
   virtual ~Jit();
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 4c53162..ce972ef 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -19,113 +19,518 @@
 #include <sstream>
 
 #include "art_method-inl.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
+#include "gc/accounting/bitmap-inl.h"
+#include "jit/profiling_info.h"
+#include "linear_alloc.h"
 #include "mem_map.h"
 #include "oat_file-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread_list.h"
 
 namespace art {
 namespace jit {
 
+static constexpr int kProtAll = PROT_READ | PROT_WRITE | PROT_EXEC;
+static constexpr int kProtData = PROT_READ | PROT_WRITE;
+static constexpr int kProtCode = PROT_READ | PROT_EXEC;
+
+#define CHECKED_MPROTECT(memory, size, prot)                \
+  do {                                                      \
+    int rc = mprotect(memory, size, prot);                  \
+    if (UNLIKELY(rc != 0)) {                                \
+      errno = rc;                                           \
+      PLOG(FATAL) << "Failed to mprotect jit code cache";   \
+    }                                                       \
+  } while (false)                                           \
+
 JitCodeCache* JitCodeCache::Create(size_t capacity, std::string* error_msg) {
   CHECK_GT(capacity, 0U);
   CHECK_LT(capacity, kMaxCapacity);
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
-  MemMap* map = MemMap::MapAnonymous("jit-code-cache", nullptr, capacity,
-                                     PROT_READ | PROT_WRITE | PROT_EXEC, false, false, &error_str);
-  if (map == nullptr) {
+  MemMap* data_map = MemMap::MapAnonymous(
+    "data-code-cache", nullptr, capacity, kProtAll, false, false, &error_str);
+  if (data_map == nullptr) {
     std::ostringstream oss;
     oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
     *error_msg = oss.str();
     return nullptr;
   }
-  return new JitCodeCache(map);
-}
 
-JitCodeCache::JitCodeCache(MemMap* mem_map)
-    : lock_("Jit code cache", kJitCodeCacheLock), num_methods_(0) {
-  VLOG(jit) << "Created jit code cache size=" << PrettySize(mem_map->Size());
-  mem_map_.reset(mem_map);
-  uint8_t* divider = mem_map->Begin() + RoundUp(mem_map->Size() / 4, kPageSize);
-  // Data cache is 1 / 4 of the map. TODO: Make this variable?
-  // Put data at the start.
-  data_cache_ptr_ = mem_map->Begin();
-  data_cache_end_ = divider;
-  data_cache_begin_ = data_cache_ptr_;
-  mprotect(data_cache_ptr_, data_cache_end_ - data_cache_begin_, PROT_READ | PROT_WRITE);
-  // Code cache after.
-  code_cache_begin_ = divider;
-  code_cache_ptr_ = divider;
-  code_cache_end_ = mem_map->End();
-}
+  // Data cache is 1 / 2 of the map.
+  // TODO: Make this variable?
+  size_t data_size = RoundUp(data_map->Size() / 2, kPageSize);
+  size_t code_size = data_map->Size() - data_size;
+  uint8_t* divider = data_map->Begin() + data_size;
 
-bool JitCodeCache::ContainsMethod(ArtMethod* method) const {
-  return ContainsCodePtr(method->GetEntryPointFromQuickCompiledCode());
-}
-
-bool JitCodeCache::ContainsCodePtr(const void* ptr) const {
-  return ptr >= code_cache_begin_ && ptr < code_cache_end_;
-}
-
-void JitCodeCache::FlushInstructionCache() {
-  UNIMPLEMENTED(FATAL);
-  // TODO: Investigate if we need to do this.
-  // __clear_cache(reinterpret_cast<char*>(code_cache_begin_), static_cast<int>(CodeCacheSize()));
-}
-
-uint8_t* JitCodeCache::ReserveCode(Thread* self, size_t size) {
-  MutexLock mu(self, lock_);
-  if (size > CodeCacheRemain()) {
+  // We need to have 32 bit offsets from method headers in code cache which point to things
+  // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
+  MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str);
+  if (code_map == nullptr) {
+    std::ostringstream oss;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
+    *error_msg = oss.str();
     return nullptr;
   }
-  ++num_methods_;  // TODO: This is hacky but works since each method has exactly one code region.
-  code_cache_ptr_ += size;
-  return code_cache_ptr_ - size;
+  DCHECK_EQ(code_map->Size(), code_size);
+  DCHECK_EQ(code_map->Begin(), divider);
+  return new JitCodeCache(code_map, data_map);
+}
+
+JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map)
+    : lock_("Jit code cache", kJitCodeCacheLock),
+      lock_cond_("Jit code cache variable", lock_),
+      collection_in_progress_(false),
+      code_map_(code_map),
+      data_map_(data_map) {
+
+  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_map_->Size(), false /*locked*/);
+  data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_map_->Size(), false /*locked*/);
+
+  if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
+    PLOG(FATAL) << "create_mspace_with_base failed";
+  }
+
+  // Prevent morecore requests from the mspace.
+  mspace_set_footprint_limit(code_mspace_, code_map_->Size());
+  mspace_set_footprint_limit(data_mspace_, data_map_->Size());
+
+  CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
+  CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData);
+
+  live_bitmap_.reset(CodeCacheBitmap::Create("code-cache-bitmap",
+                                             reinterpret_cast<uintptr_t>(code_map_->Begin()),
+                                             reinterpret_cast<uintptr_t>(code_map_->End())));
+
+  if (live_bitmap_.get() == nullptr) {
+    PLOG(FATAL) << "creating bitmaps for the JIT code cache failed";
+  }
+
+  VLOG(jit) << "Created jit code cache: data size="
+            << PrettySize(data_map_->Size())
+            << ", code size="
+            << PrettySize(code_map_->Size());
+}
+
+bool JitCodeCache::ContainsPc(const void* ptr) const {
+  return code_map_->Begin() <= ptr && ptr < code_map_->End();
+}
+
+class ScopedCodeCacheWrite {
+ public:
+  explicit ScopedCodeCacheWrite(MemMap* code_map) : code_map_(code_map) {
+    CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtAll);
+  }
+  ~ScopedCodeCacheWrite() {
+    CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
+  }
+ private:
+  MemMap* const code_map_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedCodeCacheWrite);
+};
+
+uint8_t* JitCodeCache::CommitCode(Thread* self,
+                                  ArtMethod* method,
+                                  const uint8_t* mapping_table,
+                                  const uint8_t* vmap_table,
+                                  const uint8_t* gc_map,
+                                  size_t frame_size_in_bytes,
+                                  size_t core_spill_mask,
+                                  size_t fp_spill_mask,
+                                  const uint8_t* code,
+                                  size_t code_size) {
+  uint8_t* result = CommitCodeInternal(self,
+                                       method,
+                                       mapping_table,
+                                       vmap_table,
+                                       gc_map,
+                                       frame_size_in_bytes,
+                                       core_spill_mask,
+                                       fp_spill_mask,
+                                       code,
+                                       code_size);
+  if (result == nullptr) {
+    // Retry.
+    GarbageCollectCache(self);
+    result = CommitCodeInternal(self,
+                                method,
+                                mapping_table,
+                                vmap_table,
+                                gc_map,
+                                frame_size_in_bytes,
+                                core_spill_mask,
+                                fp_spill_mask,
+                                code,
+                                code_size);
+  }
+  return result;
+}
+
+bool JitCodeCache::WaitForPotentialCollectionToComplete(Thread* self) {
+  bool in_collection = false;
+  while (collection_in_progress_) {
+    in_collection = true;
+    lock_cond_.Wait(self);
+  }
+  return in_collection;
+}
+
+static uintptr_t FromCodeToAllocation(const void* code) {
+  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+  return reinterpret_cast<uintptr_t>(code) - RoundUp(sizeof(OatQuickMethodHeader), alignment);
+}
+
+void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
+  uintptr_t allocation = FromCodeToAllocation(code_ptr);
+  const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+  const uint8_t* data = method_header->GetNativeGcMap();
+  if (data != nullptr) {
+    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
+  }
+  data = method_header->GetMappingTable();
+  if (data != nullptr) {
+    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
+  }
+  // Use the offset directly to skip the sanity check that the method was
+  // compiled with the optimizing compiler.
+  // TODO(ngeoffray): Clean up.
+  if (method_header->vmap_table_offset_ != 0) {
+    data = method_header->code_ - method_header->vmap_table_offset_;
+    mspace_free(data_mspace_, const_cast<uint8_t*>(data));
+  }
+  mspace_free(code_mspace_, reinterpret_cast<uint8_t*>(allocation));
+}
+
+void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
+  MutexLock mu(self, lock_);
+  // We do not check whether a code cache GC is in progress: this method is
+  // called with the classlinker_classes_lock_ held, and suspending ourselves
+  // could lead to a deadlock.
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      if (alloc.ContainsUnsafe(it->second)) {
+        FreeCode(it->first, it->second);
+        it = method_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+  }
+  for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+    ProfilingInfo* info = *it;
+    if (alloc.ContainsUnsafe(info->GetMethod())) {
+      info->GetMethod()->SetProfilingInfo(nullptr);
+      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+      it = profiling_infos_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
+                                          ArtMethod* method,
+                                          const uint8_t* mapping_table,
+                                          const uint8_t* vmap_table,
+                                          const uint8_t* gc_map,
+                                          size_t frame_size_in_bytes,
+                                          size_t core_spill_mask,
+                                          size_t fp_spill_mask,
+                                          const uint8_t* code,
+                                          size_t code_size) {
+  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+  // Ensure the header ends up at expected instruction alignment.
+  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+  size_t total_size = header_size + code_size;
+
+  OatQuickMethodHeader* method_header = nullptr;
+  uint8_t* code_ptr = nullptr;
+
+  ScopedThreadSuspension sts(self, kSuspended);
+  MutexLock mu(self, lock_);
+  WaitForPotentialCollectionToComplete(self);
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    uint8_t* result = reinterpret_cast<uint8_t*>(
+        mspace_memalign(code_mspace_, alignment, total_size));
+    if (result == nullptr) {
+      return nullptr;
+    }
+    code_ptr = result + header_size;
+    DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
+
+    std::copy(code, code + code_size, code_ptr);
+    method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+    new (method_header) OatQuickMethodHeader(
+        (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
+        (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
+        (gc_map == nullptr) ? 0 : code_ptr - gc_map,
+        frame_size_in_bytes,
+        core_spill_mask,
+        fp_spill_mask,
+        code_size);
+  }
+
+  __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
+                          reinterpret_cast<char*>(code_ptr + code_size));
+  method_code_map_.Put(code_ptr, method);
+  // We checked earlier that no collection was in progress. If one were, setting
+  // the entry point of a method would be unsafe, as the collection could
+  // delete it.
+  DCHECK(!collection_in_progress_);
+  method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
+  return reinterpret_cast<uint8_t*>(method_header);
+}
+
+size_t JitCodeCache::CodeCacheSize() {
+  MutexLock mu(Thread::Current(), lock_);
+  size_t bytes_allocated = 0;
+  mspace_inspect_all(code_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
+  return bytes_allocated;
+}
+
+size_t JitCodeCache::DataCacheSize() {
+  MutexLock mu(Thread::Current(), lock_);
+  size_t bytes_allocated = 0;
+  mspace_inspect_all(data_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
+  return bytes_allocated;
+}
+
+size_t JitCodeCache::NumberOfCompiledCode() {
+  MutexLock mu(Thread::Current(), lock_);
+  return method_code_map_.size();
 }
 
 uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
-  MutexLock mu(self, lock_);
   size = RoundUp(size, sizeof(void*));
-  if (size > DataCacheRemain()) {
-    return nullptr;
+  uint8_t* result = nullptr;
+
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    WaitForPotentialCollectionToComplete(self);
+    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
   }
-  data_cache_ptr_ += size;
-  return data_cache_ptr_ - size;
+
+  if (result == nullptr) {
+    // Retry.
+    GarbageCollectCache(self);
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    WaitForPotentialCollectionToComplete(self);
+    result = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, size));
+  }
+
+  return result;
 }
 
 uint8_t* JitCodeCache::AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end) {
-  MutexLock mu(self, lock_);
-  const size_t size = RoundUp(end - begin, sizeof(void*));
-  if (size > DataCacheRemain()) {
+  uint8_t* result = ReserveData(self, end - begin);
+  if (result == nullptr) {
     return nullptr;  // Out of space in the data cache.
   }
-  std::copy(begin, end, data_cache_ptr_);
-  data_cache_ptr_ += size;
-  return data_cache_ptr_ - size;
+  std::copy(begin, end, result);
+  return result;
 }
 
-const void* JitCodeCache::GetCodeFor(ArtMethod* method) {
-  const void* code = method->GetEntryPointFromQuickCompiledCode();
-  if (ContainsCodePtr(code)) {
-    return code;
+class MarkCodeVisitor FINAL : public StackVisitor {
+ public:
+  MarkCodeVisitor(Thread* thread_in, JitCodeCache* code_cache_in)
+      : StackVisitor(thread_in, nullptr, StackVisitor::StackWalkKind::kSkipInlinedFrames),
+        code_cache_(code_cache_in),
+        bitmap_(code_cache_->GetLiveBitmap()) {}
+
+  bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+    if (method_header == nullptr) {
+      return true;
+    }
+    const void* code = method_header->GetCode();
+    if (code_cache_->ContainsPc(code)) {
+      // Use the atomic set version, as multiple threads are executing this code.
+      bitmap_->AtomicTestAndSet(FromCodeToAllocation(code));
+    }
+    return true;
   }
-  MutexLock mu(Thread::Current(), lock_);
-  auto it = method_code_map_.find(method);
-  if (it != method_code_map_.end()) {
-    return it->second;
+
+ private:
+  JitCodeCache* const code_cache_;
+  CodeCacheBitmap* const bitmap_;
+};
+
+class MarkCodeClosure FINAL : public Closure {
+ public:
+  MarkCodeClosure(JitCodeCache* code_cache, Barrier* barrier)
+      : code_cache_(code_cache), barrier_(barrier) {}
+
+  void Run(Thread* thread) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(thread == Thread::Current() || thread->IsSuspended());
+    MarkCodeVisitor visitor(thread, code_cache_);
+    visitor.WalkStack();
+    barrier_->Pass(Thread::Current());
   }
-  return nullptr;
+
+ private:
+  JitCodeCache* const code_cache_;
+  Barrier* const barrier_;
+};
+
+void JitCodeCache::GarbageCollectCache(Thread* self) {
+  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+    LOG(INFO) << "Clearing code cache, code="
+              << PrettySize(CodeCacheSize())
+              << ", data=" << PrettySize(DataCacheSize());
+  }
+
+  size_t map_size = 0;
+  ScopedThreadSuspension sts(self, kSuspended);
+
+  // Walk over all compiled methods and set their entry points to the
+  // interpreter.
+  {
+    MutexLock mu(self, lock_);
+    if (WaitForPotentialCollectionToComplete(self)) {
+      return;
+    }
+    collection_in_progress_ = true;
+    map_size = method_code_map_.size();
+    for (auto& it : method_code_map_) {
+      it.second->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+    }
+    for (ProfilingInfo* info : profiling_infos_) {
+      info->GetMethod()->SetProfilingInfo(nullptr);
+    }
+  }
+
+  // Run a checkpoint on all threads to mark the JIT compiled code they are running.
+  {
+    Barrier barrier(0);
+    size_t threads_running_checkpoint = 0;
+    {
+      // Walking the stack requires the mutator lock.
+      // We only take the lock when running the checkpoint and not waiting so that
+      // when we go back to suspended, we can execute checkpoints that were requested
+      // concurrently, and then move to waiting for our own checkpoint to finish.
+      ScopedObjectAccess soa(self);
+      MarkCodeClosure closure(this, &barrier);
+      threads_running_checkpoint =
+          Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
+    }
+    if (threads_running_checkpoint != 0) {
+      barrier.Increment(self, threads_running_checkpoint);
+    }
+  }
+
+  {
+    MutexLock mu(self, lock_);
+    DCHECK_EQ(map_size, method_code_map_.size());
+    // Free unused compiled code, and restore the entry point of used compiled code.
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+        const void* code_ptr = it->first;
+        ArtMethod* method = it->second;
+        uintptr_t allocation = FromCodeToAllocation(code_ptr);
+        const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+        if (GetLiveBitmap()->Test(allocation)) {
+          method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
+          ++it;
+        } else {
+          method->ClearCounter();
+          DCHECK_NE(method->GetEntryPointFromQuickCompiledCode(), method_header->GetEntryPoint());
+          FreeCode(code_ptr, method);
+          it = method_code_map_.erase(it);
+        }
+      }
+    }
+    GetLiveBitmap()->Bitmap::Clear();
+
+    // Free all profiling info.
+    for (ProfilingInfo* info : profiling_infos_) {
+      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
+      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
+    }
+    profiling_infos_.clear();
+
+    collection_in_progress_ = false;
+    lock_cond_.Broadcast(self);
+  }
+
+  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+    LOG(INFO) << "After clearing code cache, code="
+              << PrettySize(CodeCacheSize())
+              << ", data=" << PrettySize(DataCacheSize());
+  }
 }
 
-void JitCodeCache::SaveCompiledCode(ArtMethod* method, const void* old_code_ptr) {
-  DCHECK_EQ(method->GetEntryPointFromQuickCompiledCode(), old_code_ptr);
-  DCHECK(ContainsCodePtr(old_code_ptr)) << PrettyMethod(method) << " old_code_ptr="
-      << old_code_ptr;
-  MutexLock mu(Thread::Current(), lock_);
-  auto it = method_code_map_.find(method);
-  if (it != method_code_map_.end()) {
-    return;
+
+OatQuickMethodHeader* JitCodeCache::LookupMethodHeader(uintptr_t pc, ArtMethod* method) {
+  static_assert(kRuntimeISA != kThumb2, "kThumb2 cannot be a runtime ISA");
+  if (kRuntimeISA == kArm) {
+    // On Thumb-2, the pc is offset by one.
+    --pc;
   }
-  method_code_map_.Put(method, old_code_ptr);
+  if (!ContainsPc(reinterpret_cast<const void*>(pc))) {
+    return nullptr;
+  }
+
+  MutexLock mu(Thread::Current(), lock_);
+  if (method_code_map_.empty()) {
+    return nullptr;
+  }
+  auto it = method_code_map_.lower_bound(reinterpret_cast<const void*>(pc));
+  --it;
+
+  const void* code_ptr = it->first;
+  OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+  if (!method_header->Contains(pc)) {
+    return nullptr;
+  }
+  DCHECK_EQ(it->second, method)
+      << PrettyMethod(method) << " " << PrettyMethod(it->second) << " " << std::hex << pc;
+  return method_header;
+}
+
+ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self,
+                                              ArtMethod* method,
+                                              const std::vector<uint32_t>& entries,
+                                              bool retry_allocation) {
+  ProfilingInfo* info = AddProfilingInfoInternal(self, method, entries);
+
+  if (info == nullptr && retry_allocation) {
+    GarbageCollectCache(self);
+    info = AddProfilingInfoInternal(self, method, entries);
+  }
+  return info;
+}
+
+ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self,
+                                                      ArtMethod* method,
+                                                      const std::vector<uint32_t>& entries) {
+  size_t profile_info_size = RoundUp(
+      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(void*));
+  ScopedThreadSuspension sts(self, kSuspended);
+  MutexLock mu(self, lock_);
+  WaitForPotentialCollectionToComplete(self);
+
+  // Check whether some other thread has concurrently created it.
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info != nullptr) {
+    return info;
+  }
+
+  uint8_t* data = reinterpret_cast<uint8_t*>(mspace_malloc(data_mspace_, profile_info_size));
+  if (data == nullptr) {
+    return nullptr;
+  }
+  info = new (data) ProfilingInfo(method, entries);
+  method->SetProfilingInfo(info);
+  profiling_infos_.push_back(info);
+  return info;
 }
 
 }  // namespace jit
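ScopedCodeCacheWrite above keeps the code map non-writable except inside a narrow RAII scope. A standalone sketch of that W^X toggling pattern using plain POSIX mmap/mprotect (the scope here flips to read+write rather than read+write+execute, and none of this is ART's cache code):

#include <sys/mman.h>
#include <unistd.h>

#include <cstdio>
#include <cstring>

// RAII guard: the region is writable only while an instance is alive, then it
// goes back to read+exec in the destructor. Sketch only, not ART code.
class ScopedWritable {
 public:
  ScopedWritable(void* begin, size_t size) : begin_(begin), size_(size) {
    if (mprotect(begin_, size_, PROT_READ | PROT_WRITE) != 0) {
      std::perror("mprotect(rw)");
    }
  }
  ~ScopedWritable() {
    if (mprotect(begin_, size_, PROT_READ | PROT_EXEC) != 0) {
      std::perror("mprotect(rx)");
    }
  }

 private:
  void* const begin_;
  const size_t size_;
};

int main() {
  const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  void* region = mmap(nullptr, page, PROT_READ | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (region == MAP_FAILED) {
    std::perror("mmap");
    return 1;
  }
  {
    ScopedWritable writable(region, page);
    std::memset(region, 0x90, 16);  // Only legal while the scope is active.
  }
  // Here the region is read+exec again; writing to it would fault.
  munmap(region, page);
  return 0;
}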
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index f485e4a..e10f962 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -22,6 +22,8 @@
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "gc/accounting/bitmap.h"
+#include "gc/allocator/dlmalloc.h"
 #include "gc_root.h"
 #include "jni.h"
 #include "oat_file.h"
@@ -32,102 +34,150 @@
 namespace art {
 
 class ArtMethod;
-class CompiledMethod;
-class CompilerCallbacks;
+class LinearAlloc;
+class ProfilingInfo;
 
 namespace jit {
 
 class JitInstrumentationCache;
 
+// Alignment that will suit all architectures.
+static constexpr int kJitCodeAlignment = 16;
+using CodeCacheBitmap = gc::accounting::MemoryRangeBitmap<kJitCodeAlignment>;
+
 class JitCodeCache {
  public:
   static constexpr size_t kMaxCapacity = 1 * GB;
-  static constexpr size_t kDefaultCapacity = 2 * MB;
+  // Set the default to a very low value for debug builds to stress the code cache
+  // collection.
+  static constexpr size_t kDefaultCapacity = kIsDebugBuild ? 20 * KB : 2 * MB;
 
   // Create the code cache with a code + data capacity equal to "capacity", error message is passed
   // in the out arg error_msg.
   static JitCodeCache* Create(size_t capacity, std::string* error_msg);
 
-  const uint8_t* CodeCachePtr() const {
-    return code_cache_ptr_;
-  }
+  // Number of bytes allocated in the code cache.
+  size_t CodeCacheSize() REQUIRES(!lock_);
 
-  size_t CodeCacheSize() const {
-    return code_cache_ptr_ - code_cache_begin_;
-  }
+  // Number of bytes allocated in the data cache.
+  size_t DataCacheSize() REQUIRES(!lock_);
 
-  size_t CodeCacheRemain() const {
-    return code_cache_end_ - code_cache_ptr_;
-  }
+  // Number of compiled code entries in the code cache. Note that this is not the
+  // number of methods that got JIT compiled, as we might have collected some.
+  size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
-  const uint8_t* DataCachePtr() const {
-    return data_cache_ptr_;
-  }
+  // Allocate and write code and its metadata to the code cache.
+  uint8_t* CommitCode(Thread* self,
+                      ArtMethod* method,
+                      const uint8_t* mapping_table,
+                      const uint8_t* vmap_table,
+                      const uint8_t* gc_map,
+                      size_t frame_size_in_bytes,
+                      size_t core_spill_mask,
+                      size_t fp_spill_mask,
+                      const uint8_t* code,
+                      size_t code_size)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
 
-  size_t DataCacheSize() const {
-    return data_cache_ptr_ - data_cache_begin_;
-  }
-
-  size_t DataCacheRemain() const {
-    return data_cache_end_ - data_cache_ptr_;
-  }
-
-  size_t NumMethods() const {
-    return num_methods_;
-  }
-
-  // Return true if the code cache contains the code pointer which si the entrypoint of the method.
-  bool ContainsMethod(ArtMethod* method) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Return true if the code cache contains a code ptr.
-  bool ContainsCodePtr(const void* ptr) const;
-
-  // Reserve a region of code of size at least "size". Returns null if there is no more room.
-  uint8_t* ReserveCode(Thread* self, size_t size) REQUIRES(!lock_);
+  // Return true if the code cache contains this pc.
+  bool ContainsPc(const void* pc) const;
 
   // Reserve a region of data of size at least "size". Returns null if there is no more room.
-  uint8_t* ReserveData(Thread* self, size_t size) REQUIRES(!lock_);
+  uint8_t* ReserveData(Thread* self, size_t size)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
 
   // Add a data array of size (end - begin) with the associated contents, returns null if there
   // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
-  // Get code for a method, returns null if it is not in the jit cache.
-  const void* GetCodeFor(ArtMethod* method)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+  CodeCacheBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
 
-  // Save the compiled code for a method so that GetCodeFor(method) will return old_code_ptr if the
-  // entrypoint isn't within the cache.
-  void SaveCompiledCode(ArtMethod* method, const void* old_code_ptr)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+  // Perform a collection on the code cache.
+  void GarbageCollectCache(Thread* self)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Given the 'pc', try to find the JIT compiled code associated with it.
+  // Return null if 'pc' is not in the code cache. 'method' is passed for
+  // sanity check.
+  OatQuickMethodHeader* LookupMethodHeader(uintptr_t pc, ArtMethod* method)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Remove all methods in our cache that were allocated by 'alloc'.
+  void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
+      REQUIRES(!lock_)
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Create a 'ProfilingInfo' for 'method'. If 'retry_allocation' is true,
+  // will collect and retry if the first allocation is unsuccessful.
+  ProfilingInfo* AddProfilingInfo(Thread* self,
+                                  ArtMethod* method,
+                                  const std::vector<uint32_t>& entries,
+                                  bool retry_allocation)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  // Takes ownership of code_mem_map.
-  explicit JitCodeCache(MemMap* code_mem_map);
+  // Takes ownership of code_map and data_map.
+  JitCodeCache(MemMap* code_map, MemMap* data_map);
 
-  // Unimplemented, TODO: Determine if it is necessary.
-  void FlushInstructionCache();
+  // Internal version of 'CommitCode' that will not retry if the
+  // allocation fails. Return null if the allocation fails.
+  uint8_t* CommitCodeInternal(Thread* self,
+                              ArtMethod* method,
+                              const uint8_t* mapping_table,
+                              const uint8_t* vmap_table,
+                              const uint8_t* gc_map,
+                              size_t frame_size_in_bytes,
+                              size_t core_spill_mask,
+                              size_t fp_spill_mask,
+                              const uint8_t* code,
+                              size_t code_size)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Lock which guards.
+  ProfilingInfo* AddProfilingInfoInternal(Thread* self,
+                                          ArtMethod* method,
+                                          const std::vector<uint32_t>& entries)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // If a collection is in progress, wait for it to finish. Return
+  // whether the thread actually waited.
+  bool WaitForPotentialCollectionToComplete(Thread* self)
+      REQUIRES(lock_) REQUIRES(!Locks::mutator_lock_);
+
+  // Free the mspace allocations taken by 'method'.
+  void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
+
+  // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
-  // Mem map which holds code and data. We do this since we need to have 32 bit offsets from method
-  // headers in code cache which point to things in the data cache. If the maps are more than 4GB
-  // apart, having multiple maps wouldn't work.
-  std::unique_ptr<MemMap> mem_map_;
-  // Code cache section.
-  uint8_t* code_cache_ptr_;
-  const uint8_t* code_cache_begin_;
-  const uint8_t* code_cache_end_;
-  // Data cache section.
-  uint8_t* data_cache_ptr_;
-  const uint8_t* data_cache_begin_;
-  const uint8_t* data_cache_end_;
-  size_t num_methods_;
-  // This map holds code for methods if they were deoptimized by the instrumentation stubs. This is
-  // required since we have to implement ClassLinker::GetQuickOatCodeFor for walking stacks.
-  SafeMap<ArtMethod*, const void*> method_code_map_ GUARDED_BY(lock_);
+  // Condition to wait on during collection.
+  ConditionVariable lock_cond_ GUARDED_BY(lock_);
+  // Whether there is a code cache collection in progress.
+  bool collection_in_progress_ GUARDED_BY(lock_);
+  // Mem map which holds code.
+  std::unique_ptr<MemMap> code_map_;
+  // Mem map which holds data (stack maps and profiling info).
+  std::unique_ptr<MemMap> data_map_;
+  // The opaque mspace for allocating code.
+  void* code_mspace_ GUARDED_BY(lock_);
+  // The opaque mspace for allocating data.
+  void* data_mspace_ GUARDED_BY(lock_);
+  // Bitmap for collecting code and data.
+  std::unique_ptr<CodeCacheBitmap> live_bitmap_;
+  // This map holds compiled code associated to the ArtMethod.
+  SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
+  // ProfilingInfo objects we have allocated.
+  std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
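method_code_map_ is now keyed by code pointer, which is what makes LookupMethodHeader's ordered-map search by pc possible. A standalone sketch of that containing-region lookup (Region, the sizes, and the names are invented for the example; this is not ART's map):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>

// A made-up compiled region: start address, size, and a label.
struct Region { uintptr_t start; size_t size; const char* name; };

// Find the region containing pc, if any: take the last region whose start is
// at or before pc, then range-check it.
const Region* Lookup(const std::map<uintptr_t, Region>& regions, uintptr_t pc) {
  if (regions.empty()) {
    return nullptr;
  }
  auto it = regions.upper_bound(pc);  // First region starting strictly after pc.
  if (it == regions.begin()) {
    return nullptr;  // pc is below every region.
  }
  --it;  // Last region starting at or before pc.
  const Region& r = it->second;
  return (pc < r.start + r.size) ? &r : nullptr;
}

int main() {
  std::map<uintptr_t, Region> regions;
  regions[0x1000] = {0x1000, 0x80, "foo"};
  regions[0x2000] = {0x2000, 0x40, "bar"};
  for (uintptr_t pc : {0x1010u, 0x1100u, 0x2030u}) {
    const Region* r = Lookup(regions, pc);
    std::cout << std::hex << pc << " -> " << (r ? r->name : "none") << "\n";
  }
  return 0;
}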
diff --git a/runtime/jit/jit_code_cache_test.cc b/runtime/jit/jit_code_cache_test.cc
deleted file mode 100644
index c76dc11..0000000
--- a/runtime/jit/jit_code_cache_test.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "common_runtime_test.h"
-
-#include "art_method-inl.h"
-#include "class_linker.h"
-#include "jit_code_cache.h"
-#include "scoped_thread_state_change.h"
-#include "thread-inl.h"
-
-namespace art {
-namespace jit {
-
-class JitCodeCacheTest : public CommonRuntimeTest {
- public:
-};
-
-TEST_F(JitCodeCacheTest, TestCoverage) {
-  std::string error_msg;
-  constexpr size_t kSize = 1 * MB;
-  std::unique_ptr<JitCodeCache> code_cache(
-      JitCodeCache::Create(kSize, &error_msg));
-  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
-  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
-  ASSERT_EQ(code_cache->CodeCacheSize(), 0u);
-  ASSERT_GT(code_cache->CodeCacheRemain(), 0u);
-  ASSERT_TRUE(code_cache->DataCachePtr() != nullptr);
-  ASSERT_EQ(code_cache->DataCacheSize(), 0u);
-  ASSERT_GT(code_cache->DataCacheRemain(), 0u);
-  ASSERT_EQ(code_cache->CodeCacheRemain() + code_cache->DataCacheRemain(), kSize);
-  ASSERT_EQ(code_cache->NumMethods(), 0u);
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<1> hs(soa.Self());
-  uint8_t* const reserved_code = code_cache->ReserveCode(soa.Self(), 4 * KB);
-  ASSERT_TRUE(reserved_code != nullptr);
-  ASSERT_TRUE(code_cache->ContainsCodePtr(reserved_code));
-  ASSERT_EQ(code_cache->NumMethods(), 1u);
-  Runtime* const runtime = Runtime::Current();
-  ClassLinker* const class_linker = runtime->GetClassLinker();
-  ArtMethod* method = &class_linker->AllocArtMethodArray(soa.Self(),
-                                                         runtime->GetLinearAlloc(),
-                                                         1)->At(0);
-  ASSERT_FALSE(code_cache->ContainsMethod(method));
-  method->SetEntryPointFromQuickCompiledCode(reserved_code);
-  ASSERT_TRUE(code_cache->ContainsMethod(method));
-  ASSERT_EQ(code_cache->GetCodeFor(method), reserved_code);
-  // Save the code and then change it.
-  code_cache->SaveCompiledCode(method, reserved_code);
-  method->SetEntryPointFromQuickCompiledCode(nullptr);
-  ASSERT_EQ(code_cache->GetCodeFor(method), reserved_code);
-  const uint8_t data_arr[] = {1, 2, 3, 4, 5};
-  uint8_t* data_ptr = code_cache->AddDataArray(soa.Self(), data_arr, data_arr + sizeof(data_arr));
-  ASSERT_TRUE(data_ptr != nullptr);
-  ASSERT_EQ(memcmp(data_ptr, data_arr, sizeof(data_arr)), 0);
-}
-
-TEST_F(JitCodeCacheTest, TestOverflow) {
-  std::string error_msg;
-  constexpr size_t kSize = 1 * MB;
-  std::unique_ptr<JitCodeCache> code_cache(
-      JitCodeCache::Create(kSize, &error_msg));
-  ASSERT_TRUE(code_cache.get() != nullptr) << error_msg;
-  ASSERT_TRUE(code_cache->CodeCachePtr() != nullptr);
-  size_t code_bytes = 0;
-  size_t data_bytes = 0;
-  constexpr size_t kCodeArrSize = 4 * KB;
-  constexpr size_t kDataArrSize = 4 * KB;
-  uint8_t data_arr[kDataArrSize];
-  std::fill_n(data_arr, arraysize(data_arr), 53);
-  // Add code and data until we are full.
-  uint8_t* code_ptr = nullptr;
-  uint8_t* data_ptr = nullptr;
-  do {
-    code_ptr = code_cache->ReserveCode(Thread::Current(), kCodeArrSize);
-    data_ptr = code_cache->AddDataArray(Thread::Current(), data_arr, data_arr + kDataArrSize);
-    if (code_ptr != nullptr) {
-      code_bytes += kCodeArrSize;
-    }
-    if (data_ptr != nullptr) {
-      data_bytes += kDataArrSize;
-    }
-  } while (code_ptr != nullptr || data_ptr != nullptr);
-  // Make sure we added a reasonable amount
-  CHECK_GT(code_bytes, 0u);
-  CHECK_LE(code_bytes, kSize);
-  CHECK_GT(data_bytes, 0u);
-  CHECK_LE(data_bytes, kSize);
-  CHECK_GE(code_bytes + data_bytes, kSize * 4 / 5);
-}
-
-}  // namespace jit
-}  // namespace art
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 9b9c5d2..7931306 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -26,7 +26,12 @@
 
 class JitCompileTask FINAL : public Task {
  public:
-  explicit JitCompileTask(ArtMethod* method) : method_(method) {
+  enum TaskKind {
+    kAllocateProfile,
+    kCompile
+  };
+
+  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
     ScopedObjectAccess soa(Thread::Current());
     // Add a global ref to the class to prevent class unloading until compilation is done.
     klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
@@ -40,9 +45,16 @@
 
   void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
-    VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-    if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
-      VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
+    if (kind_ == kCompile) {
+      VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
+        VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
+      }
+    } else {
+      DCHECK(kind_ == kAllocateProfile);
+      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
+      }
     }
   }
 
@@ -52,6 +64,7 @@
 
  private:
   ArtMethod* const method_;
+  const TaskKind kind_;
   jobject klass_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
@@ -73,11 +86,9 @@
 }
 
 void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
-  ScopedObjectAccessUnchecked soa(self);
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
   // than we want resulting in samples even after the method is compiled.
-  if (method->IsClassInitializer() || method->IsNative() ||
-      Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
+  if (method->IsClassInitializer() || method->IsNative()) {
     return;
   }
   if (thread_pool_.get() == nullptr) {
@@ -86,14 +97,18 @@
   }
   uint16_t sample_count = method->IncrementCounter();
   if (sample_count == warm_method_threshold_) {
-    ProfilingInfo* info = method->CreateProfilingInfo();
-    if (info != nullptr) {
+    if (ProfilingInfo::Create(self, method, /* retry_allocation */ false)) {
       VLOG(jit) << "Start profiling " << PrettyMethod(method);
+    } else {
+      // The allocation failed. Instead of doing the collection on the Java thread, we push
+      // an allocation task to a compiler thread, which will do the collection.
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+      thread_pool_->StartWorkers(self);
     }
   }
+
   if (sample_count == hot_method_threshold_) {
-    thread_pool_->AddTask(self, new JitCompileTask(
-        method->GetInterfaceMethodIfProxy(sizeof(void*))));
+    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
     thread_pool_->StartWorkers(self);
   }
 }
@@ -108,14 +123,18 @@
                                                           ArtMethod* caller,
                                                           uint32_t dex_pc,
                                                           ArtMethod* callee ATTRIBUTE_UNUSED) {
+  instrumentation_cache_->AddSamples(thread, caller, 1);
+  // We make sure we cannot be suspended, as the profiling info can be concurrently deleted.
+  thread->StartAssertNoThreadSuspension("Instrumenting invoke");
   DCHECK(this_object != nullptr);
   ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
   if (info != nullptr) {
     // Since the instrumentation is marked from the declaring class we need to mark the card so
     // that mod-union tables and card rescanning know about the update.
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
-    info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
+    info->AddInvokeInfo(dex_pc, this_object->GetClass());
   }
+  thread->EndAssertNoThreadSuspension(nullptr);
 }
 
 void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
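AddSamples above drives a two-threshold scheme: crossing the warm threshold allocates profiling info, crossing the hot threshold enqueues a compile task. A standalone sketch of that counter-and-threshold idea (the class, thresholds, and action strings are invented here, not ART's instrumentation cache):

#include <cstdint>
#include <iostream>
#include <string>

// Per-method hotness counter with two thresholds. Sketch only, not ART code.
class HotnessCounter {
 public:
  HotnessCounter(uint16_t warm, uint16_t hot) : warm_(warm), hot_(hot) {}

  std::string AddSample() {
    uint16_t count = ++counter_;
    if (count == warm_) {
      return "start profiling";
    }
    if (count == hot_) {
      return "enqueue for compilation";
    }
    return "keep interpreting";
  }

 private:
  const uint16_t warm_;
  const uint16_t hot_;
  uint16_t counter_ = 0;
};

int main() {
  HotnessCounter method(/*warm=*/2, /*hot=*/4);
  for (int i = 1; i <= 5; ++i) {
    std::cout << "sample " << i << ": " << method.AddSample() << "\n";
  }
  return 0;
}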
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 0c039f2..2e52b1b 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -25,18 +25,13 @@
 
 namespace art {
 
-ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
   // Walk over the dex instructions of the method and keep track of
   // instructions we are interested in profiling.
-  const uint16_t* code_ptr = nullptr;
-  const uint16_t* code_end = nullptr;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    DCHECK(!method->IsNative());
-    const DexFile::CodeItem& code_item = *method->GetCodeItem();
-    code_ptr = code_item.insns_;
-    code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
-  }
+  DCHECK(!method->IsNative());
+  const DexFile::CodeItem& code_item = *method->GetCodeItem();
+  const uint16_t* code_ptr = code_item.insns_;
+  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
 
   uint32_t dex_pc = 0;
   std::vector<uint32_t> entries;
@@ -62,23 +57,15 @@
   // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
   // object, it will never be filled.
   if (entries.empty()) {
-    return nullptr;
+    return true;
   }
 
   // Allocate the `ProfilingInfo` object in the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
-  size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
-  uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
-
-  if (data == nullptr) {
-    VLOG(jit) << "Cannot allocate profiling info anymore";
-    return nullptr;
-  }
-
-  return new (data) ProfilingInfo(entries);
+  return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
@@ -89,9 +76,8 @@
   }
   DCHECK(cache != nullptr);
 
-  ScopedObjectAccess soa(self);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    mirror::Class* existing = cache->classes_[i].Read<kWithoutReadBarrier>();
+    mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
       // Receiver type is already in the cache, nothing else to do.
       return;
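
AddInvokeInfo above fills a small fixed-size per-call-site cache of receiver classes; once every slot is taken the call site is effectively megamorphic. A hedged, standalone sketch of that bounded insert pattern (ReceiverCache and kSlots are illustrative names, not ART's types):

#include <array>
#include <cstddef>

struct ReceiverCache {
  static constexpr std::size_t kSlots = 5;    // small fixed capacity, like kIndividualCacheSize
  std::array<const void*, kSlots> classes{};  // nullptr marks a free slot

  // Records a receiver class; returns false once the cache is full of other types.
  bool Record(const void* cls) {
    for (auto& slot : classes) {
      if (slot == cls) {
        return true;     // type already cached, nothing else to do
      }
      if (slot == nullptr) {
        slot = cls;      // take the first free slot
        return true;
      }
    }
    return false;        // megamorphic: every slot holds a different type
  }
};
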
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index 73ca41a..b13a315 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -26,6 +26,10 @@
 
 class ArtMethod;
 
+namespace jit {
+class JitCodeCache;
+}
+
 namespace mirror {
 class Class;
 }
@@ -36,10 +40,17 @@
  */
 class ProfilingInfo {
  public:
-  static ProfilingInfo* Create(ArtMethod* method);
+  // Create a ProfilingInfo for 'method'. Return true if it succeeded, or if it is not
+  // needed because the method has no virtual/interface invocations.
+  static bool Create(Thread* self, ArtMethod* method, bool retry_allocation)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add information from an executed INVOKE instruction to the profile.
-  void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+  void AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls)
+      // Method should not be interruptible, as it manipulates the ProfilingInfo
+      // which can be concurrently collected.
+      REQUIRES(Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
@@ -52,6 +63,10 @@
     }
   }
 
+  ArtMethod* GetMethod() const {
+    return method_;
+  }
+
  private:
   // Structure to store the classes seen at runtime for a specific instruction.
   // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
@@ -84,8 +99,9 @@
     GcRoot<mirror::Class> classes_[kIndividualCacheSize];
   };
 
-  explicit ProfilingInfo(const std::vector<uint32_t>& entries)
-      : number_of_inline_caches_(entries.size()) {
+  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()),
+        method_(method) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       cache_[i].dex_pc = entries[i];
@@ -95,9 +111,14 @@
   // Number of instructions we are profiling in the ArtMethod.
   const uint32_t number_of_inline_caches_;
 
+  // Method this profiling info is for.
+  ArtMethod* const method_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
 
+  friend class jit::JitCodeCache;
+
   DISALLOW_COPY_AND_ASSIGN(ProfilingInfo);
 };
 
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index 4104d7a..dab1040 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -93,8 +93,7 @@
   monitors.Dump(os);
 }
 
-void JNIEnvExt::PushFrame(int capacity) {
-  UNUSED(capacity);  // cpplint gets confused with (int) and thinks its a cast.
+void JNIEnvExt::PushFrame(int capacity ATTRIBUTE_UNUSED) {
   // TODO: take 'capacity' into account.
   stacked_local_ref_cookies.push_back(local_ref_cookie);
   local_ref_cookie = locals.GetSegmentState();
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 6bc1829..234a733 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -1743,8 +1743,9 @@
     return static_cast<jchar*>(s->GetValue());
   }
 
-  static void ReleaseStringCritical(JNIEnv* env, jstring java_string, const jchar* chars) {
-    UNUSED(chars);
+  static void ReleaseStringCritical(JNIEnv* env,
+                                    jstring java_string,
+                                    const jchar* chars ATTRIBUTE_UNUSED) {
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     gc::Heap* heap = Runtime::Current()->GetHeap();
diff --git a/runtime/jvalue.h b/runtime/jvalue.h
index 6a6d198..7b91b0b 100644
--- a/runtime/jvalue.h
+++ b/runtime/jvalue.h
@@ -32,7 +32,7 @@
 
   int8_t GetB() const { return b; }
   void SetB(int8_t new_b) {
-    i = ((static_cast<int32_t>(new_b) << 24) >> 24);  // Sign-extend.
+    j = ((static_cast<int64_t>(new_b) << 56) >> 56);  // Sign-extend to 64 bits.
   }
 
   uint16_t GetC() const { return c; }
@@ -45,7 +45,9 @@
   void SetF(float new_f) { f = new_f; }
 
   int32_t GetI() const { return i; }
-  void SetI(int32_t new_i) { i = new_i; }
+  void SetI(int32_t new_i) {
+    j = ((static_cast<int64_t>(new_i) << 32) >> 32);  // Sign-extend to 64 bits.
+  }
 
   int64_t GetJ() const { return j; }
   void SetJ(int64_t new_j) { j = new_j; }
@@ -55,7 +57,7 @@
 
   int16_t GetS() const { return s; }
   void SetS(int16_t new_s) {
-    i = ((static_cast<int32_t>(new_s) << 16) >> 16);  // Sign-extend.
+    j = ((static_cast<int64_t>(new_s) << 48) >> 48);  // Sign-extend to 64 bits.
   }
 
   uint8_t GetZ() const { return z; }
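
The jvalue.h change makes SetB, SetS and SetI write a sign-extended value into the 64-bit j member, so a narrow store leaves the whole union slot consistent. A small hedged example of the shift-based sign extension used above (SignExtend8To64 is an illustrative helper, not part of the runtime):

#include <cassert>
#include <cstdint>

// Sign-extend an 8-bit value into 64 bits the way SetB now does: shift the sign
// bit up to bit 63, then arithmetic-shift it back down.
int64_t SignExtend8To64(int8_t v) {
  return (static_cast<int64_t>(v) << 56) >> 56;
}

int main() {
  assert(SignExtend8To64(-1) == -1);      // 0xFF widens to all ones
  assert(SignExtend8To64(0x7F) == 0x7F);  // positive values are unchanged
  return 0;
}
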
diff --git a/runtime/leb128.h b/runtime/leb128.h
index baf9da2..74934ae 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -127,8 +127,9 @@
   return dest;
 }
 
-template<typename Allocator>
-static inline void EncodeUnsignedLeb128(std::vector<uint8_t, Allocator>* dest, uint32_t value) {
+template <typename Vector>
+static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   uint8_t out = value & 0x7f;
   value >>= 7;
   while (value != 0) {
@@ -165,8 +166,9 @@
   return dest;
 }
 
-template<typename Allocator>
-static inline void EncodeSignedLeb128(std::vector<uint8_t, Allocator>* dest, int32_t value) {
+template<typename Vector>
+static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
   uint8_t out = value & 0x7f;
   while (extra_bits != 0u) {
@@ -179,10 +181,12 @@
 }
 
 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
-template <typename Allocator = std::allocator<uint8_t>>
+template <typename Vector = std::vector<uint8_t>>
 class Leb128Encoder {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
-  explicit Leb128Encoder(std::vector<uint8_t, Allocator>* data) : data_(data) {
+  explicit Leb128Encoder(Vector* data) : data_(data) {
     DCHECK(data != nullptr);
   }
 
@@ -212,27 +216,29 @@
     }
   }
 
-  const std::vector<uint8_t, Allocator>& GetData() const {
+  const Vector& GetData() const {
     return *data_;
   }
 
  protected:
-  std::vector<uint8_t, Allocator>* const data_;
+  Vector* const data_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
 };
 
 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
-template <typename Allocator = std::allocator<uint8_t>>
-class Leb128EncodingVector FINAL : private std::vector<uint8_t, Allocator>,
-                                   public Leb128Encoder<Allocator> {
- public:
-  Leb128EncodingVector() : Leb128Encoder<Allocator>(this) { }
+template <typename Vector = std::vector<uint8_t>>
+class Leb128EncodingVector FINAL : private Vector,
+                                   public Leb128Encoder<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 
-  explicit Leb128EncodingVector(const Allocator& alloc)
-    : std::vector<uint8_t, Allocator>(alloc),
-      Leb128Encoder<Allocator>(this) { }
+ public:
+  Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
+
+  explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
+    : Vector(alloc),
+      Leb128Encoder<Vector>(this) { }
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
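
After this change the encoders are parameterized on the container type rather than on an allocator, so any byte-valued vector-like type with push_back (for example an arena-backed vector) can be used; the static_assert pins the value type to uint8_t. A self-contained hedged sketch of that shape, with the same ULEB128 loop as EncodeUnsignedLeb128:

#include <cstdint>
#include <type_traits>
#include <vector>

// Emits `value` as ULEB128: 7 payload bits per byte, high bit set on all but the last byte.
template <typename Vector = std::vector<uint8_t>>
void EncodeUleb128(Vector* dest, uint32_t value) {
  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
  uint8_t out = value & 0x7f;
  value >>= 7;
  while (value != 0) {
    dest->push_back(out | 0x80);  // continuation bit: more bytes follow
    out = value & 0x7f;
    value >>= 7;
  }
  dest->push_back(out);           // final byte, high bit clear
}

// Example: EncodeUleb128(&bytes, 300u) appends {0xAC, 0x02}.
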
diff --git a/runtime/leb128_test.cc b/runtime/leb128_test.cc
index 09f7ecc..122f55e 100644
--- a/runtime/leb128_test.cc
+++ b/runtime/leb128_test.cc
@@ -88,7 +88,7 @@
     {-0x08000000, {0x80, 0x80, 0x80, 0x40, 0}},
     {-0x08000001, {0xFF, 0xFF, 0xFF, 0xBF, 0x7F}},
     {-0x20000000, {0x80, 0x80, 0x80, 0x80, 0x7E}},
-    {(-1) << 31, {0x80, 0x80, 0x80, 0x80, 0x78}},
+    {static_cast<int32_t>(0x80000000), {0x80, 0x80, 0x80, 0x80, 0x78}},
 };
 
 TEST(Leb128Test, UnsignedSinglesVector) {
diff --git a/runtime/length_prefixed_array.h b/runtime/length_prefixed_array.h
index 0ff6d7a..e01b6cc 100644
--- a/runtime/length_prefixed_array.h
+++ b/runtime/length_prefixed_array.h
@@ -30,19 +30,34 @@
 class LengthPrefixedArray {
  public:
   explicit LengthPrefixedArray(size_t length)
-      : length_(dchecked_integral_cast<uint32_t>(length)) {}
+      : size_(dchecked_integral_cast<uint32_t>(length)) {}
 
   T& At(size_t index, size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
-    DCHECK_LT(index, length_);
+    DCHECK_LT(index, size_);
     return AtUnchecked(index, element_size, alignment);
   }
 
-  StrideIterator<T> Begin(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
+  const T& At(size_t index, size_t element_size = sizeof(T), size_t alignment = alignof(T)) const {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index, element_size, alignment);
+  }
+
+  StrideIterator<T> begin(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
     return StrideIterator<T>(&AtUnchecked(0, element_size, alignment), element_size);
   }
 
-  StrideIterator<T> End(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
-    return StrideIterator<T>(&AtUnchecked(length_, element_size, alignment), element_size);
+  StrideIterator<const T> begin(size_t element_size = sizeof(T),
+                                size_t alignment = alignof(T)) const {
+    return StrideIterator<const T>(&AtUnchecked(0, element_size, alignment), element_size);
+  }
+
+  StrideIterator<T> end(size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
+    return StrideIterator<T>(&AtUnchecked(size_, element_size, alignment), element_size);
+  }
+
+  StrideIterator<const T> end(size_t element_size = sizeof(T),
+                              size_t alignment = alignof(T)) const {
+    return StrideIterator<const T>(&AtUnchecked(size_, element_size, alignment), element_size);
   }
 
   static size_t OffsetOfElement(size_t index,
@@ -60,13 +75,13 @@
     return result;
   }
 
-  uint64_t Length() const {
-    return length_;
+  size_t size() const {
+    return size_;
   }
 
   // Updates the length but does not reallocate storage.
-  void SetLength(size_t length) {
-    length_ = dchecked_integral_cast<uint32_t>(length);
+  void SetSize(size_t length) {
+    size_ = dchecked_integral_cast<uint32_t>(length);
   }
 
  private:
@@ -75,7 +90,12 @@
         reinterpret_cast<uintptr_t>(this) + OffsetOfElement(index, element_size, alignment));
   }
 
-  uint32_t length_;
+  const T& AtUnchecked(size_t index, size_t element_size, size_t alignment) const {
+    return *reinterpret_cast<T*>(
+        reinterpret_cast<uintptr_t>(this) + OffsetOfElement(index, element_size, alignment));
+  }
+
+  uint32_t size_;
   uint8_t data[0];
 };
 
@@ -84,7 +104,7 @@
 IterationRange<StrideIterator<T>> MakeIterationRangeFromLengthPrefixedArray(
     LengthPrefixedArray<T>* arr, size_t element_size = sizeof(T), size_t alignment = alignof(T)) {
   return arr != nullptr ?
-      MakeIterationRange(arr->Begin(element_size, alignment), arr->End(element_size, alignment)) :
+      MakeIterationRange(arr->begin(element_size, alignment), arr->end(element_size, alignment)) :
       MakeEmptyIterationRange(StrideIterator<T>(nullptr, 0));
 }
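
The renamed begin()/end() return stride iterators, so a LengthPrefixedArray can be walked even when elements are stored with a stride larger than sizeof(T) (for instance when each element carries trailing, pointer-size-dependent data). A minimal hedged sketch of such an iterator, not ART's StrideIterator:

#include <cstddef>
#include <cstdint>

template <typename T>
class StrideIter {
 public:
  StrideIter(T* ptr, std::size_t stride)
      : addr_(reinterpret_cast<uintptr_t>(ptr)), stride_(stride) {}

  T& operator*() const { return *reinterpret_cast<T*>(addr_); }
  StrideIter& operator++() { addr_ += stride_; return *this; }  // step by stride, not sizeof(T)
  bool operator!=(const StrideIter& other) const { return addr_ != other.addr_; }

 private:
  uintptr_t addr_;
  std::size_t stride_;
};

// Walking 3 ints laid out 16 bytes apart starting at `base` (end is 3 strides = 48 bytes past it):
//   for (StrideIter<int> it(base, 16), end(base + 12, 16); it != end; ++it) { ... }
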
 
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
index 43e81d9..f91b0ed 100644
--- a/runtime/linear_alloc.cc
+++ b/runtime/linear_alloc.cc
@@ -48,4 +48,8 @@
   return allocator_.Contains(ptr);
 }
 
+bool LinearAlloc::ContainsUnsafe(void* ptr) const {
+  return allocator_.Contains(ptr);
+}
+
 }  // namespace art
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index 1b21527..df7f17d 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -47,6 +47,10 @@
   // Return true if the linear alloc contains an address.
   bool Contains(void* ptr) const REQUIRES(!lock_);
 
+  // Unsafe version of 'Contains' only to be used when the allocator is going
+  // to be deleted.
+  bool ContainsUnsafe(void* ptr) const NO_THREAD_SAFETY_ANALYSIS;
+
  private:
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ArenaAllocator allocator_ GUARDED_BY(lock_);
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 2a019c5..2d3581d 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -134,11 +134,25 @@
 uintptr_t MemMap::next_mem_pos_ = GenerateNextMemPos();
 #endif
 
-// Return true if the address range is contained in a single /proc/self/map entry.
-static bool ContainedWithinExistingMap(uint8_t* ptr, size_t size,
-                                       std::string* error_msg) {
+// Return true if the address range is contained in a single memory map by either reading
+// the maps_ variable or the /proc/self/map entry.
+bool MemMap::ContainedWithinExistingMap(uint8_t* ptr, size_t size, std::string* error_msg) {
   uintptr_t begin = reinterpret_cast<uintptr_t>(ptr);
   uintptr_t end = begin + size;
+
+  // There is a suspicion that BacktraceMap::Create is occasionally missing maps. TODO: Investigate
+  // further.
+  {
+    MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
+    for (auto& pair : *maps_) {
+      MemMap* const map = pair.second;
+      if (begin >= reinterpret_cast<uintptr_t>(map->Begin()) &&
+          end <= reinterpret_cast<uintptr_t>(map->End())) {
+        return true;
+      }
+    }
+  }
+
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
   if (map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to build process map");
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 196a7f6..7c11ceb 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -161,6 +161,8 @@
       REQUIRES(Locks::mem_maps_lock_);
   static MemMap* GetLargestMemMapAt(void* address)
       REQUIRES(Locks::mem_maps_lock_);
+  static bool ContainedWithinExistingMap(uint8_t* ptr, size_t size, std::string* error_msg)
+      REQUIRES(!Locks::mem_maps_lock_);
 
   const std::string name_;
   uint8_t* const begin_;  // Start of data.
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 3d54029..ec7d758 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -100,9 +100,8 @@
   explicit SetLengthVisitor(int32_t length) : length_(length) {
   }
 
-  void operator()(Object* obj, size_t usable_size) const
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    UNUSED(usable_size);
     // Avoid AsArray as object is not yet in live bitmap or allocation stack.
     Array* array = down_cast<Array*>(obj);
     // DCHECK(array->IsArrayInstance());
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 93f2aea..19ee7f4 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -392,7 +392,8 @@
 }
 
 inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method, size_t pointer_size) {
-  DCHECK(!method->GetDeclaringClass()->IsInterface() || method->IsMiranda());
+  // Only miranda or default methods may come from interfaces and be used as a virtual.
+  DCHECK(!method->GetDeclaringClass()->IsInterface() || method->IsDefault() || method->IsMiranda());
   // The argument method may come from a super class.
   // Use the index to a potentially overridden one for this instance's class.
   return GetVTableEntry(method->GetMethodIndex(), pointer_size);
@@ -927,22 +928,22 @@
 
 inline uint32_t Class::NumDirectMethods() {
   LengthPrefixedArray<ArtMethod>* arr = GetDirectMethodsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
+  return arr != nullptr ? arr->size() : 0u;
 }
 
 inline uint32_t Class::NumVirtualMethods() {
   LengthPrefixedArray<ArtMethod>* arr = GetVirtualMethodsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
+  return arr != nullptr ? arr->size() : 0u;
 }
 
 inline uint32_t Class::NumInstanceFields() {
   LengthPrefixedArray<ArtField>* arr = GetIFieldsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
+  return arr != nullptr ? arr->size() : 0u;
 }
 
 inline uint32_t Class::NumStaticFields() {
   LengthPrefixedArray<ArtField>* arr = GetSFieldsPtrUnchecked();
-  return arr != nullptr ? arr->Length() : 0u;
+  return arr != nullptr ? arr->size() : 0u;
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 2ac44fc..9d01a1d 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -565,24 +565,58 @@
   return nullptr;
 }
 
-ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) {
-  // Is the field in this class?
-  // Interfaces are not relevant because they can't contain instance fields.
-  for (size_t i = 0; i < NumInstanceFields(); ++i) {
-    ArtField* f = GetInstanceField(i);
-    if (name == f->GetName() && type == f->GetTypeDescriptor()) {
-      return f;
+// Custom binary search to avoid double comparisons from std::binary_search.
+static ArtField* FindFieldByNameAndType(LengthPrefixedArray<ArtField>* fields,
+                                        const StringPiece& name,
+                                        const StringPiece& type)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (fields == nullptr) {
+    return nullptr;
+  }
+  size_t low = 0;
+  size_t high = fields->size();
+  ArtField* ret = nullptr;
+  while (low < high) {
+    size_t mid = (low + high) / 2;
+    ArtField& field = fields->At(mid);
+    // Fields are sorted by class, then name, then type descriptor. This is verified in the dex
+    // file verifier. There can be multiple fields with the same name in the same class due to proguard.
+    int result = StringPiece(field.GetName()).Compare(name);
+    if (result == 0) {
+      result = StringPiece(field.GetTypeDescriptor()).Compare(type);
+    }
+    if (result < 0) {
+      low = mid + 1;
+    } else if (result > 0) {
+      high = mid;
+    } else {
+      ret = &field;
+      break;
     }
   }
-  return nullptr;
+  if (kIsDebugBuild) {
+    ArtField* found = nullptr;
+    for (ArtField& field : MakeIterationRangeFromLengthPrefixedArray(fields)) {
+      if (name == field.GetName() && type == field.GetTypeDescriptor()) {
+        found = &field;
+        break;
+      }
+    }
+    CHECK_EQ(found, ret) << "Found " << PrettyField(found) << " vs  " << PrettyField(ret);
+  }
+  return ret;
+}
+
+ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) {
+  // Binary search by name. Interfaces are not relevant because they can't contain instance fields.
+  return FindFieldByNameAndType(GetIFieldsPtr(), name, type);
 }
 
 ArtField* Class::FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) {
   if (GetDexCache() == dex_cache) {
-    for (size_t i = 0; i < NumInstanceFields(); ++i) {
-      ArtField* f = GetInstanceField(i);
-      if (f->GetDexFieldIndex() == dex_field_idx) {
-        return f;
+    for (ArtField& field : GetIFields()) {
+      if (field.GetDexFieldIndex() == dex_field_idx) {
+        return &field;
       }
     }
   }
@@ -615,21 +649,14 @@
 
 ArtField* Class::FindDeclaredStaticField(const StringPiece& name, const StringPiece& type) {
   DCHECK(type != nullptr);
-  for (size_t i = 0; i < NumStaticFields(); ++i) {
-    ArtField* f = GetStaticField(i);
-    if (name == f->GetName() && type == f->GetTypeDescriptor()) {
-      return f;
-    }
-  }
-  return nullptr;
+  return FindFieldByNameAndType(GetSFieldsPtr(), name, type);
 }
 
 ArtField* Class::FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) {
   if (dex_cache == GetDexCache()) {
-    for (size_t i = 0; i < NumStaticFields(); ++i) {
-      ArtField* f = GetStaticField(i);
-      if (f->GetDexFieldIndex() == dex_field_idx) {
-        return f;
+    for (ArtField& field : GetSFields()) {
+      if (field.GetDexFieldIndex() == dex_field_idx) {
+        return &field;
       }
     }
   }
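
FindFieldByNameAndType above depends on fields being sorted by name and then type descriptor; each probe compares the name first and only falls back to the type on a name match, avoiding the double comparisons std::binary_search would do. A standalone hedged sketch of that two-key binary search over a plain sorted array (Field and FindByNameAndType are illustrative, not ART's types):

#include <cstddef>
#include <cstring>

struct Field { const char* name; const char* type; };

// `fields` must be sorted by (name, type). Returns the match or nullptr.
const Field* FindByNameAndType(const Field* fields, std::size_t count,
                               const char* name, const char* type) {
  std::size_t low = 0;
  std::size_t high = count;
  while (low < high) {
    std::size_t mid = low + (high - low) / 2;
    int result = std::strcmp(fields[mid].name, name);
    if (result == 0) {
      result = std::strcmp(fields[mid].type, type);  // same name: order by type descriptor
    }
    if (result < 0) {
      low = mid + 1;
    } else if (result > 0) {
      high = mid;
    } else {
      return &fields[mid];
    }
  }
  return nullptr;
}
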
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 2668b3d..8219d69 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -30,6 +30,7 @@
 #include "primitive.h"
 #include "read_barrier_option.h"
 #include "stride_iterator.h"
+#include "thread.h"
 #include "utils.h"
 
 #ifndef IMT_SIZE
@@ -229,6 +230,18 @@
     return (GetAccessFlags() & kAccClassIsFinalizable) != 0;
   }
 
+  ALWAYS_INLINE void SetRecursivelyInitialized() SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId());
+    uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+    SetAccessFlags(flags | kAccRecursivelyInitialized);
+  }
+
+  ALWAYS_INLINE void SetHasDefaultMethods() SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId());
+    uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+    SetAccessFlags(flags | kAccHasDefaultMethod);
+  }
+
   ALWAYS_INLINE void SetFinalizable() SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
     SetAccessFlags(flags | kAccClassIsFinalizable);
@@ -860,6 +873,14 @@
 
   ArtMethod* FindClassInitializer(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool HasDefaultMethods() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccHasDefaultMethod) != 0;
+  }
+
+  bool HasBeenRecursivelyInitialized() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccRecursivelyInitialized) != 0;
+  }
+
   ALWAYS_INLINE int32_t GetIfTableCount() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE IfTable* GetIfTable() SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 8fb860f..48f2ca5 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -20,9 +20,8 @@
 
 #include "class_linker.h"
 #include "common_runtime_test.h"
-#include "gc/heap.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
+#include "linear_alloc.h"
+#include "mirror/class_loader-inl.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change.h"
 
@@ -36,7 +35,9 @@
   StackHandleScope<1> hs(soa.Self());
   ASSERT_TRUE(java_lang_dex_file_ != nullptr);
   Handle<DexCache> dex_cache(
-      hs.NewHandle(class_linker_->AllocDexCache(soa.Self(), *java_lang_dex_file_)));
+      hs.NewHandle(class_linker_->AllocDexCache(soa.Self(),
+                                                *java_lang_dex_file_,
+                                                Runtime::Current()->GetLinearAlloc())));
   ASSERT_TRUE(dex_cache.Get() != nullptr);
 
   EXPECT_EQ(java_lang_dex_file_->NumStringIds(), dex_cache->NumStrings());
@@ -45,5 +46,21 @@
   EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),  dex_cache->NumResolvedFields());
 }
 
+TEST_F(DexCacheTest, LinearAlloc) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader(LoadDex("Main"));
+  ASSERT_TRUE(jclass_loader != nullptr);
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* const class_linker = runtime->GetClassLinker();
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+  mirror::Class* klass = class_linker->FindClass(soa.Self(), "LMain;", class_loader);
+  ASSERT_TRUE(klass != nullptr);
+  LinearAlloc* const linear_alloc = klass->GetClassLoader()->GetAllocator();
+  EXPECT_NE(linear_alloc, runtime->GetLinearAlloc());
+  EXPECT_TRUE(linear_alloc->Contains(klass->GetDexCache()->GetResolvedMethods()));
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 90180c5..5c12091 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -95,6 +95,12 @@
       OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
+inline bool Object::CasLockWordWeakRelease(LockWord old_val, LockWord new_val) {
+  // Force use of non-transactional mode and do not check.
+  return CasFieldWeakRelease32<false, false>(
+      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
+}
+
 inline uint32_t Object::GetLockOwnerThreadId() {
   return Monitor::GetLockOwnerThreadId(this);
 }
@@ -175,7 +181,10 @@
         static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
     new_lw = lw;
     new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
-  } while (!CasLockWordWeakSequentiallyConsistent(expected_lw, new_lw));
+    // This CAS is a CAS release so that when GC updates all the fields of an object and then
+    // changes the object from gray to black, the field updates (stores) will be visible (won't be
+    // reordered after this CAS).
+  } while (!CasLockWordWeakRelease(expected_lw, new_lw));
   return true;
 #elif USE_BROOKS_READ_BARRIER
   DCHECK(kUseBrooksReadBarrier);
@@ -671,6 +680,24 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakRelease32(MemberOffset field_offset,
+                                          int32_t old_value, int32_t new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
+
+  return atomic_addr->CompareExchangeWeakRelease(old_value, new_value);
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldStrongSequentiallyConsistent32(MemberOffset field_offset,
                                                            int32_t old_value, int32_t new_value) {
   if (kCheckTransaction) {
@@ -944,6 +971,62 @@
   return success;
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakRelaxedObjectWithoutWriteBarrier(
+    MemberOffset field_offset, Object* old_value, Object* new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+
+  bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref.reference_,
+                                                         new_ref.reference_);
+  return success;
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier(
+    MemberOffset field_offset, Object* old_value, Object* new_value) {
+  if (kCheckTransaction) {
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+
+  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
+                                                           new_ref.reference_);
+  return success;
+}
+
 template<bool kIsStatic, typename Visitor>
 inline void Object::VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) {
   if (!kIsStatic && (ref_offsets != mirror::Class::kClassWalkSuper)) {
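
The gray-to-black read-barrier-state update above is deliberately a CAS with release ordering, so the GC's preceding field stores cannot be reordered past the color change that other threads observe. A hedged sketch of the same ordering contract using std::atomic rather than ART's Atomic wrapper (kGray, kBlack and Publish are illustrative):

#include <atomic>
#include <cstdint>

constexpr uint32_t kGray = 1;
constexpr uint32_t kBlack = 2;

// Assumes `state` currently holds kGray and only this thread flips it to kBlack.
void Publish(std::atomic<uint32_t>& state, int& payload) {
  payload = 42;  // the "field updates" performed before the color flip
  uint32_t expected = kGray;
  while (!state.compare_exchange_weak(expected, kBlack,
                                      std::memory_order_release,
                                      std::memory_order_relaxed)) {
    expected = kGray;  // weak CAS may fail spuriously; retry
  }
}

// A reader that observes kBlack with an acquire load is guaranteed to see the payload:
//   if (state.load(std::memory_order_acquire) == kBlack) { use(payload); }
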
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 50490bb..5c6520f 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -99,7 +99,7 @@
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
-  bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
+  ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -135,11 +135,17 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  bool CasLockWordWeakRelease(LockWord old_val, LockWord new_val)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
-  mirror::Object* MonitorEnter(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
-      EXCLUSIVE_LOCK_FUNCTION();
-  bool MonitorExit(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
+  mirror::Object* MonitorEnter(Thread* self)
+      EXCLUSIVE_LOCK_FUNCTION()
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool MonitorExit(Thread* self)
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       UNLOCK_FUNCTION();
   void Notify(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
   void NotifyAll(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -272,7 +278,6 @@
                                                                    Object* old_value,
                                                                    Object* new_value)
       SHARED_REQUIRES(Locks::mutator_lock_);
-
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
@@ -284,6 +289,18 @@
                                                                      Object* old_value,
                                                                      Object* new_value)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                    Object* old_value,
+                                                    Object* new_value)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                      Object* old_value,
+                                                      Object* new_value)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset);
@@ -392,6 +409,12 @@
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakRelease32(MemberOffset field_offset, int32_t old_value,
+                             int32_t new_value) ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongSequentiallyConsistent32(MemberOffset field_offset, int32_t old_value,
                                               int32_t new_value) ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 5b73557..5337760 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -270,7 +270,7 @@
 }
 
 template<class T> template<typename Visitor>
-void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
+inline void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
   const size_t length = static_cast<size_t>(GetLength());
   for (size_t i = 0; i < length; ++i) {
     visitor(this, OffsetOfElement(i), false);
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index f5a0445..c1284a6 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -307,10 +307,7 @@
   ScopedObjectAccess soa(Thread::Current());
   Class* java_util_Arrays = class_linker_->FindSystemClass(soa.Self(), "Ljava/util/Arrays;");
   ArtMethod* sort = java_util_Arrays->FindDirectMethod("sort", "([I)V", sizeof(void*));
-  const DexFile::StringId* string_id = java_lang_dex_file_->FindStringId("[I");
-  ASSERT_TRUE(string_id != nullptr);
-  const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId(
-      java_lang_dex_file_->GetIndexForStringId(*string_id));
+  const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId("[I");
   ASSERT_TRUE(type_id != nullptr);
   uint32_t type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id);
   Object* array = CheckAndAllocArrayFromCodeInstrumented(
@@ -367,16 +364,10 @@
   Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<ClassLoader*>(class_loader)));
   Class* klass = class_linker_->FindClass(soa.Self(), "LStaticsFromCode;", loader);
   ArtMethod* clinit = klass->FindClassInitializer(sizeof(void*));
-  const DexFile::StringId* klass_string_id = dex_file->FindStringId("LStaticsFromCode;");
-  ASSERT_TRUE(klass_string_id != nullptr);
-  const DexFile::TypeId* klass_type_id = dex_file->FindTypeId(
-      dex_file->GetIndexForStringId(*klass_string_id));
+  const DexFile::TypeId* klass_type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(klass_type_id != nullptr);
 
-  const DexFile::StringId* type_string_id = dex_file->FindStringId("Ljava/lang/Object;");
-  ASSERT_TRUE(type_string_id != nullptr);
-  const DexFile::TypeId* type_type_id = dex_file->FindTypeId(
-      dex_file->GetIndexForStringId(*type_string_id));
+  const DexFile::TypeId* type_type_id = dex_file->FindTypeId("Ljava/lang/Object;");
   ASSERT_TRUE(type_type_id != nullptr);
 
   const DexFile::StringId* name_str_id = dex_file->FindStringId("s0");
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index f7ab10b..116cbe9 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -49,8 +49,13 @@
                                                                   // method (dex only)
 static constexpr uint32_t kAccFastNative =           0x00080000;  // method (dex only)
 static constexpr uint32_t kAccMiranda =              0x00200000;  // method (dex only)
+static constexpr uint32_t kAccDefault =              0x00400000;  // method (runtime)
 
 // Special runtime-only flags.
+// Interface and all its super-interfaces with default methods have been recursively initialized.
+static constexpr uint32_t kAccRecursivelyInitialized    = 0x20000000;
+// Interface declares some default method.
+static constexpr uint32_t kAccHasDefaultMethod          = 0x40000000;
 // class/ancestor overrides finalize()
 static constexpr uint32_t kAccClassIsFinalizable        = 0x80000000;
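
The two new runtime-only bits share the 32-bit access_flags_ word with the dex-level flags, so they must not collide with any existing bit. A hedged compile-time check one could keep next to these constants (values copied from above; the static_asserts are illustrative, not part of the change):

#include <cstdint>

static constexpr uint32_t kAccDefault                = 0x00400000;
static constexpr uint32_t kAccRecursivelyInitialized = 0x20000000;
static constexpr uint32_t kAccHasDefaultMethod       = 0x40000000;
static constexpr uint32_t kAccClassIsFinalizable     = 0x80000000;

// Each flag must occupy its own bit.
static_assert((kAccRecursivelyInitialized & kAccHasDefaultMethod) == 0, "flag overlap");
static_assert((kAccRecursivelyInitialized & kAccClassIsFinalizable) == 0, "flag overlap");
static_assert((kAccHasDefaultMethod & kAccClassIsFinalizable) == 0, "flag overlap");
static_assert((kAccDefault &
               (kAccRecursivelyInitialized | kAccHasDefaultMethod | kAccClassIsFinalizable)) == 0,
              "flag overlap");
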
 
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index fa58418..81e7e6d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -274,7 +274,7 @@
           }
           if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
             const char* owners_filename;
-            uint32_t owners_line_number;
+            int32_t owners_line_number;
             TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
             if (wait_ms > kLongWaitMs && owners_method != nullptr) {
               LOG(WARNING) << "Long monitor contention event with owner method="
@@ -696,6 +696,7 @@
 mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
   DCHECK(self != nullptr);
   DCHECK(obj != nullptr);
+  self->AssertThreadSuspensionIsAllowable();
   obj = FakeLock(obj);
   uint32_t thread_id = self->GetThreadId();
   size_t contention_count = 0;
@@ -771,6 +772,7 @@
 bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) {
   DCHECK(self != nullptr);
   DCHECK(obj != nullptr);
+  self->AssertThreadSuspensionIsAllowable();
   obj = FakeUnlock(obj);
   StackHandleScope<1> hs(self);
   Handle<mirror::Object> h_obj(hs.NewHandle(obj));
@@ -1084,7 +1086,7 @@
 }
 
 void Monitor::TranslateLocation(ArtMethod* method, uint32_t dex_pc,
-                                const char** source_file, uint32_t* line_number) const {
+                                const char** source_file, int32_t* line_number) const {
   // If method is null, location is unknown
   if (method == nullptr) {
     *source_file = "";
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 8cd93c6..707d0f1 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -65,12 +65,16 @@
   // NO_THREAD_SAFETY_ANALYSIS for mon->Lock.
   static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj)
       EXCLUSIVE_LOCK_FUNCTION(obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS;
+      NO_THREAD_SAFETY_ANALYSIS
+      REQUIRES(!Roles::uninterruptible_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS for mon->Unlock.
   static bool MonitorExit(Thread* thread, mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS
+      REQUIRES(!Roles::uninterruptible_)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      UNLOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS;
+      UNLOCK_FUNCTION(obj);
 
   static void Notify(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
     DoNotify(self, obj, false);
@@ -179,7 +183,7 @@
       NO_THREAD_SAFETY_ANALYSIS;  // For m->Install(self)
 
   void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
-                          const char* owner_filename, uint32_t owner_line_number)
+                          const char* owner_filename, int32_t owner_line_number)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void FailedUnlock(mirror::Object* obj, Thread* expected_owner, Thread* found_owner,
@@ -231,7 +235,7 @@
 
   // Translates the provided method and pc into its declaring class' source file and line number.
   void TranslateLocation(ArtMethod* method, uint32_t pc,
-                         const char** source_file, uint32_t* line_number) const
+                         const char** source_file, int32_t* line_number) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   uint32_t GetOwnerThreadId() REQUIRES(!monitor_lock_);
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index efe2e82..82ef2d8 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -50,7 +50,7 @@
 }
 
 void Monitor::LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
-                                 const char* owner_filename, uint32_t owner_line_number) {
+                                 const char* owner_filename, int32_t owner_line_number) {
   // Emit the event list length, 1 byte.
   char eventBuffer[174];
   char* cp = eventBuffer;
@@ -80,7 +80,7 @@
   uint32_t pc;
   ArtMethod* m = self->GetCurrentMethod(&pc);
   const char* filename;
-  uint32_t line_number;
+  int32_t line_number;
   TranslateLocation(m, pc, &filename, &line_number);
   cp = EventLogWriteString(cp, filename, strlen(filename));
 
diff --git a/runtime/monitor_linux.cc b/runtime/monitor_linux.cc
index 856ebe4..1c77ac0 100644
--- a/runtime/monitor_linux.cc
+++ b/runtime/monitor_linux.cc
@@ -18,7 +18,7 @@
 
 namespace art {
 
-void Monitor::LogContentionEvent(Thread*, uint32_t, uint32_t, const char*, uint32_t) {
+void Monitor::LogContentionEvent(Thread*, uint32_t, uint32_t, const char*, int32_t) {
 }
 
 }  // namespace art
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4aebc2c..8b2f4d8 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -27,6 +27,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "oat_file_assistant.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "profiler.h"
 #include "runtime.h"
@@ -39,13 +40,16 @@
 
 namespace art {
 
-static std::unique_ptr<std::vector<const DexFile*>>
-ConvertJavaArrayToNative(JNIEnv* env, jobject arrayObject) {
+static bool ConvertJavaArrayToDexFiles(
+    JNIEnv* env,
+    jobject arrayObject,
+    /*out*/ std::vector<const DexFile*>& dex_files,
+    /*out*/ const OatFile*& oat_file) {
   jarray array = reinterpret_cast<jarray>(arrayObject);
 
   jsize array_size = env->GetArrayLength(array);
   if (env->ExceptionCheck() == JNI_TRUE) {
-    return std::unique_ptr<std::vector<const DexFile*>>();
+    return false;
   }
 
   // TODO: Optimize. On 32bit we can use an int array.
@@ -53,27 +57,24 @@
   jlong* long_data = env->GetLongArrayElements(reinterpret_cast<jlongArray>(array),
                                                &is_long_data_copied);
   if (env->ExceptionCheck() == JNI_TRUE) {
-    return std::unique_ptr<std::vector<const DexFile*>>();
+    return false;
   }
 
-  std::unique_ptr<std::vector<const DexFile*>> ret(new std::vector<const DexFile*>());
-  ret->reserve(array_size);
-  for (jsize i = 0; i < array_size; ++i) {
-    ret->push_back(reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(*(long_data + i))));
+  oat_file = reinterpret_cast<const OatFile*>(static_cast<uintptr_t>(long_data[kOatFileIndex]));
+  dex_files.reserve(array_size - 1);
+  for (jsize i = kDexFileIndexStart; i < array_size; ++i) {
+    dex_files.push_back(reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(long_data[i])));
   }
 
   env->ReleaseLongArrayElements(reinterpret_cast<jlongArray>(array), long_data, JNI_ABORT);
-  if (env->ExceptionCheck() == JNI_TRUE) {
-    return std::unique_ptr<std::vector<const DexFile*>>();
-  }
-
-  return ret;
+  return env->ExceptionCheck() != JNI_TRUE;
 }
 
-static jlongArray ConvertNativeToJavaArray(JNIEnv* env,
-                                           std::vector<std::unique_ptr<const DexFile>>& vec) {
-  size_t vec_size = vec.size();
-  jlongArray long_array = env->NewLongArray(static_cast<jsize>(vec_size));
+static jlongArray ConvertDexFilesToJavaArray(JNIEnv* env,
+                                             const OatFile* oat_file,
+                                             std::vector<std::unique_ptr<const DexFile>>& vec) {
+  // Add one for the oat file.
+  jlongArray long_array = env->NewLongArray(static_cast<jsize>(kDexFileIndexStart + vec.size()));
   if (env->ExceptionCheck() == JNI_TRUE) {
     return nullptr;
   }
@@ -84,10 +85,9 @@
     return nullptr;
   }
 
-  jlong* tmp = long_data;
-  for (auto& dex_file : vec) {
-    *tmp = reinterpret_cast<uintptr_t>(dex_file.get());
-    tmp++;
+  long_data[kOatFileIndex] = reinterpret_cast<uintptr_t>(oat_file);
+  for (size_t i = 0; i < vec.size(); ++i) {
+    long_data[kDexFileIndexStart + i] = reinterpret_cast<uintptr_t>(vec[i].get());
   }
 
   env->ReleaseLongArrayElements(long_array, long_data, 0);
@@ -160,14 +160,19 @@
     return 0;
   }
 
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* linker = runtime->GetClassLinker();
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::vector<std::string> error_msgs;
+  const OatFile* oat_file = nullptr;
 
-  dex_files = linker->OpenDexFilesFromOat(sourceName.c_str(), outputName.c_str(), &error_msgs);
+  dex_files = runtime->GetOatFileManager().OpenDexFilesFromOat(sourceName.c_str(),
+                                                               outputName.c_str(),
+                                                               /*out*/ &oat_file,
+                                                               /*out*/ &error_msgs);
 
   if (!dex_files.empty()) {
-    jlongArray array = ConvertNativeToJavaArray(env, dex_files);
+    jlongArray array = ConvertDexFilesToJavaArray(env, oat_file, dex_files);
     if (array == nullptr) {
       ScopedObjectAccess soa(env);
       for (auto& dex_file : dex_files) {
@@ -193,43 +198,56 @@
 }
 
 static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) {
-  ScopedObjectAccess soa(env);
-  mirror::Object* dex_files_object = soa.Decode<mirror::Object*>(cookie);
-  if (dex_files_object == nullptr) {
-    ThrowNullPointerException("cookie == null");
+  std::vector<const DexFile*> dex_files;
+  const OatFile* oat_file;
+  if (!ConvertJavaArrayToDexFiles(env, cookie, dex_files, oat_file)) {
+    Thread::Current()->AssertPendingException();
     return JNI_FALSE;
   }
-  mirror::LongArray* dex_files = dex_files_object->AsLongArray();
-
-  // Delete dex files associated with this dalvik.system.DexFile since there should not be running
-  // code using it. dex_files is a vector due to multidex.
-  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  Runtime* const runtime = Runtime::Current();
   bool all_deleted = true;
-  for (int32_t i = 0, count = dex_files->GetLength(); i < count; ++i) {
-    auto* dex_file = reinterpret_cast<DexFile*>(dex_files->Get(i));
-    if (dex_file == nullptr) {
-      continue;
-    }
-    // Only delete the dex file if the dex cache is not found to prevent runtime crashes if there
-    // are calls to DexFile.close while the ART DexFile is still in use.
-    if (class_linker->FindDexCache(soa.Self(), *dex_file, true) == nullptr) {
-      // Clear the element in the array so that we can call close again.
-      dex_files->Set(i, 0);
-      delete dex_file;
-    } else {
-      all_deleted = false;
+  {
+    ScopedObjectAccess soa(env);
+    mirror::Object* dex_files_object = soa.Decode<mirror::Object*>(cookie);
+    mirror::LongArray* long_dex_files = dex_files_object->AsLongArray();
+    // Delete dex files associated with this dalvik.system.DexFile since there should not be running
+    // code using it. dex_files is a vector due to multidex.
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    int32_t i = kDexFileIndexStart;  // Oat file is at index 0.
+    for (const DexFile* dex_file : dex_files) {
+      if (dex_file != nullptr) {
+        // Only delete the dex file if the dex cache is not found to prevent runtime crashes if there
+        // are calls to DexFile.close while the ART DexFile is still in use.
+        if (class_linker->FindDexCache(soa.Self(), *dex_file, true) == nullptr) {
+          // Clear the element in the array so that we can call close again.
+          long_dex_files->Set(i, 0);
+          delete dex_file;
+        } else {
+          all_deleted = false;
+        }
+      }
+      ++i;
     }
   }
 
-  // TODO: Also unmap the OatFile for this dalvik.system.DexFile.
-
+  // oat_file can be null if we are running without dex2oat.
+  if (all_deleted && oat_file != nullptr) {
+    // If all of the dex files are no longer in use we can unmap the corresponding oat file.
+    VLOG(class_linker) << "Unregistering " << oat_file;
+    runtime->GetOatFileManager().UnRegisterAndDeleteOatFile(oat_file);
+  }
   return all_deleted ? JNI_TRUE : JNI_FALSE;
 }
 
-static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader,
-                                        jobject cookie) {
-  std::unique_ptr<std::vector<const DexFile*>> dex_files = ConvertJavaArrayToNative(env, cookie);
-  if (dex_files.get() == nullptr) {
+static jclass DexFile_defineClassNative(JNIEnv* env,
+                                        jclass,
+                                        jstring javaName,
+                                        jobject javaLoader,
+                                        jobject cookie,
+                                        jobject dexFile) {
+  std::vector<const DexFile*> dex_files;
+  const OatFile* oat_file;
+  if (!ConvertJavaArrayToDexFiles(env, cookie, /*out*/ dex_files, /*out*/ oat_file)) {
     VLOG(class_linker) << "Failed to find dex_file";
     DCHECK(env->ExceptionCheck());
     return nullptr;
@@ -242,17 +260,27 @@
   }
   const std::string descriptor(DotToDescriptor(class_name.c_str()));
   const size_t hash(ComputeModifiedUtf8Hash(descriptor.c_str()));
-  for (auto& dex_file : *dex_files) {
+  for (auto& dex_file : dex_files) {
     const DexFile::ClassDef* dex_class_def = dex_file->FindClassDef(descriptor.c_str(), hash);
     if (dex_class_def != nullptr) {
       ScopedObjectAccess soa(env);
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      class_linker->RegisterDexFile(*dex_file);
       StackHandleScope<1> hs(soa.Self());
       Handle<mirror::ClassLoader> class_loader(
           hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
-      mirror::Class* result = class_linker->DefineClass(soa.Self(), descriptor.c_str(), hash,
-                                                        class_loader, *dex_file, *dex_class_def);
+      class_linker->RegisterDexFile(
+          *dex_file,
+          class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()));
+      mirror::Class* result = class_linker->DefineClass(soa.Self(),
+                                                        descriptor.c_str(),
+                                                        hash,
+                                                        class_loader,
+                                                        *dex_file,
+                                                        *dex_class_def);
+      // Add the used dex file. This is only required for the DexFile.loadClass API since normal
+      // class loaders already keep their dex files live.
+      class_linker->InsertDexFileInToClassLoader(soa.Decode<mirror::Object*>(dexFile),
+                                                 class_loader.Get());
       if (result != nullptr) {
         VLOG(class_linker) << "DexFile_defineClassNative returning " << result
                            << " for " << class_name.c_str();
@@ -273,8 +301,9 @@
 
 // Note: this can be an expensive call, as we sort out duplicates in MultiDex files.
 static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jobject cookie) {
-  std::unique_ptr<std::vector<const DexFile*>> dex_files = ConvertJavaArrayToNative(env, cookie);
-  if (dex_files.get() == nullptr) {
+  const OatFile* oat_file = nullptr;
+  std::vector<const DexFile*> dex_files;
+  if (!ConvertJavaArrayToDexFiles(env, cookie, /*out */ dex_files, /* out */ oat_file)) {
     DCHECK(env->ExceptionCheck());
     return nullptr;
   }
@@ -282,7 +311,7 @@
   // Push all class descriptors into a set. Use set instead of unordered_set as we want to
   // retrieve all in the end.
   std::set<const char*, CharPointerComparator> descriptors;
-  for (auto& dex_file : *dex_files) {
+  for (auto& dex_file : dex_files) {
     for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
       const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
       const char* descriptor = dex_file->GetClassDescriptor(class_def);
@@ -291,7 +320,8 @@
   }
 
   // Now create output array and copy the set into it.
-  jobjectArray result = env->NewObjectArray(descriptors.size(), WellKnownClasses::java_lang_String,
+  jobjectArray result = env->NewObjectArray(descriptors.size(),
+                                            WellKnownClasses::java_lang_String,
                                             nullptr);
   if (result != nullptr) {
     auto it = descriptors.begin();
@@ -309,9 +339,11 @@
   return result;
 }
 
-static jint GetDexOptNeeded(JNIEnv* env, const char* filename,
-    const char* pkgname, const char* instruction_set, const jboolean defer) {
-
+static jint GetDexOptNeeded(JNIEnv* env,
+                            const char* filename,
+                            const char* pkgname,
+                            const char* instruction_set,
+                            const jboolean defer) {
   if ((filename == nullptr) || !OS::FileExists(filename)) {
     LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
@@ -361,8 +393,12 @@
   return oat_file_assistant.GetDexOptNeeded();
 }
 
-static jint DexFile_getDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename,
-    jstring javaPkgname, jstring javaInstructionSet, jboolean defer) {
+static jint DexFile_getDexOptNeeded(JNIEnv* env,
+                                    jclass,
+                                    jstring javaFilename,
+                                    jstring javaPkgname,
+                                    jstring javaInstructionSet,
+                                    jboolean defer) {
   ScopedUtfChars filename(env, javaFilename);
   if (env->ExceptionCheck()) {
     return 0;
@@ -375,8 +411,11 @@
     return 0;
   }
 
-  return GetDexOptNeeded(env, filename.c_str(), pkgname.c_str(),
-                         instruction_set.c_str(), defer);
+  return GetDexOptNeeded(env,
+                         filename.c_str(),
+                         pkgname.c_str(),
+                         instruction_set.c_str(),
+                         defer);
 }
 
 // public API, null pkgname
@@ -390,8 +429,13 @@
 
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)Z"),
-  NATIVE_METHOD(DexFile, defineClassNative,
-                "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"),
+  NATIVE_METHOD(DexFile,
+                defineClassNative,
+                "(Ljava/lang/String;"
+                "Ljava/lang/ClassLoader;"
+                "Ljava/lang/Object;"
+                "Ldalvik/system/DexFile;"
+                ")Ljava/lang/Class;"),
   NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(DexFile, getDexOptNeeded,
diff --git a/runtime/native/dalvik_system_DexFile.h b/runtime/native/dalvik_system_DexFile.h
index 7585ab9..77d219d 100644
--- a/runtime/native/dalvik_system_DexFile.h
+++ b/runtime/native/dalvik_system_DexFile.h
@@ -18,9 +18,13 @@
 #define ART_RUNTIME_NATIVE_DALVIK_SYSTEM_DEXFILE_H_
 
 #include <jni.h>
+#include <unistd.h>
 
 namespace art {
 
+constexpr size_t kOatFileIndex = 0;
+constexpr size_t kDexFileIndexStart = 1;
+
 class DexFile;
 
 void register_dalvik_system_DexFile(JNIEnv* env);
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 4f95723..4c5dc3a 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -497,7 +497,8 @@
     const DexFile* dex_file = boot_class_path[i];
     CHECK(dex_file != nullptr);
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->RegisterDexFile(*dex_file)));
+    Handle<mirror::DexCache> dex_cache(
+        hs.NewHandle(linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc())));
 
     if (kPreloadDexCachesStrings) {
       for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 5da15df..5e42392 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -190,7 +190,7 @@
     return nullptr;
   }
   size_t low = 0;
-  size_t high = fields->Length();
+  size_t high = fields->size();
   const uint16_t* const data = name->GetValue();
   const size_t length = name->GetLength();
   while (low < high) {
@@ -522,6 +522,10 @@
   }
   if (classes == nullptr) {
     // Return an empty array instead of a null pointer.
+    if (soa.Self()->IsExceptionPending()) {
+      // Pending exception from GetDeclaredClasses.
+      return nullptr;
+    }
     mirror::Class* class_class = mirror::Class::GetJavaLangClass();
     mirror::Class* class_array_class =
         Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 5725b6f..40aca0d 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -134,6 +134,9 @@
   if (!IsAligned<kPageSize>(image_patch_delta_)) {
     return false;
   }
+  if (!IsValidInstructionSet(instruction_set_)) {
+    return false;
+  }
   return true;
 }
 
@@ -156,6 +159,9 @@
   if (!IsAligned<kPageSize>(image_patch_delta_)) {
     return "Image patch delta not page-aligned.";
   }
+  if (!IsValidInstructionSet(instruction_set_)) {
+    return StringPrintf("Invalid instruction set, %d.", static_cast<int>(instruction_set_));
+  }
   return "";
 }
 
@@ -477,15 +483,4 @@
 
 OatMethodOffsets::~OatMethodOffsets() {}
 
-OatQuickMethodHeader::OatQuickMethodHeader(
-    uint32_t mapping_table_offset, uint32_t vmap_table_offset, uint32_t gc_map_offset,
-    uint32_t frame_size_in_bytes, uint32_t core_spill_mask, uint32_t fp_spill_mask,
-    uint32_t code_size)
-    : mapping_table_offset_(mapping_table_offset), vmap_table_offset_(vmap_table_offset),
-      gc_map_offset_(gc_map_offset),
-      frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask), code_size_(code_size) {
-}
-
-OatQuickMethodHeader::~OatQuickMethodHeader() {}
-
 }  // namespace art
diff --git a/runtime/oat.h b/runtime/oat.h
index 24acbc8..5b780c3 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -22,7 +22,6 @@
 #include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "dex_file.h"
-#include "quick/quick_method_frame_info.h"
 #include "safe_map.h"
 
 namespace art {
@@ -32,7 +31,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '1', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '3', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
@@ -170,30 +169,6 @@
   uint32_t code_offset_;
 };
 
-// OatQuickMethodHeader precedes the raw code chunk generated by the Quick compiler.
-class PACKED(4) OatQuickMethodHeader {
- public:
-  OatQuickMethodHeader(uint32_t mapping_table_offset = 0U, uint32_t vmap_table_offset = 0U,
-                       uint32_t gc_map_offset = 0U, uint32_t frame_size_in_bytes = 0U,
-                       uint32_t core_spill_mask = 0U, uint32_t fp_spill_mask = 0U,
-                       uint32_t code_size = 0U);
-
-  ~OatQuickMethodHeader();
-
-  OatQuickMethodHeader& operator=(const OatQuickMethodHeader&) = default;
-
-  // The offset in bytes from the start of the mapping table to the end of the header.
-  uint32_t mapping_table_offset_;
-  // The offset in bytes from the start of the vmap table to the end of the header.
-  uint32_t vmap_table_offset_;
-  // The offset in bytes from the start of the gc map to the end of the header.
-  uint32_t gc_map_offset_;
-  // The stack frame information.
-  QuickMethodFrameInfo frame_info_;
-  // The code size in bytes.
-  uint32_t code_size_;
-};
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_OAT_H_
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index 5df6525..7b92120 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -18,11 +18,12 @@
 #define ART_RUNTIME_OAT_FILE_INL_H_
 
 #include "oat_file.h"
+#include "oat_quick_method_header.h"
 
 namespace art {
 
 inline const OatQuickMethodHeader* OatFile::OatMethod::GetOatQuickMethodHeader() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -47,7 +48,7 @@
 }
 
 inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = EntryPointToCodePointer(GetQuickCode());
   if (code == nullptr) {
     return 0u;
   }
@@ -55,7 +56,7 @@
 }
 
 inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = EntryPointToCodePointer(GetQuickCode());
   if (code == nullptr) {
     return 0u;
   }
@@ -63,7 +64,7 @@
 }
 
 inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetQuickCode());
+  const void* code = EntryPointToCodePointer(GetQuickCode());
   if (code == nullptr) {
     return 0u;
   }
@@ -71,7 +72,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetGcMap() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -122,7 +123,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetMappingTable() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -134,7 +135,7 @@
 }
 
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return nullptr;
   }
@@ -146,7 +147,7 @@
 }
 
 inline uint32_t OatFile::OatMethod::GetQuickCodeSize() const {
-  const void* code = ArtMethod::EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
+  const void* code = EntryPointToCodePointer(GetOatPointer<const void*>(code_offset_));
   if (code == nullptr) {
     return 0u;
   }
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index a4a159e..680f4ac 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -18,6 +18,7 @@
 
 #include <dlfcn.h>
 #include <string.h>
+#include <type_traits>
 #include <unistd.h>
 
 #include <cstdlib>
@@ -42,9 +43,11 @@
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "oat_file-inl.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "runtime.h"
 #include "utils.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
 #include "vmap_table.h"
 
 namespace art {
@@ -115,15 +118,35 @@
   // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
   //       !executable is a sign that we may want to patch), which may not be allowed for
   //       various reasons.
-  if (kUseDlopen && (kIsTargetBuild || kUseDlopenOnHost) && executable) {
-    // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
-    // this will register the oat file with the linker and allows libunwind to find our info.
-    ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
-    if (ret.get() != nullptr) {
-      return ret.release();
+  // dlopen always returns the same library if it is already opened on the host. For this reason
+  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
+  // the same library loaded multiple times at different addresses is required for class unloading
+  // and for having dex caches arrays in the .bss section.
+  Runtime* const runtime = Runtime::Current();
+  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
+  if (kUseDlopen && executable) {
+    bool success = kIsTargetBuild;
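+    // On target we always attempt dlopen; on the host it is additionally gated on the location
+    // reservation below so the same library is never dlopen'ed twice.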
+    bool reserved_location = false;
+    // Manager may be null if we are running without a runtime.
+    if (!success && kUseDlopenOnHost && manager != nullptr) {
+      // RegisterOatFileLocation returns false if we are not the first caller to register that
+      // location.
+      reserved_location = manager->RegisterOatFileLocation(location);
+      success = reserved_location;
     }
-    if (kPrintDlOpenErrorMessage) {
-      LOG(ERROR) << "Failed to dlopen: " << *error_msg;
+    if (success) {
+      // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
+      // this will register the oat file with the linker and allows libunwind to find our info.
+      ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
+      if (reserved_location) {
+        manager->UnRegisterOatFileLocation(location);
+      }
+      if (ret != nullptr) {
+        return ret.release();
+      }
+      if (kPrintDlOpenErrorMessage) {
+        LOG(ERROR) << "Failed to dlopen: " << *error_msg;
+      }
     }
   }
 
@@ -204,6 +227,10 @@
       is_executable_(is_executable), dlopen_handle_(nullptr),
       secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
   CHECK(!location_.empty());
+  Runtime* const runtime = Runtime::Current();
+  if (runtime != nullptr && !runtime->IsAotCompiler()) {
+    runtime->GetOatFileManager().RegisterOatFileLocation(location);
+  }
 }
 
 OatFile::~OatFile() {
@@ -211,6 +238,10 @@
   if (dlopen_handle_ != nullptr) {
     dlclose(dlopen_handle_);
   }
+  Runtime* const runtime = Runtime::Current();
+  if (runtime != nullptr && !runtime->IsAotCompiler()) {
+    runtime->GetOatFileManager().UnRegisterOatFileLocation(location_);
+  }
 }
 
 bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
@@ -218,10 +249,7 @@
 #ifdef __APPLE__
   // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
   // but let's fallback to the custom loading code for the time being.
-  UNUSED(elf_filename);
-  UNUSED(requested_base);
-  UNUSED(abs_dex_location);
-  UNUSED(error_msg);
+  UNUSED(elf_filename, requested_base, abs_dex_location, error_msg);
   return false;
 #else
   {
@@ -361,13 +389,13 @@
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = elf_file_->FindDynamicSymbolAddress("oatbss");
+  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
   if (bss_begin_ == nullptr) {
     // No .bss section. Clear dlerror().
     bss_end_ = nullptr;
     dlerror();
   } else {
-    bss_end_ = elf_file_->FindDynamicSymbolAddress("oatbsslastword");
+    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
     if (bss_end_ == nullptr) {
       *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
                                 file->GetPath().c_str());
@@ -380,10 +408,31 @@
   return Setup(abs_dex_location, error_msg);
 }
 
+// Read an unaligned entry from the OatDexFile data in OatFile and advance the read
+// position by the number of bytes read, i.e. sizeof(T).
+// Return true on success, false if the read would go beyond the end of the OatFile.
+template <typename T>
+inline static bool ReadOatDexFileData(const OatFile& oat_file,
+                                      /*inout*/const uint8_t** oat,
+                                      /*out*/T* value) {
+  DCHECK(oat != nullptr);
+  DCHECK(value != nullptr);
+  DCHECK_LE(*oat, oat_file.End());
+  if (UNLIKELY(static_cast<size_t>(oat_file.End() - *oat) < sizeof(T))) {
+    return false;
+  }
+  static_assert(std::is_trivial<T>::value, "T must be a trivial type");
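+  // The value may not be naturally aligned within the oat data, so read it through a
+  // byte-aligned alias of T instead of dereferencing a potentially misaligned T*.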
+  typedef __attribute__((__aligned__(1))) T unaligned_type;
+  *value = *reinterpret_cast<const unaligned_type*>(*oat);
+  *oat += sizeof(T);
+  return true;
+}
+
 bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
-    *error_msg = StringPrintf("Invalid oat header for '%s': %s", GetLocation().c_str(),
+    *error_msg = StringPrintf("Invalid oat header for '%s': %s",
+                              GetLocation().c_str(),
                               cause.c_str());
     return false;
   }
@@ -397,33 +446,42 @@
   oat += GetOatHeader().GetKeyValueStoreSize();
   if (oat > End()) {
     *error_msg = StringPrintf("In oat file '%s' found truncated variable-size data: "
-                              "%p + %zd + %ud <= %p", GetLocation().c_str(),
-                              Begin(), sizeof(OatHeader), GetOatHeader().GetKeyValueStoreSize(),
+                                  "%p + %zu + %u <= %p",
+                              GetLocation().c_str(),
+                              Begin(),
+                              sizeof(OatHeader),
+                              GetOatHeader().GetKeyValueStoreSize(),
                               End());
     return false;
   }
 
+  size_t pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
+  uint8_t* dex_cache_arrays = bss_begin_;
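+  // When a .bss section is present it holds the dex cache arrays, laid out consecutively,
+  // one block per dex file; walk it in parallel with the OatDexFile entries below.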
   uint32_t dex_file_count = GetOatHeader().GetDexFileCount();
   oat_dex_files_storage_.reserve(dex_file_count);
   for (size_t i = 0; i < dex_file_count; i++) {
-    uint32_t dex_file_location_size = *reinterpret_cast<const uint32_t*>(oat);
-    if (UNLIKELY(dex_file_location_size == 0U)) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd with empty location name",
-                                GetLocation().c_str(), i);
+    uint32_t dex_file_location_size;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_file_location_size))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu truncated after dex file "
+                                    "location size",
+                                GetLocation().c_str(),
+                                i);
       return false;
     }
-    oat += sizeof(dex_file_location_size);
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd truncated after dex file "
-                                "location size", GetLocation().c_str(), i);
+    if (UNLIKELY(dex_file_location_size == 0U)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with empty location name",
+                                GetLocation().c_str(),
+                                i);
       return false;
     }
 
     const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
     oat += dex_file_location_size;
     if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd with truncated dex file "
-                                "location", GetLocation().c_str(), i);
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with truncated dex file "
+                                    "location",
+                                GetLocation().c_str(),
+                                i);
       return false;
     }
 
@@ -431,59 +489,114 @@
         abs_dex_location,
         std::string(dex_file_location_data, dex_file_location_size));
 
-    uint32_t dex_file_checksum = *reinterpret_cast<const uint32_t*>(oat);
-    oat += sizeof(dex_file_checksum);
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated after "
-                                "dex file checksum", GetLocation().c_str(), i,
+    uint32_t dex_file_checksum;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_file_checksum))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated after "
+                                    "dex file checksum",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
 
-    uint32_t dex_file_offset = *reinterpret_cast<const uint32_t*>(oat);
+    uint32_t dex_file_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_file_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after dex file offsets",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str());
+      return false;
+    }
     if (UNLIKELY(dex_file_offset == 0U)) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with zero dex "
-                                "file offset", GetLocation().c_str(), i, dex_file_location.c_str());
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with zero dex "
+                                    "file offset",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str());
       return false;
     }
     if (UNLIKELY(dex_file_offset > Size())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with dex file "
-                                "offset %ud > %zd", GetLocation().c_str(), i,
-                                dex_file_location.c_str(), dex_file_offset, Size());
-      return false;
-    }
-    oat += sizeof(dex_file_offset);
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
-                                "after dex file offsets", GetLocation().c_str(), i,
-                                dex_file_location.c_str());
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u > %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                Size());
       return false;
     }
 
     const uint8_t* dex_file_pointer = Begin() + dex_file_offset;
     if (UNLIKELY(!DexFile::IsMagicValid(dex_file_pointer))) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with invalid "
-                                "dex file magic '%s'", GetLocation().c_str(), i,
-                                dex_file_location.c_str(), dex_file_pointer);
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with invalid "
+                                    "dex file magic '%s'",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_pointer);
       return false;
     }
     if (UNLIKELY(!DexFile::IsVersionValid(dex_file_pointer))) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with invalid "
-                                "dex file version '%s'", GetLocation().c_str(), i,
-                                dex_file_location.c_str(), dex_file_pointer);
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with invalid "
+                                    "dex file version '%s'",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_pointer);
       return false;
     }
     const DexFile::Header* header = reinterpret_cast<const DexFile::Header*>(dex_file_pointer);
+
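+    // Read the lookup table offset; a zero offset means this dex file has no lookup table.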
+    if (UNLIKELY(oat > End())) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
+                                "lookup table offset", GetLocation().c_str(), i,
+                                dex_file_location.c_str());
+      return false;
+    }
+    uint32_t lookup_table_offset = *reinterpret_cast<const uint32_t*>(oat);
+    oat += sizeof(lookup_table_offset);
+    if (Begin() + lookup_table_offset > End()) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
+                                "lookup table", GetLocation().c_str(), i,
+                                dex_file_location.c_str());
+      return false;
+    }
+    const uint8_t* lookup_table_data = lookup_table_offset != 0u
+        ? Begin() + lookup_table_offset
+        : nullptr;
+
     const uint32_t* methods_offsets_pointer = reinterpret_cast<const uint32_t*>(oat);
 
     oat += (sizeof(*methods_offsets_pointer) * header->class_defs_size_);
     if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "method offsets", GetLocation().c_str(), i,
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                    "method offsets",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
 
+    uint8_t* current_dex_cache_arrays = nullptr;
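+    // If the .bss section provides dex cache arrays, carve out this dex file's block and advance.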
+    if (dex_cache_arrays != nullptr) {
+      DexCacheArraysLayout layout(pointer_size, *header);
+      if (layout.Size() != 0u) {
+        if (static_cast<size_t>(bss_end_ - dex_cache_arrays) < layout.Size()) {
+          *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with "
+                                        "truncated dex cache arrays, %zu < %zu.",
+                                    GetLocation().c_str(),
+                                    i,
+                                    dex_file_location.c_str(),
+                                    static_cast<size_t>(bss_end_ - dex_cache_arrays),
+                                    layout.Size());
+          return false;
+        }
+        current_dex_cache_arrays = dex_cache_arrays;
+        dex_cache_arrays += layout.Size();
+      }
+    }
+
     std::string canonical_location = DexFile::GetDexCanonicalLocation(dex_file_location.c_str());
 
     // Create the OatDexFile and add it to the owning container.
@@ -492,7 +605,9 @@
                                               canonical_location,
                                               dex_file_checksum,
                                               dex_file_pointer,
-                                              methods_offsets_pointer);
+                                              lookup_table_data,
+                                              methods_offsets_pointer,
+                                              current_dex_cache_arrays);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
     // Add the location and canonical location (if different) to the oat_dex_files_ table.
@@ -503,6 +618,15 @@
       oat_dex_files_.Put(canonical_key, oat_dex_file);
     }
   }
+
+  if (dex_cache_arrays != bss_end_) {
+    // We expect the bss section to be either empty (dex_cache_arrays and bss_end_
+    // both null) or contain just the dex cache arrays and nothing else.
+    *error_msg = StringPrintf("In oat file '%s' found unexpected bss size bigger by %zu bytes.",
+                              GetLocation().c_str(),
+                              static_cast<size_t>(bss_end_ - dex_cache_arrays));
+    return false;
+  }
   return true;
 }
 
@@ -605,13 +729,17 @@
                                 const std::string& canonical_dex_file_location,
                                 uint32_t dex_file_location_checksum,
                                 const uint8_t* dex_file_pointer,
-                                const uint32_t* oat_class_offsets_pointer)
+                                const uint8_t* lookup_table_data,
+                                const uint32_t* oat_class_offsets_pointer,
+                                uint8_t* dex_cache_arrays)
     : oat_file_(oat_file),
       dex_file_location_(dex_file_location),
       canonical_dex_file_location_(canonical_dex_file_location),
       dex_file_location_checksum_(dex_file_location_checksum),
       dex_file_pointer_(dex_file_pointer),
-      oat_class_offsets_pointer_(oat_class_offsets_pointer) {}
+      lookup_table_data_(lookup_table_data),
+      oat_class_offsets_pointer_(oat_class_offsets_pointer),
+      dex_cache_arrays_(dex_cache_arrays) {}
 
 OatFile::OatDexFile::~OatDexFile() {}
 
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 27f8677..0a77654 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -29,6 +29,7 @@
 #include "mirror/class.h"
 #include "oat.h"
 #include "os.h"
+#include "utils.h"
 
 namespace art {
 
@@ -300,10 +301,10 @@
   const uint8_t* end_;
 
   // Pointer to the .bss section, if present, otherwise null.
-  const uint8_t* bss_begin_;
+  uint8_t* bss_begin_;
 
   // Pointer to the end of the .bss section, if present, otherwise null.
-  const uint8_t* bss_end_;
+  uint8_t* bss_end_;
 
   // Was this oat_file loaded executable?
   const bool is_executable_;
@@ -395,6 +396,14 @@
   // Returns the offset to the OatClass information. Most callers should use GetOatClass.
   uint32_t GetOatClassOffset(uint16_t class_def_index) const;
 
+  uint8_t* GetDexCacheArrays() const {
+    return dex_cache_arrays_;
+  }
+
+  const uint8_t* GetLookupTableData() const {
+    return lookup_table_data_;
+  }
+
   ~OatDexFile();
 
  private:
@@ -403,14 +412,18 @@
              const std::string& canonical_dex_file_location,
              uint32_t dex_file_checksum,
              const uint8_t* dex_file_pointer,
-             const uint32_t* oat_class_offsets_pointer);
+             const uint8_t* lookup_table_data,
+             const uint32_t* oat_class_offsets_pointer,
+             uint8_t* dex_cache_arrays);
 
   const OatFile* const oat_file_;
   const std::string dex_file_location_;
   const std::string canonical_dex_file_location_;
   const uint32_t dex_file_location_checksum_;
   const uint8_t* const dex_file_pointer_;
+  const uint8_t* lookup_table_data_;
   const uint32_t* const oat_class_offsets_pointer_;
+  uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 29b879e..99080f6 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -65,8 +65,10 @@
                                    const InstructionSet isa,
                                    bool load_executable,
                                    const char* package_name)
-    : dex_location_(dex_location), isa_(isa),
-      package_name_(package_name), load_executable_(load_executable) {
+    : isa_(isa), package_name_(package_name), load_executable_(load_executable) {
+  CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
+  dex_location_.assign(dex_location);
+
   if (load_executable_ && isa != kRuntimeISA) {
     LOG(WARNING) << "OatFileAssistant: Load executable specified, "
       << "but isa is not kRuntimeISA. Will not attempt to load executable.";
@@ -110,7 +112,7 @@
   ClassLinker* class_linker = runtime->GetClassLinker();
   const auto& boot_class_path = class_linker->GetBootClassPath();
   for (size_t i = 0; i < boot_class_path.size(); i++) {
-    if (boot_class_path[i]->GetLocation() == std::string(dex_location_)) {
+    if (boot_class_path[i]->GetLocation() == dex_location_) {
       VLOG(oat) << "Dex location " << dex_location_ << " is in boot class path";
       return true;
     }
@@ -266,7 +268,6 @@
 
 const std::string* OatFileAssistant::OdexFileName() {
   if (!cached_odex_file_name_attempted_) {
-    CHECK(dex_location_ != nullptr) << "OatFileAssistant: null dex location";
     cached_odex_file_name_attempted_ = true;
 
     std::string error_msg;
@@ -330,15 +331,13 @@
     cached_oat_file_name_attempted_ = true;
 
     // Compute the oat file name from the dex location.
-    CHECK(dex_location_ != nullptr) << "OatFileAssistant: null dex location";
-
     // TODO: The oat file assistant should be the definitive place for
     // determining the oat file name from the dex location, not
     // GetDalvikCacheFilename.
     std::string cache_dir = StringPrintf("%s%s",
         DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_));
     std::string error_msg;
-    cached_oat_file_name_found_ = GetDalvikCacheFilename(dex_location_,
+    cached_oat_file_name_found_ = GetDalvikCacheFilename(dex_location_.c_str(),
         cache_dir.c_str(), &cached_oat_file_name_, &error_msg);
     if (!cached_oat_file_name_found_) {
       // If we can't determine the oat file name, we treat the oat file as
@@ -413,7 +412,7 @@
   // what we provide, which verifies the primary dex checksum for us.
   const uint32_t* dex_checksum_pointer = GetRequiredDexChecksum();
   const OatFile::OatDexFile* oat_dex_file = file.GetOatDexFile(
-      dex_location_, dex_checksum_pointer, false);
+      dex_location_.c_str(), dex_checksum_pointer, false);
   if (oat_dex_file == nullptr) {
     return true;
   }
@@ -421,7 +420,7 @@
   // Verify the dex checksums for any secondary multidex files
   for (size_t i = 1; ; i++) {
     std::string secondary_dex_location
-      = DexFile::GetMultiDexLocation(i, dex_location_);
+      = DexFile::GetMultiDexLocation(i, dex_location_.c_str());
     const OatFile::OatDexFile* secondary_oat_dex_file
       = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
     if (secondary_oat_dex_file == nullptr) {
@@ -613,16 +612,14 @@
   CHECK(error_msg != nullptr);
 
   if (input_file == nullptr) {
-    *error_msg = "Patching of oat file for dex location "
-      + std::string(dex_location_)
+    *error_msg = "Patching of oat file for dex location " + dex_location_
       + " not attempted because the input file name could not be determined.";
     return false;
   }
   const std::string& input_file_name = *input_file;
 
   if (OatFileName() == nullptr) {
-    *error_msg = "Patching of oat file for dex location "
-      + std::string(dex_location_)
+    *error_msg = "Patching of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
     return false;
   }
@@ -665,36 +662,58 @@
 bool OatFileAssistant::GenerateOatFile(std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
+  Runtime* runtime = Runtime::Current();
+  if (!runtime->IsDex2OatEnabled()) {
+    *error_msg = "Generation of oat file for dex location " + dex_location_
+      + " not attempted because dex2oat is disabled.";
+    return false;
+  }
+
   if (OatFileName() == nullptr) {
-    *error_msg = "Generation of oat file for dex location "
-      + std::string(dex_location_)
+    *error_msg = "Generation of oat file for dex location " + dex_location_
       + " not attempted because the oat file name could not be determined.";
     return false;
   }
   const std::string& oat_file_name = *OatFileName();
 
-  Runtime* runtime = Runtime::Current();
-  if (!runtime->IsDex2OatEnabled()) {
+  // dex2oat ignores missing dex files and doesn't report an error.
+  // Check explicitly here so we can detect the error properly.
+  // TODO: Why does dex2oat behave that way?
+  if (!OS::FileExists(dex_location_.c_str())) {
+    *error_msg = "Dex location " + dex_location_ + " does not exists.";
+    return false;
+  }
+
+  std::unique_ptr<File> oat_file;
+  oat_file.reset(OS::CreateEmptyFile(oat_file_name.c_str()));
+  if (oat_file.get() == nullptr) {
     *error_msg = "Generation of oat file " + oat_file_name
-      + " not attempted because dex2oat is disabled";
+      + " not attempted because the oat file could not be created.";
+    return false;
+  }
+
+  if (fchmod(oat_file->Fd(), 0644) != 0) {
+    *error_msg = "Generation of oat file " + oat_file_name
+      + " not attempted because the oat file could not be made world readable.";
+    oat_file->Erase();
     return false;
   }
 
   std::vector<std::string> args;
-  args.push_back("--dex-file=" + std::string(dex_location_));
-  args.push_back("--oat-file=" + oat_file_name);
-
-  // dex2oat ignores missing dex files and doesn't report an error.
-  // Check explicitly here so we can detect the error properly.
-  // TODO: Why does dex2oat behave that way?
-  if (!OS::FileExists(dex_location_)) {
-    *error_msg = "Dex location " + std::string(dex_location_) + " does not exists.";
-    return false;
-  }
+  args.push_back("--dex-file=" + dex_location_);
+  args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
+  args.push_back("--oat-location=" + oat_file_name);
 
   if (!Dex2Oat(args, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
+    oat_file->Erase();
+    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
+    return false;
+  }
+
+  if (oat_file->FlushCloseOrErase() != 0) {
+    *error_msg = "Unable to close oat file " + oat_file_name;
     TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
     return false;
   }
@@ -839,8 +858,7 @@
     required_dex_checksum_attempted_ = true;
     required_dex_checksum_found_ = false;
     std::string error_msg;
-    CHECK(dex_location_ != nullptr) << "OatFileAssistant provided no dex location";
-    if (DexFile::GetChecksum(dex_location_, &cached_required_dex_checksum_, &error_msg)) {
+    if (DexFile::GetChecksum(dex_location_.c_str(), &cached_required_dex_checksum_, &error_msg)) {
       required_dex_checksum_found_ = true;
       has_original_dex_files_ = true;
     } else {
@@ -853,7 +871,7 @@
       const OatFile* odex_file = GetOdexFile();
       if (odex_file != nullptr) {
         const OatFile::OatDexFile* odex_dex_file = odex_file->GetOatDexFile(
-            dex_location_, nullptr, false);
+            dex_location_.c_str(), nullptr, false);
         if (odex_dex_file != nullptr) {
           cached_required_dex_checksum_ = odex_dex_file->GetDexFileLocationChecksum();
           required_dex_checksum_found_ = true;
@@ -873,7 +891,7 @@
       std::string error_msg;
       cached_odex_file_.reset(OatFile::Open(odex_file_name.c_str(),
             odex_file_name.c_str(), nullptr, nullptr, load_executable_,
-            dex_location_, &error_msg));
+            dex_location_.c_str(), &error_msg));
       if (cached_odex_file_.get() == nullptr) {
         VLOG(oat) << "OatFileAssistant test for existing pre-compiled oat file "
           << odex_file_name << ": " << error_msg;
@@ -904,7 +922,7 @@
       std::string error_msg;
       cached_oat_file_.reset(OatFile::Open(oat_file_name.c_str(),
             oat_file_name.c_str(), nullptr, nullptr, load_executable_,
-            dex_location_, &error_msg));
+            dex_location_.c_str(), &error_msg));
       if (cached_oat_file_.get() == nullptr) {
         VLOG(oat) << "OatFileAssistant test for existing oat file "
           << oat_file_name << ": " << error_msg;
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 664db98..f781532 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -369,9 +369,7 @@
   // remaining lifetime of the OatFileAssistant object.
   ScopedFlock flock_;
 
-  // In a properly constructed OatFileAssistant object, dex_location_ should
-  // never be null.
-  const char* dex_location_ = nullptr;
+  std::string dex_location_;
 
   // In a properly constructed OatFileAssistant object, isa_ should be either
   // the 32 or 64 bit variant for the current device.
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 20347a9..c54d7f8 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -31,6 +31,7 @@
 #include "compiler_callbacks.h"
 #include "gc/space/image_space.h"
 #include "mem_map.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
@@ -848,6 +849,38 @@
   EXPECT_FALSE(ofm.OatFileExists());
 }
 
+// Case: We have a DEX file but can't write the oat file.
+// Expect: We should fail to make the oat file up to date.
+TEST_F(OatFileAssistantTest, LoadDexUnwriteableAlternateOat) {
+  std::string dex_location = GetScratchDir() + "/LoadDexUnwriteableAlternateOat.jar";
+
+  // Make the oat location unwritable by inserting some non-existent
+  // intermediate directories.
+  std::string oat_location = GetScratchDir() + "/foo/bar/LoadDexUnwriteableAlternateOat.oat";
+
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+  std::string error_msg;
+  ASSERT_FALSE(oat_file_assistant.MakeUpToDate(&error_msg));
+
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() == nullptr);
+}
+
+// Case: We don't have a DEX file and can't write the oat file.
+// Expect: We should fail to generate the oat file without crashing.
+TEST_F(OatFileAssistantTest, GenNoDex) {
+  std::string dex_location = GetScratchDir() + "/GenNoDex.jar";
+  std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
+
+  OatFileAssistant oat_file_assistant(
+      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+  std::string error_msg;
+  ASSERT_FALSE(oat_file_assistant.GenerateOatFile(&error_msg));
+}
+
 // Turn an absolute path into a path relative to the current working
 // directory.
 static std::string MakePathRelative(std::string target) {
@@ -953,18 +986,21 @@
       loaded_oat_file_(nullptr)
   {}
 
-  void Run(Thread* self) {
-    UNUSED(self);
-
+  void Run(Thread* self ATTRIBUTE_UNUSED) {
     // Load the dex files, and save a pointer to the loaded oat file, so that
     // we can verify only one oat file was loaded for the dex location.
-    ClassLinker* linker = Runtime::Current()->GetClassLinker();
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     std::vector<std::string> error_msgs;
-    dex_files = linker->OpenDexFilesFromOat(dex_location_.c_str(), oat_location_.c_str(), &error_msgs);
+    const OatFile* oat_file = nullptr;
+    dex_files = Runtime::Current()->GetOatFileManager().OpenDexFilesFromOat(
+        dex_location_.c_str(),
+        oat_location_.c_str(),
+        &oat_file,
+        &error_msgs);
     CHECK(!dex_files.empty()) << Join(error_msgs, '\n');
     CHECK(dex_files[0]->GetOatDexFile() != nullptr) << dex_files[0]->GetLocation();
     loaded_oat_file_ = dex_files[0]->GetOatDexFile()->GetOatFile();
+    CHECK_EQ(loaded_oat_file_, oat_file);
   }
 
   const OatFile* GetLoadedOatFile() const {
@@ -980,8 +1016,9 @@
 // Test the case where multiple processes race to generate an oat file.
 // This simulates multiple processes using multiple threads.
 //
-// We want only one Oat file to be loaded when there is a race to load, to
-// avoid using up the virtual memory address space.
+// We want unique Oat files to be loaded even when there is a race to load.
+// TODO: The test case no longer tests locking the way it was intended since we now get multiple
+// copies of the same Oat files mapped at different locations.
 TEST_F(OatFileAssistantTest, RaceToGenerate) {
   std::string dex_location = GetScratchDir() + "/RaceToGenerate.jar";
   std::string oat_location = GetOdexDir() + "/RaceToGenerate.oat";
@@ -1002,10 +1039,12 @@
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, true, false);
 
-  // Verify every task got the same pointer.
-  const OatFile* expected = tasks[0]->GetLoadedOatFile();
+  // Verify every task got a unique oat file.
+  std::set<const OatFile*> oat_files;
   for (auto& task : tasks) {
-    EXPECT_EQ(expected, task->GetLoadedOatFile());
+    const OatFile* oat_file = task->GetLoadedOatFile();
+    EXPECT_TRUE(oat_files.find(oat_file) == oat_files.end());
+    oat_files.insert(oat_file);
   }
 }
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
new file mode 100644
index 0000000..9eee156
--- /dev/null
+++ b/runtime/oat_file_manager.cc
@@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oat_file_manager.h"
+
+#include <memory>
+#include <queue>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "dex_file-inl.h"
+#include "gc/space/image_space.h"
+#include "oat_file_assistant.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// For b/21333911.
+// Only enabled for debug builds to prevent bit rot. There are too many performance regressions for
+// normal builds.
+static constexpr bool kDuplicateClassesCheck = kIsDebugBuild;
+
+const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
+  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  DCHECK(oat_file != nullptr);
+  if (kIsDebugBuild) {
+    CHECK(oat_files_.find(oat_file) == oat_files_.end());
+    for (const std::unique_ptr<const OatFile>& existing : oat_files_) {
+      CHECK_NE(oat_file.get(), existing.get()) << oat_file->GetLocation();
+      // Check that we don't have an oat file with the same address. Copies of the same oat file
+      // should be loaded at different addresses.
+      CHECK_NE(oat_file->Begin(), existing->Begin()) << "Oat file already mapped at that location";
+    }
+  }
+  have_non_pic_oat_file_ = have_non_pic_oat_file_ || !oat_file->IsPic();
+  const OatFile* ret = oat_file.get();
+  oat_files_.insert(std::move(oat_file));
+  return ret;
+}
+
+void OatFileManager::UnRegisterAndDeleteOatFile(const OatFile* oat_file) {
+  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  DCHECK(oat_file != nullptr);
+  std::unique_ptr<const OatFile> compare(oat_file);
+  auto it = oat_files_.find(compare);
+  CHECK(it != oat_files_.end());
+  oat_files_.erase(it);
+  compare.release();
+}
+
+const OatFile* OatFileManager::FindOpenedOatFileFromOatLocation(const std::string& oat_location)
+    const {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  return FindOpenedOatFileFromOatLocationLocked(oat_location);
+}
+
+const OatFile* OatFileManager::FindOpenedOatFileFromOatLocationLocked(
+    const std::string& oat_location) const {
+  for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
+    if (oat_file->GetLocation() == oat_location) {
+      return oat_file.get();
+    }
+  }
+  return nullptr;
+}
+
+const OatFile* OatFileManager::GetBootOatFile() const {
+  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+  if (image_space == nullptr) {
+    return nullptr;
+  }
+  return image_space->GetOatFile();
+}
+
+const OatFile* OatFileManager::GetPrimaryOatFile() const {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  const OatFile* boot_oat_file = GetBootOatFile();
+  if (boot_oat_file != nullptr) {
+    for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
+      if (oat_file.get() != boot_oat_file) {
+        return oat_file.get();
+      }
+    }
+  }
+  return nullptr;
+}
+
+OatFileManager::~OatFileManager() {
+  // Explicitly clear oat_files_ since the OatFile destructor calls back into OatFileManager for
+  // UnRegisterOatFileLocation.
+  oat_files_.clear();
+}
+
+const OatFile* OatFileManager::RegisterImageOatFile(gc::space::ImageSpace* space) {
+  return RegisterOatFile(space->ReleaseOatFile());
+}
+
+class DexFileAndClassPair : ValueObject {
+ public:
+  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
+     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
+       dex_file_(dex_file),
+       current_class_index_(current_class_index),
+       from_loaded_oat_(from_loaded_oat) {}
+
+  DexFileAndClassPair(const DexFileAndClassPair& rhs) = default;
+
+  DexFileAndClassPair& operator=(const DexFileAndClassPair& rhs) = default;
+
+  const char* GetCachedDescriptor() const {
+    return cached_descriptor_;
+  }
+
+  bool operator<(const DexFileAndClassPair& rhs) const {
+    const int cmp = strcmp(cached_descriptor_, rhs.cached_descriptor_);
+    if (cmp != 0) {
+      // Note that the order must be reversed. We want to iterate over the classes in dex files.
+      // They are sorted lexicographically. Thus, the priority-queue must be a min-queue.
+      return cmp > 0;
+    }
+    return dex_file_ < rhs.dex_file_;
+  }
+
+  bool DexFileHasMoreClasses() const {
+    return current_class_index_ + 1 < dex_file_->NumClassDefs();
+  }
+
+  void Next() {
+    ++current_class_index_;
+    cached_descriptor_ = GetClassDescriptor(dex_file_.get(), current_class_index_);
+  }
+
+  size_t GetCurrentClassIndex() const {
+    return current_class_index_;
+  }
+
+  bool FromLoadedOat() const {
+    return from_loaded_oat_;
+  }
+
+  const DexFile* GetDexFile() const {
+    return dex_file_.get();
+  }
+
+ private:
+  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
+    DCHECK(IsUint<16>(index));
+    const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
+    return dex_file->StringByTypeIdx(class_def.class_idx_);
+  }
+
+  const char* cached_descriptor_;
+  std::shared_ptr<const DexFile> dex_file_;
+  size_t current_class_index_;
+  bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
+                          // and what was loaded before. Any old duplicates must have been
+                          // OK, and any new "internal" duplicates are as well (they must
+                          // be from multidex, which resolves correctly).
+};
+
+static void AddDexFilesFromOat(const OatFile* oat_file,
+                               bool already_loaded,
+                               /*out*/std::priority_queue<DexFileAndClassPair>* heap) {
+  for (const OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
+    std::string error;
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
+    if (dex_file == nullptr) {
+      LOG(WARNING) << "Could not create dex file from oat file: " << error;
+    } else if (dex_file->NumClassDefs() > 0U) {
+      heap->emplace(dex_file.release(), /*current_class_index*/0U, already_loaded);
+    }
+  }
+}
+
+static void AddNext(/*inout*/DexFileAndClassPair* original,
+                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap) {
+  if (original->DexFileHasMoreClasses()) {
+    original->Next();
+    heap->push(std::move(*original));
+  }
+}
+
+// Check for class-def collisions in dex files.
+//
+// This works by maintaining a heap with one class from each dex file, sorted by the class
+// descriptor. Then a dex-file/class pair is continually removed from the heap and compared
+// against the following top element. If the descriptor is the same, it is now checked whether
+// the two elements agree on whether their dex file was from an already-loaded oat-file or the
+// new oat file. Any disagreement indicates a collision.
+bool OatFileManager::HasCollisions(const OatFile* oat_file,
+                                   std::string* error_msg /*out*/) const {
+  DCHECK(oat_file != nullptr);
+  DCHECK(error_msg != nullptr);
+  if (!kDuplicateClassesCheck) {
+    return false;
+  }
+
+  // Dex files are registered late - once a class is actually being loaded. We have to compare
+  // against the open oat files. Take the oat_file_manager_lock_ that protects oat_files_ accesses.
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+
+  std::priority_queue<DexFileAndClassPair> queue;
+
+  // Add dex files from already loaded oat files, but skip boot.
+  const OatFile* boot_oat = GetBootOatFile();
+  // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
+  // need to check both against each other since they would have resolved the same way at compile
+  // time.
+  std::unordered_set<std::string> unique_locations;
+  for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
+    DCHECK_NE(loaded_oat_file.get(), oat_file);
+    const std::string& location = loaded_oat_file->GetLocation();
+    if (loaded_oat_file.get() != boot_oat &&
+        location != oat_file->GetLocation() &&
+        unique_locations.find(location) == unique_locations.end()) {
+      unique_locations.insert(location);
+      AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
+    }
+  }
+
+  if (queue.empty()) {
+    // No other oat files, return early.
+    return false;
+  }
+
+  // Add dex files from the oat file to check.
+  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue);
+
+  // Now drain the queue.
+  while (!queue.empty()) {
+    // Modifying the top element is only safe if we pop right after.
+    DexFileAndClassPair compare_pop(queue.top());
+    queue.pop();
+
+    // Compare against the following elements.
+    while (!queue.empty()) {
+      DexFileAndClassPair top(queue.top());
+
+      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
+        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
+        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
+          *error_msg =
+              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
+                           compare_pop.GetCachedDescriptor(),
+                           compare_pop.GetDexFile()->GetLocation().c_str(),
+                           top.GetDexFile()->GetLocation().c_str());
+          return true;
+        }
+        queue.pop();
+        AddNext(&top, &queue);
+      } else {
+        // Something else. Done here.
+        break;
+      }
+    }
+    AddNext(&compare_pop, &queue);
+  }
+
+  return false;
+}
+
+std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat(
+    const char* dex_location,
+    const char* oat_location,
+    const OatFile** out_oat_file,
+    std::vector<std::string>* error_msgs) {
+  CHECK(dex_location != nullptr);
+  CHECK(error_msgs != nullptr);
+
+  // Verify we aren't holding the mutator lock, which could starve GC if we
+  // have to generate or relocate an oat file.
+  Locks::mutator_lock_->AssertNotHeld(Thread::Current());
+
+  OatFileAssistant oat_file_assistant(dex_location,
+                                      oat_location,
+                                      kRuntimeISA,
+                                      !Runtime::Current()->IsAotCompiler());
+
+  // Lock the target oat location to avoid races generating and loading the
+  // oat file.
+  std::string error_msg;
+  if (!oat_file_assistant.Lock(/*out*/&error_msg)) {
+    // Don't worry too much if this fails. If it does fail, it's unlikely we
+    // can generate an oat file anyway.
+    VLOG(class_linker) << "OatFileAssistant::Lock: " << error_msg;
+  }
+
+  const OatFile* source_oat_file = nullptr;
+
+  // Update the oat file on disk if we can. This may fail, but that's okay.
+  // Best effort is all that matters here.
+  if (!oat_file_assistant.MakeUpToDate(/*out*/&error_msg)) {
+    LOG(WARNING) << error_msg;
+  }
+
+  // Get the oat file on disk.
+  std::unique_ptr<const OatFile> oat_file(oat_file_assistant.GetBestOatFile().release());
+  if (oat_file != nullptr) {
+    // Take the file only if it has no collisions, or we must take it because of preopting.
+    bool accept_oat_file = !HasCollisions(oat_file.get(), /*out*/ &error_msg);
+    if (!accept_oat_file) {
+      // Failed the collision check. Print warning.
+      if (Runtime::Current()->IsDexFileFallbackEnabled()) {
+        LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
+                     << dex_location;
+      } else {
+        LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
+                        " load classes for " << dex_location;
+      }
+      LOG(WARNING) << error_msg;
+
+      // However, if the app was part of /system and preopted, there is no original dex file
+      // available. In that case grudgingly accept the oat file.
+      if (!DexFile::MaybeDex(dex_location)) {
+        accept_oat_file = true;
+        LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
+                     << "Allow oat file use. This is potentially dangerous.";
+      }
+    }
+
+    if (accept_oat_file) {
+      VLOG(class_linker) << "Registering " << oat_file->GetLocation();
+      source_oat_file = RegisterOatFile(std::move(oat_file));
+      *out_oat_file = source_oat_file;
+    }
+  }
+
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+
+  // Load the dex files from the oat file.
+  if (source_oat_file != nullptr) {
+    dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+    if (dex_files.empty()) {
+      error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
+    }
+  }
+
+  // Fall back to running out of the original dex file if we couldn't load any
+  // dex_files from the oat file.
+  if (dex_files.empty()) {
+    if (oat_file_assistant.HasOriginalDexFiles()) {
+      if (Runtime::Current()->IsDexFileFallbackEnabled()) {
+        if (!DexFile::Open(dex_location, dex_location, /*out*/ &error_msg, &dex_files)) {
+          LOG(WARNING) << error_msg;
+          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
+        }
+      } else {
+        error_msgs->push_back("Fallback mode disabled, skipping dex files.");
+      }
+    } else {
+      error_msgs->push_back("No original dex files found for dex location "
+          + std::string(dex_location));
+    }
+  }
+  return dex_files;
+}
+
+bool OatFileManager::RegisterOatFileLocation(const std::string& oat_location) {
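+  // Locations are reference counted; only the first registration of a location returns true.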
+  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
+  auto it = oat_file_count_.find(oat_location);
+  if (it != oat_file_count_.end()) {
+    ++it->second;
+    return false;
+  }
+  oat_file_count_.insert(std::pair<std::string, size_t>(oat_location, 1u));
+  return true;
+}
+
+void OatFileManager::UnRegisterOatFileLocation(const std::string& oat_location) {
+  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
+  auto it = oat_file_count_.find(oat_location);
+  if (it != oat_file_count_.end()) {
+    --it->second;
+    if (it->second == 0) {
+      oat_file_count_.erase(it);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
new file mode 100644
index 0000000..af7efb4
--- /dev/null
+++ b/runtime/oat_file_manager.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OAT_FILE_MANAGER_H_
+#define ART_RUNTIME_OAT_FILE_MANAGER_H_
+
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+
+namespace art {
+
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
+
+class DexFile;
+class OatFile;
+
+// Class for dealing with oat file management.
+//
+// This class knows about all the loaded oat files and provides utility functions. The oat file
+// pointers returned from functions are always valid.
+class OatFileManager {
+ public:
+  OatFileManager() : have_non_pic_oat_file_(false) {}
+  ~OatFileManager();
+
+  // Adds an oat file to the internal accounting; std::abort()s if there already exists an oat
+  // file with the same base address. Returns the oat file pointer from oat_file.
+  const OatFile* RegisterOatFile(std::unique_ptr<const OatFile> oat_file)
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  void UnRegisterAndDeleteOatFile(const OatFile* oat_file)
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Finds the first opened oat file with the same location; returns null if there is none.
+  const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location) const
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Attempts to reserve a location; returns false if it is already reserved or already in use by
+  // an oat file.
+  bool RegisterOatFileLocation(const std::string& oat_location)
+      REQUIRES(!Locks::oat_file_count_lock_);
+
+  // Unreserves an oat file location; should only be used in error cases, since RegisterOatFile
+  // will remove the reserved location.
+  void UnRegisterOatFileLocation(const std::string& oat_location)
+      REQUIRES(!Locks::oat_file_count_lock_);
+
+  // Returns true if we have a non-PIC oat file.
+  bool HaveNonPicOatFile() const {
+    return have_non_pic_oat_file_;
+  }
+
+  // Returns the boot image oat file.
+  const OatFile* GetBootOatFile() const;
+
+  // Returns the first non-image oat file in the class path.
+  const OatFile* GetPrimaryOatFile() const REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Returns the oat file for an image and registers it. Takes ownership of the image space's
+  // underlying oat file.
+  const OatFile* RegisterImageOatFile(gc::space::ImageSpace* space)
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Finds or creates the oat file holding dex_location. Then loads and returns
+  // all corresponding dex files (there may be more than one dex file loaded
+  // in the case of multidex).
+  // This may return the original, unquickened dex files if the oat file could
+  // not be generated.
+  //
+  // Returns an empty vector if the dex files could not be loaded. In this
+  // case, there will be at least one error message returned describing why no
+  // dex files could be loaded. The 'error_msgs' argument must not be
+  // null, regardless of whether there is an error or not.
+  //
+  // This method should not be called with the mutator_lock_ held, because it
+  // could end up starving GC if we need to generate or relocate any oat
+  // files.
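+  //
+  // Illustrative caller sketch (hypothetical names, not part of this change):
+  //
+  //   std::vector<std::string> error_msgs;
+  //   const OatFile* oat_file = nullptr;
+  //   std::vector<std::unique_ptr<const DexFile>> dex_files =
+  //       Runtime::Current()->GetOatFileManager().OpenDexFilesFromOat(
+  //           dex_location, oat_location, &oat_file, &error_msgs);
+  //   if (dex_files.empty()) {
+  //     // error_msgs explains why nothing could be loaded.
+  //   }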
+  std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
+      const char* dex_location,
+      const char* oat_location,
+      /*out*/ const OatFile** out_oat_file,
+      /*out*/ std::vector<std::string>* error_msgs)
+      REQUIRES(!Locks::oat_file_manager_lock_, !Locks::mutator_lock_);
+
+ private:
+  // Check for duplicate class definitions of the given oat file against all open oat files.
+  // Return true if there are any class definition collisions in the oat_file.
+  bool HasCollisions(const OatFile* oat_file, /*out*/std::string* error_msg) const
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  const OatFile* FindOpenedOatFileFromOatLocationLocked(const std::string& oat_location) const
+      REQUIRES(Locks::oat_file_manager_lock_);
+
+  std::set<std::unique_ptr<const OatFile>> oat_files_ GUARDED_BY(Locks::oat_file_manager_lock_);
+  std::unordered_map<std::string, size_t> oat_file_count_ GUARDED_BY(Locks::oat_file_count_lock_);
+  bool have_non_pic_oat_file_;
+  DISALLOW_COPY_AND_ASSIGN(OatFileManager);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_OAT_FILE_MANAGER_H_
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
new file mode 100644
index 0000000..9786c05
--- /dev/null
+++ b/runtime/oat_quick_method_header.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oat_quick_method_header.h"
+
+#include "art_method.h"
+#include "mapping_table.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+
+namespace art {
+
+OatQuickMethodHeader::OatQuickMethodHeader(
+    uint32_t mapping_table_offset,
+    uint32_t vmap_table_offset,
+    uint32_t gc_map_offset,
+    uint32_t frame_size_in_bytes,
+    uint32_t core_spill_mask,
+    uint32_t fp_spill_mask,
+    uint32_t code_size)
+    : mapping_table_offset_(mapping_table_offset),
+      vmap_table_offset_(vmap_table_offset),
+      gc_map_offset_(gc_map_offset),
+      frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask),
+      code_size_(code_size) {}
+
+OatQuickMethodHeader::~OatQuickMethodHeader() {}
+
+uint32_t OatQuickMethodHeader::ToDexPc(ArtMethod* method,
+                                       const uintptr_t pc,
+                                       bool abort_on_failure) const {
+  const void* entry_point = GetEntryPoint();
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
+  if (IsOptimized()) {
+    CodeInfo code_info = GetOptimizedCodeInfo();
+    StackMapEncoding encoding = code_info.ExtractEncoding();
+    StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset, encoding);
+    if (stack_map.IsValid()) {
+      return stack_map.GetDexPc(encoding);
+    }
+  } else {
+    MappingTable table(GetMappingTable());
+    // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
+    // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
+    if (table.TotalSize() == 0) {
+      DCHECK(method->IsNative());
+      return DexFile::kDexNoIndex;
+    }
+
+    // Assume the caller wants a pc-to-dex mapping so check here first.
+    typedef MappingTable::PcToDexIterator It;
+    for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
+      if (cur.NativePcOffset() == sought_offset) {
+        return cur.DexPc();
+      }
+    }
+    // Now check dex-to-pc mappings.
+    typedef MappingTable::DexToPcIterator It2;
+    for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
+      if (cur.NativePcOffset() == sought_offset) {
+        return cur.DexPc();
+      }
+    }
+  }
+  if (abort_on_failure) {
+    ScopedObjectAccess soa(Thread::Current());
+    LOG(FATAL) << "Failed to find Dex offset for PC offset "
+           << reinterpret_cast<void*>(sought_offset)
+           << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
+           << " current entry_point=" << method->GetEntryPointFromQuickCompiledCode()
+           << ") in " << PrettyMethod(method);
+  }
+  return DexFile::kDexNoIndex;
+}
+
+uintptr_t OatQuickMethodHeader::ToNativeQuickPc(ArtMethod* method,
+                                                const uint32_t dex_pc,
+                                                bool is_for_catch_handler,
+                                                bool abort_on_failure) const {
+  const void* entry_point = GetEntryPoint();
+  if (IsOptimized()) {
+    // Optimized code does not have a mapping table. Search for the dex-to-pc
+    // mapping in stack maps.
+    CodeInfo code_info = GetOptimizedCodeInfo();
+    StackMapEncoding encoding = code_info.ExtractEncoding();
+
+    // All stack maps are stored in the same CodeItem section, safepoint stack
+    // maps first, then catch stack maps. We use `is_for_catch_handler` to select
+    // the order of iteration.
+    StackMap stack_map =
+        LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
+                                     : code_info.GetStackMapForDexPc(dex_pc, encoding);
+    if (stack_map.IsValid()) {
+      return reinterpret_cast<uintptr_t>(entry_point) + stack_map.GetNativePcOffset(encoding);
+    }
+  } else {
+    MappingTable table(GetMappingTable());
+    if (table.TotalSize() == 0) {
+      DCHECK_EQ(dex_pc, 0U);
+      return 0;   // Special no mapping/pc == 0 case
+    }
+    // Assume the caller wants a dex-to-pc mapping so check here first.
+    typedef MappingTable::DexToPcIterator It;
+    for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
+      if (cur.DexPc() == dex_pc) {
+        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
+      }
+    }
+    // Now check pc-to-dex mappings.
+    typedef MappingTable::PcToDexIterator It2;
+    for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
+      if (cur.DexPc() == dex_pc) {
+        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
+      }
+    }
+  }
+
+  if (abort_on_failure) {
+    ScopedObjectAccess soa(Thread::Current());
+    LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
+               << " in " << PrettyMethod(method);
+  }
+  return UINTPTR_MAX;
+}
+
+}  // namespace art
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
new file mode 100644
index 0000000..03cad08
--- /dev/null
+++ b/runtime/oat_quick_method_header.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OAT_QUICK_METHOD_HEADER_H_
+#define ART_RUNTIME_OAT_QUICK_METHOD_HEADER_H_
+
+#include "arch/instruction_set.h"
+#include "base/macros.h"
+#include "quick/quick_method_frame_info.h"
+#include "stack_map.h"
+#include "utils.h"
+
+namespace art {
+
+class ArtMethod;
+
+// OatQuickMethodHeader precedes the raw code chunk generated by the compiler.
+class PACKED(4) OatQuickMethodHeader {
+ public:
+  OatQuickMethodHeader(uint32_t mapping_table_offset = 0U,
+                       uint32_t vmap_table_offset = 0U,
+                       uint32_t gc_map_offset = 0U,
+                       uint32_t frame_size_in_bytes = 0U,
+                       uint32_t core_spill_mask = 0U,
+                       uint32_t fp_spill_mask = 0U,
+                       uint32_t code_size = 0U);
+
+  ~OatQuickMethodHeader();
+
+  static OatQuickMethodHeader* FromCodePointer(const void* code_ptr) {
+    uintptr_t code = reinterpret_cast<uintptr_t>(code_ptr);
+    uintptr_t header = code - OFFSETOF_MEMBER(OatQuickMethodHeader, code_);
+    DCHECK(IsAlignedParam(code, GetInstructionSetAlignment(kRuntimeISA)) ||
+           IsAlignedParam(header, GetInstructionSetAlignment(kRuntimeISA)));
+    return reinterpret_cast<OatQuickMethodHeader*>(header);
+  }
+
+  static OatQuickMethodHeader* FromEntryPoint(const void* entry_point) {
+    return FromCodePointer(EntryPointToCodePointer(entry_point));
+  }
+
+  OatQuickMethodHeader& operator=(const OatQuickMethodHeader&) = default;
+
+  uintptr_t NativeQuickPcOffset(const uintptr_t pc) const {
+    return pc - reinterpret_cast<uintptr_t>(GetEntryPoint());
+  }
+
+  bool IsOptimized() const {
+    return gc_map_offset_ == 0 && vmap_table_offset_ != 0;
+  }
+
+  CodeInfo GetOptimizedCodeInfo() const {
+    DCHECK(IsOptimized());
+    const void* data = reinterpret_cast<const void*>(code_ - vmap_table_offset_);
+    return CodeInfo(data);
+  }
+
+  const uint8_t* GetCode() const {
+    return code_;
+  }
+
+  const uint8_t* GetNativeGcMap() const {
+    return (gc_map_offset_ == 0) ? nullptr : code_ - gc_map_offset_;
+  }
+
+  const uint8_t* GetMappingTable() const {
+    return (mapping_table_offset_ == 0) ? nullptr : code_ - mapping_table_offset_;
+  }
+
+  const uint8_t* GetVmapTable() const {
+    CHECK(!IsOptimized()) << "Unimplemented vmap table for optimizing compiler";
+    return (vmap_table_offset_ == 0) ? nullptr : code_ - vmap_table_offset_;
+  }
+
+  bool Contains(uintptr_t pc) const {
+    uintptr_t code_start = reinterpret_cast<uintptr_t>(code_);
+    static_assert(kRuntimeISA != kThumb2, "kThumb2 cannot be a runtime ISA");
+    if (kRuntimeISA == kArm) {
+      // On Thumb-2, the pc is offset by one.
+      code_start++;
+    }
+    return code_start <= pc && pc <= (code_start + code_size_);
+  }
+
+  const uint8_t* GetEntryPoint() const {
+    // When the runtime architecture is ARM, `kRuntimeISA` is set to `kArm`
+    // (not `kThumb2`), *but* we always generate code for the Thumb-2
+    // instruction set anyway. Thumb-2 requires the entry point to have its
+    // least significant bit set (an offset of 1).
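+    // For example (illustrative): if code_ starts at address 0x7000 on ARM,
+    // GetEntryPoint() returns 0x7001.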
+    static_assert(kRuntimeISA != kThumb2, "kThumb2 cannot be a runtime ISA");
+    return (kRuntimeISA == kArm)
+        ? reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(code_) | 1)
+        : code_;
+  }
+
+  template <bool kCheckFrameSize = true>
+  uint32_t GetFrameSizeInBytes() {
+    uint32_t result = frame_info_.FrameSizeInBytes();
+    if (kCheckFrameSize) {
+      DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
+    }
+    return result;
+  }
+
+  QuickMethodFrameInfo GetFrameInfo() const {
+    return frame_info_;
+  }
+
+  uintptr_t ToNativeQuickPc(ArtMethod* method,
+                            const uint32_t dex_pc,
+                            bool is_for_catch_handler,
+                            bool abort_on_failure = true) const;
+
+  uint32_t ToDexPc(ArtMethod* method, const uintptr_t pc, bool abort_on_failure = true) const;
+
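+  // Note: the tables referenced below live at lower addresses than the
+  // compiled code; each offset is subtracted from code_ to locate the
+  // corresponding table.
+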
+  // The offset in bytes from the start of the mapping table to the end of the header.
+  uint32_t mapping_table_offset_;
+  // The offset in bytes from the start of the vmap table to the end of the header.
+  uint32_t vmap_table_offset_;
+  // The offset in bytes from the start of the gc map to the end of the header.
+  uint32_t gc_map_offset_;
+  // The stack frame information.
+  QuickMethodFrameInfo frame_info_;
+  // The code size in bytes.
+  uint32_t code_size_;
+  // The actual code.
+  uint8_t code_[0];
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_OAT_QUICK_METHOD_HEADER_H_
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 50e2053..ae16c7f 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -269,10 +269,10 @@
       .Define("-Xfingerprint:_")
           .WithType<std::string>()
           .IntoKey(M::Fingerprint)
-      .Define({"-Xexperimental-lambdas", "-Xnoexperimental-lambdas"})
-          .WithType<bool>()
-          .WithValues({true, false})
-          .IntoKey(M::ExperimentalLambdas)
+      .Define("-Xexperimental:_")
+          .WithType<ExperimentalFlags>()
+          .AppendValues()
+          .IntoKey(M::Experimental)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -557,7 +557,14 @@
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
-  if (args.GetOrDefault(M::ExperimentalLambdas)) {
+  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kDefaultMethods) {
+    LOG(WARNING) << "Default method support has been enabled. The verifier will be less strict "
+                 << "in some cases. All existing invoke opcodes have an unstable updated "
+                 << "specification and are nearly guaranteed to change over time. Do not attempt "
+                 << "to write shipping code against the invoke opcodes with this flag.";
+  }
+
+  if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kLambdas) {
     LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have "
                  << "an unstable specification and are nearly guaranteed to change over time. "
                  << "Do not attempt to write shipping code against these opcodes.";
@@ -682,8 +689,8 @@
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
   UsageMessage(stream, "  -Xno-dex-file-fallback "
                        "(Don't fall back to dex files without oat files)\n");
-  UsageMessage(stream, "  -X[no]experimental-lambdas\n"
-                       "     (Enable new experimental dalvik opcodes, off by default)\n");
+  UsageMessage(stream, "  -Xexperimental:{lambdas,default-methods} "
+                       "(Enable new experimental dalvik opcodes and semantics, off by default)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index bc9ba37..57472ad 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -216,10 +216,10 @@
 
   LengthPrefixedArray<ArtField>* static_fields0 = proxyClass0->GetSFieldsPtr();
   ASSERT_TRUE(static_fields0 != nullptr);
-  ASSERT_EQ(2u, static_fields0->Length());
+  ASSERT_EQ(2u, static_fields0->size());
   LengthPrefixedArray<ArtField>* static_fields1 = proxyClass1->GetSFieldsPtr();
   ASSERT_TRUE(static_fields1 != nullptr);
-  ASSERT_EQ(2u, static_fields1->Length());
+  ASSERT_EQ(2u, static_fields1->size());
 
   EXPECT_EQ(static_fields0->At(0).GetDeclaringClass(), proxyClass0.Get());
   EXPECT_EQ(static_fields0->At(1).GetDeclaringClass(), proxyClass0.Get());
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 99e262e..6554394 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -73,7 +73,6 @@
 bool InlineMethodAnalyser::AnalyseMethodCode(verifier::MethodVerifier* verifier,
                                              InlineMethod* method) {
   DCHECK(verifier != nullptr);
-  DCHECK_EQ(Runtime::Current()->IsCompiler(), method != nullptr);
   if (!Runtime::Current()->UseJit()) {
     DCHECK_EQ(verifier->CanLoadClasses(), method != nullptr);
   }
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 64c2249..837662d 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -74,6 +74,7 @@
   kIntrinsicUnsafeGet,
   kIntrinsicUnsafePut,
   kIntrinsicSystemArrayCopyCharArray,
+  kIntrinsicSystemArrayCopy,
 
   kInlineOpNop,
   kInlineOpReturnArg,
diff --git a/runtime/quick/quick_method_frame_info.h b/runtime/quick/quick_method_frame_info.h
index 684d4da..71f8265 100644
--- a/runtime/quick/quick_method_frame_info.h
+++ b/runtime/quick/quick_method_frame_info.h
@@ -50,6 +50,10 @@
     return fp_spill_mask_;
   }
 
+  size_t GetReturnPcOffset() const {
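+    // The caller's return PC is spilled into the last pointer-sized slot of
+    // the frame, hence this offset.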
+    return FrameSizeInBytes() - sizeof(void*);
+  }
+
  private:
   uint32_t frame_size_in_bytes_;
   uint32_t core_spill_mask_;
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 63f43cf..1552318 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -26,6 +26,8 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/throwable.h"
+#include "oat_quick_method_header.h"
+#include "stack_map.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -34,13 +36,19 @@
 static constexpr size_t kInvalidFrameDepth = 0xffffffff;
 
 QuickExceptionHandler::QuickExceptionHandler(Thread* self, bool is_deoptimization)
-  : self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization),
-    method_tracing_active_(is_deoptimization ||
-                           Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
-    handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0),
-    handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false),
-    handler_frame_depth_(kInvalidFrameDepth) {
-}
+    : self_(self),
+      context_(self->GetLongJumpContext()),
+      is_deoptimization_(is_deoptimization),
+      method_tracing_active_(is_deoptimization ||
+                             Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
+      handler_quick_frame_(nullptr),
+      handler_quick_frame_pc_(0),
+      handler_method_header_(nullptr),
+      handler_quick_arg0_(0),
+      handler_method_(nullptr),
+      handler_dex_pc_(0),
+      clear_exception_(false),
+      handler_frame_depth_(kInvalidFrameDepth) {}
 
 // Finds catch handler.
 class CatchBlockStackVisitor FINAL : public StackVisitor {
@@ -60,6 +68,7 @@
       // This is the upcall, we remember the frame and last pc so that we may long jump to them.
       exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
       exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
+      exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
       uint32_t next_dex_pc;
       ArtMethod* next_art_method;
       bool has_next = GetNextMethodAndDexPc(&next_art_method, &next_dex_pc);
@@ -99,8 +108,10 @@
         exception_handler_->SetHandlerMethod(method);
         exception_handler_->SetHandlerDexPc(found_dex_pc);
         exception_handler_->SetHandlerQuickFramePc(
-            method->ToNativeQuickPc(found_dex_pc, /* is_catch_handler */ true));
+            GetCurrentOatQuickMethodHeader()->ToNativeQuickPc(
+                method, found_dex_pc, /* is_catch_handler */ true));
         exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
+        exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
         return false;  // End stack walk.
       } else if (UNLIKELY(GetThread()->HasDebuggerShadowFrames())) {
         // We are going to unwind this frame. Did we prepare a shadow frame for debugging?
@@ -158,8 +169,8 @@
   }
   // If the handler is in optimized code, we need to set the catch environment.
   if (*handler_quick_frame_ != nullptr &&
-      handler_method_ != nullptr &&
-      handler_method_->IsOptimized(sizeof(void*))) {
+      handler_method_header_ != nullptr &&
+      handler_method_header_->IsOptimized()) {
     SetCatchEnvironmentForOptimizedHandler(&visitor);
   }
 }
@@ -200,14 +211,14 @@
 void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor) {
   DCHECK(!is_deoptimization_);
   DCHECK(*handler_quick_frame_ != nullptr) << "Method should not be called on upcall exceptions";
-  DCHECK(handler_method_ != nullptr && handler_method_->IsOptimized(sizeof(void*)));
+  DCHECK(handler_method_ != nullptr && handler_method_header_->IsOptimized());
 
   if (kDebugExceptionDelivery) {
     self_->DumpStack(LOG(INFO) << "Setting catch phis: ");
   }
 
   const size_t number_of_vregs = handler_method_->GetCodeItem()->registers_size_;
-  CodeInfo code_info = handler_method_->GetOptimizedCodeInfo();
+  CodeInfo code_info = handler_method_header_->GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
 
   // Find stack map of the throwing instruction.
@@ -283,6 +294,7 @@
       // and last pc so that we may long jump to them.
       exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
       exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
+      exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
       if (!stacked_shadow_frame_pushed_) {
         // In case there is no deoptimized shadow frame for this upcall, we still
         // need to push a nullptr to the stack since there is always a matching pop after
@@ -303,7 +315,43 @@
       CHECK_EQ(GetFrameDepth(), 1U);
       return true;
     } else {
-      HandleDeoptimization(method);
+      // Check if a shadow frame already exists for the debugger's set-local-value purpose.
+      const size_t frame_id = GetFrameId();
+      ShadowFrame* new_frame = GetThread()->FindDebuggerShadowFrame(frame_id);
+      const bool* updated_vregs;
+      const size_t num_regs = method->GetCodeItem()->registers_size_;
+      if (new_frame == nullptr) {
+        new_frame = ShadowFrame::CreateDeoptimizedFrame(num_regs, nullptr, method, GetDexPc());
+        updated_vregs = nullptr;
+      } else {
+        updated_vregs = GetThread()->GetUpdatedVRegFlags(frame_id);
+        DCHECK(updated_vregs != nullptr);
+      }
+      if (GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
+      } else {
+        HandleQuickDeoptimization(method, new_frame, updated_vregs);
+      }
+      if (updated_vregs != nullptr) {
+        // Calling Thread::RemoveDebuggerShadowFrameMapping will also delete the updated_vregs
+        // array so this must come after we processed the frame.
+        GetThread()->RemoveDebuggerShadowFrameMapping(frame_id);
+        DCHECK(GetThread()->FindDebuggerShadowFrame(frame_id) == nullptr);
+      }
+      if (prev_shadow_frame_ != nullptr) {
+        prev_shadow_frame_->SetLink(new_frame);
+      } else {
+        // Will be popped after the long jump after DeoptimizeStack(),
+        // right before interpreter::EnterInterpreterFromDeoptimize().
+        stacked_shadow_frame_pushed_ = true;
+        GetThread()->PushStackedShadowFrame(
+            new_frame,
+            single_frame_deopt_
+                ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
+                : StackedShadowFrameType::kDeoptimizationShadowFrame);
+      }
+      prev_shadow_frame_ = new_frame;
+
       if (single_frame_deopt_ && !IsInInlinedFrame()) {
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
@@ -314,16 +362,108 @@
   }
 
  private:
+  void HandleOptimizingDeoptimization(ArtMethod* m,
+                                      ShadowFrame* new_frame,
+                                      const bool* updated_vregs)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+    CodeInfo code_info = method_header->GetOptimizedCodeInfo();
+    uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
+    StackMapEncoding encoding = code_info.ExtractEncoding();
+    StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+    const size_t number_of_vregs = m->GetCodeItem()->registers_size_;
+    MemoryRegion stack_mask = stack_map.GetStackMask(encoding);
+    uint32_t register_mask = stack_map.GetRegisterMask(encoding);
+    DexRegisterMap vreg_map = IsInInlinedFrame()
+        ? code_info.GetDexRegisterMapAtDepth(GetCurrentInliningDepth() - 1,
+                                             code_info.GetInlineInfoOf(stack_map, encoding),
+                                             encoding,
+                                             number_of_vregs)
+        : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
+
+    for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
+      if (updated_vregs != nullptr && updated_vregs[vreg]) {
+        // Keep the value set by debugger.
+        continue;
+      }
+
+      DexRegisterLocation::Kind location =
+          vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
+      static constexpr uint32_t kDeadValue = 0xEBADDE09;
+      uint32_t value = kDeadValue;
+      bool is_reference = false;
+
+      switch (location) {
+        case DexRegisterLocation::Kind::kInStack: {
+          const int32_t offset = vreg_map.GetStackOffsetInBytes(vreg,
+                                                                number_of_vregs,
+                                                                code_info,
+                                                                encoding);
+          const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
+          value = *reinterpret_cast<const uint32_t*>(addr);
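+          // The stack mask has one bit per 4-byte stack slot, so convert the
+          // byte offset into a slot index before testing it.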
+          uint32_t bit = (offset >> 2);
+          if (stack_mask.size_in_bits() > bit && stack_mask.LoadBit(bit)) {
+            is_reference = true;
+          }
+          break;
+        }
+        case DexRegisterLocation::Kind::kInRegister:
+        case DexRegisterLocation::Kind::kInRegisterHigh:
+        case DexRegisterLocation::Kind::kInFpuRegister:
+        case DexRegisterLocation::Kind::kInFpuRegisterHigh: {
+          uint32_t reg = vreg_map.GetMachineRegister(vreg, number_of_vregs, code_info, encoding);
+          bool result = GetRegisterIfAccessible(reg, ToVRegKind(location), &value);
+          CHECK(result);
+          if (location == DexRegisterLocation::Kind::kInRegister) {
+            if (((1u << reg) & register_mask) != 0) {
+              is_reference = true;
+            }
+          }
+          break;
+        }
+        case DexRegisterLocation::Kind::kConstant: {
+          value = vreg_map.GetConstant(vreg, number_of_vregs, code_info, encoding);
+          if (value == 0) {
+            // Make it a reference for extra safety.
+            is_reference = true;
+          }
+          break;
+        }
+        case DexRegisterLocation::Kind::kNone: {
+          break;
+        }
+        default: {
+          LOG(FATAL)
+              << "Unexpected location kind"
+              << DexRegisterLocation::PrettyDescriptor(
+                    vreg_map.GetLocationInternalKind(vreg,
+                                                     number_of_vregs,
+                                                     code_info,
+                                                     encoding));
+          UNREACHABLE();
+        }
+      }
+      if (is_reference) {
+        new_frame->SetVRegReference(vreg, reinterpret_cast<mirror::Object*>(value));
+      } else {
+        new_frame->SetVReg(vreg, value);
+      }
+    }
+  }
+
   static VRegKind GetVRegKind(uint16_t reg, const std::vector<int32_t>& kinds) {
     return static_cast<VRegKind>(kinds.at(reg * 2));
   }
 
-  void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
+  void HandleQuickDeoptimization(ArtMethod* m,
+                                 ShadowFrame* new_frame,
+                                 const bool* updated_vregs)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     const DexFile::CodeItem* code_item = m->GetCodeItem();
     CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m);
     uint16_t num_regs = code_item->registers_size_;
     uint32_t dex_pc = GetDexPc();
-    StackHandleScope<2> hs(GetThread());  // Dex cache, class loader and method.
+    StackHandleScope<2> hs(GetThread());  // Dex cache and class loader.
     mirror::Class* declaring_class = m->GetDeclaringClass();
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
@@ -333,17 +473,6 @@
                                       true, true);
     bool verifier_success = verifier.Verify();
     CHECK(verifier_success) << PrettyMethod(m);
-    // Check if a shadow frame already exists for debugger's set-local-value purpose.
-    const size_t frame_id = GetFrameId();
-    ShadowFrame* new_frame = GetThread()->FindDebuggerShadowFrame(frame_id);
-    const bool* updated_vregs;
-    if (new_frame == nullptr) {
-      new_frame = ShadowFrame::CreateDeoptimizedFrame(num_regs, nullptr, m, dex_pc);
-      updated_vregs = nullptr;
-    } else {
-      updated_vregs = GetThread()->GetUpdatedVRegFlags(frame_id);
-      DCHECK(updated_vregs != nullptr);
-    }
     {
       ScopedStackedShadowFramePusher pusher(GetThread(), new_frame,
                                             StackedShadowFrameType::kShadowFrameUnderConstruction);
@@ -450,25 +579,6 @@
         }
       }
     }
-    if (updated_vregs != nullptr) {
-      // Calling Thread::RemoveDebuggerShadowFrameMapping will also delete the updated_vregs
-      // array so this must come after we processed the frame.
-      GetThread()->RemoveDebuggerShadowFrameMapping(frame_id);
-      DCHECK(GetThread()->FindDebuggerShadowFrame(frame_id) == nullptr);
-    }
-    if (prev_shadow_frame_ != nullptr) {
-      prev_shadow_frame_->SetLink(new_frame);
-    } else {
-      // Will be popped after the long jump after DeoptimizeStack(),
-      // right before interpreter::EnterInterpreterFromDeoptimize().
-      stacked_shadow_frame_pushed_ = true;
-      GetThread()->PushStackedShadowFrame(
-          new_frame,
-          single_frame_deopt_
-              ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
-              : StackedShadowFrameType::kDeoptimizationShadowFrame);
-    }
-    prev_shadow_frame_ = new_frame;
   }
 
   QuickExceptionHandler* const exception_handler_;
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index 89d6a25..eedf83f 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -71,6 +71,10 @@
     handler_quick_frame_pc_ = handler_quick_frame_pc;
   }
 
+  void SetHandlerMethodHeader(const OatQuickMethodHeader* handler_method_header) {
+    handler_method_header_ = handler_method_header;
+  }
+
   void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) {
     handler_quick_arg0_ = handler_quick_arg0;
   }
@@ -115,6 +119,8 @@
   ArtMethod** handler_quick_frame_;
   // PC to branch to for the handler.
   uintptr_t handler_quick_frame_pc_;
+  // Quick code of the handler.
+  const OatQuickMethodHeader* handler_method_header_;
   // The value for argument 0.
   uintptr_t handler_quick_arg0_;
   // The handler method to report to the debugger.
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 85ac4aa..7de6c06 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -19,7 +19,7 @@
 
 #include "read_barrier.h"
 
-#include "gc/collector/concurrent_copying.h"
+#include "gc/collector/concurrent_copying-inl.h"
 #include "gc/heap.h"
 #include "mirror/object_reference.h"
 #include "mirror/reference.h"
@@ -63,7 +63,7 @@
       ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
       // Update the field atomically. This may fail if mutator updates before us, but it's ok.
       if (ref != old_ref) {
-        obj->CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<false, false>(
+        obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
             offset, old_ref, ref);
       }
     }
@@ -101,7 +101,7 @@
       // Update the field atomically. This may fail if mutator updates before us, but it's ok.
       if (ref != old_ref) {
         Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-        atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref);
+        atomic_root->CompareExchangeStrongRelaxed(old_ref, ref);
       }
     }
     AssertToSpaceInvariant(gc_root_source, ref);
@@ -140,7 +140,7 @@
       if (new_ref.AsMirrorPtr() != old_ref.AsMirrorPtr()) {
         auto* atomic_root =
             reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root);
-        atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
+        atomic_root->CompareExchangeStrongRelaxed(old_ref, new_ref);
       }
     }
     AssertToSpaceInvariant(gc_root_source, ref);
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index bd89be5..c7c2709 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -157,7 +157,8 @@
     result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
-    args[0].b = (SCHAR_MIN << 24) >> 24;
+    static_assert(SCHAR_MIN == -128, "SCHAR_MIN unexpected");
+    args[0].b = SCHAR_MIN;
     result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1f447d0..556ba56 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -64,6 +64,7 @@
 #include "debugger.h"
 #include "elf_file.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "experimental_flags.h"
 #include "fault_handler.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
@@ -115,6 +116,7 @@
 #include "native/sun_misc_Unsafe.h"
 #include "native_bridge_art_interface.h"
 #include "oat_file.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "parsed_options.h"
 #include "profiler.h"
@@ -138,6 +140,12 @@
 
 // If a signal isn't handled properly, enable a handler that attempts to dump the Java stack.
 static constexpr bool kEnableJavaStackTraceHandler = false;
+// Tuned by compiling GmsCore under perf and measuring time spent in DescriptorEquals for class
+// linking.
+static constexpr double kLowMemoryMinLoadFactor = 0.5;
+static constexpr double kLowMemoryMaxLoadFactor = 0.8;
+static constexpr double kNormalMinLoadFactor = 0.4;
+static constexpr double kNormalMaxLoadFactor = 0.7;
 Runtime* Runtime::instance_ = nullptr;
 
 struct TraceConfig {
@@ -198,7 +206,9 @@
       no_sig_chain_(false),
       is_native_bridge_loaded_(false),
       zygote_max_failed_boots_(0),
-      experimental_lambdas_(false) {
+      experimental_flags_(ExperimentalFlags::kNone),
+      oat_file_manager_(nullptr),
+      is_low_memory_mode_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
 }
@@ -284,6 +294,7 @@
   delete heap_;
   delete intern_table_;
   delete java_vm_;
+  delete oat_file_manager_;
   Thread::Shutdown();
   QuasiAtomic::Shutdown();
   verifier::MethodVerifier::Shutdown();
@@ -315,7 +326,7 @@
     if (self == nullptr) {
       os << "(Aborting thread was not attached to runtime!)\n";
       DumpKernelStack(os, GetTid(), "  kernel: ", false);
-      DumpNativeStack(os, GetTid(), "  native: ", nullptr);
+      DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
     } else {
       os << "Aborting thread:\n";
       if (Locks::mutator_lock_->IsExclusiveHeld(self) || Locks::mutator_lock_->IsSharedHeld(self)) {
@@ -698,7 +709,7 @@
 }
 
 bool Runtime::IsDebuggable() const {
-  const OatFile* oat_file = GetClassLinker()->GetPrimaryOatFile();
+  const OatFile* oat_file = GetOatFileManager().GetPrimaryOatFile();
   return oat_file != nullptr && oat_file->IsDebuggable();
 }
 
@@ -756,9 +767,9 @@
   if (elf_file.get() == nullptr) {
     return false;
   }
-  std::unique_ptr<OatFile> oat_file(OatFile::OpenWithElfFile(elf_file.release(), oat_location,
-                                                             nullptr, &error_msg));
-  if (oat_file.get() == nullptr) {
+  std::unique_ptr<const OatFile> oat_file(
+      OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
+  if (oat_file == nullptr) {
     LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
   }
@@ -775,7 +786,7 @@
       dex_files->push_back(std::move(dex_file));
     }
   }
-  Runtime::Current()->GetClassLinker()->RegisterOatFile(oat_file.release());
+  Runtime::Current()->GetOatFileManager().RegisterOatFile(std::move(oat_file));
   return true;
 }
 
@@ -831,6 +842,8 @@
 
   QuasiAtomic::Startup();
 
+  oat_file_manager_ = new OatFileManager;
+
   Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold),
                 runtime_options.GetOrDefault(Opt::HookIsSensitiveThread));
 
@@ -880,7 +893,8 @@
   }
 
   zygote_max_failed_boots_ = runtime_options.GetOrDefault(Opt::ZygoteMaxFailedBoots);
-  experimental_lambdas_ = runtime_options.GetOrDefault(Opt::ExperimentalLambdas);
+  experimental_flags_ = runtime_options.GetOrDefault(Opt::Experimental);
+  is_low_memory_mode_ = runtime_options.Exists(Opt::LowMemoryMode);
 
   XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption);
   ATRACE_BEGIN("CreateHeap");
@@ -1426,6 +1440,7 @@
     // Guaranteed to have no new roots in the constant roots.
     VisitConstantRoots(visitor);
   }
+  Dbg::VisitRoots(visitor);
 }
 
 void Runtime::VisitTransactionRoots(RootVisitor* visitor) {
@@ -1798,4 +1813,12 @@
       : new LinearAlloc(arena_pool_.get());
 }
 
+double Runtime::GetHashTableMinLoadFactor() const {
+  return is_low_memory_mode_ ? kLowMemoryMinLoadFactor : kNormalMinLoadFactor;
+}
+
+double Runtime::GetHashTableMaxLoadFactor() const {
+  return is_low_memory_mode_ ? kLowMemoryMaxLoadFactor : kNormalMaxLoadFactor;
+}
+
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 6154c34..7b1fdb2 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -28,6 +28,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/macros.h"
+#include "experimental_flags.h"
 #include "gc_root.h"
 #include "instrumentation.h"
 #include "jobject_comparator.h"
@@ -82,6 +83,7 @@
 class MonitorList;
 class MonitorPool;
 class NullPointerHandler;
+class OatFileManager;
 class SignalCatcher;
 class StackOverflowHandler;
 class SuspensionHandler;
@@ -531,8 +533,8 @@
     return zygote_max_failed_boots_;
   }
 
-  bool AreExperimentalLambdasEnabled() const {
-    return experimental_lambdas_;
+  bool AreExperimentalFlagsEnabled(ExperimentalFlags flags) {
+    return (experimental_flags_ & flags) != ExperimentalFlags::kNone;
   }
 
   lambda::BoxTable* GetLambdaBoxTable() const {
@@ -573,6 +575,14 @@
   // Create a normal LinearAlloc or low 4gb version if we are 64 bit AOT compiler.
   LinearAlloc* CreateLinearAlloc();
 
+  OatFileManager& GetOatFileManager() const {
+    DCHECK(oat_file_manager_ != nullptr);
+    return *oat_file_manager_;
+  }
+
+  double GetHashTableMinLoadFactor() const;
+  double GetHashTableMaxLoadFactor() const;
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -763,13 +773,19 @@
   // eventually publish them as public-usable opcodes, but they aren't ready yet.
   //
   // Experimental opcodes should not be used by other production code.
-  bool experimental_lambdas_;
+  ExperimentalFlags experimental_flags_;
 
   MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
 
   // Contains the build fingerprint, if given as a parameter.
   std::string fingerprint_;
 
+  // Oat file manager, keeps track of what oat files are open.
+  OatFileManager* oat_file_manager_;
+
+  // Whether or not we are on a low RAM device.
+  bool is_low_memory_mode_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index f0b3c4e..122dcb1 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -41,7 +41,7 @@
  public:
   explicit Backtrace(void* raw_context) : raw_context_(raw_context) {}
   void Dump(std::ostream& os) const {
-    DumpNativeStack(os, GetTid(), "\t", nullptr, raw_context_);
+    DumpNativeStack(os, GetTid(), nullptr, "\t", nullptr, raw_context_);
   }
  private:
   // Stores the context of the signal that was unexpected and will terminate the runtime. The
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index d88e84b..7b5bc1a 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -114,7 +114,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 RUNTIME_OPTIONS_KEY (std::string,         Fingerprint)
-RUNTIME_OPTIONS_KEY (bool,                ExperimentalLambdas,            false) // -X[no]experimental-lambdas
+RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{, lambdas, default-methods}
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 1d21a64..d7edfad 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -24,10 +24,13 @@
 #include "gc_map.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_quick_method_header.h"
 #include "quick/quick_method_frame_info.h"
 #include "runtime.h"
 #include "thread.h"
@@ -102,6 +105,7 @@
       cur_shadow_frame_(nullptr),
       cur_quick_frame_(nullptr),
       cur_quick_frame_pc_(0),
+      cur_oat_quick_method_header_(nullptr),
       num_frames_(num_frames),
       cur_depth_(0),
       current_inlining_depth_(0),
@@ -110,9 +114,9 @@
 }
 
 InlineInfo StackVisitor::GetCurrentInlineInfo() const {
-  ArtMethod* outer_method = GetOuterMethod();
-  uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
-  CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+  uint32_t native_pc_offset = method_header->NativeQuickPcOffset(cur_quick_frame_pc_);
+  CodeInfo code_info = method_header->GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
   DCHECK(stack_map.IsValid());
@@ -141,8 +145,11 @@
     if (IsInInlinedFrame()) {
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
       return GetCurrentInlineInfo().GetDexPcAtDepth(depth_in_stack_map);
+    } else if (cur_oat_quick_method_header_ == nullptr) {
+      return DexFile::kDexNoIndex;
     } else {
-      return GetMethod()->ToDexPc(cur_quick_frame_pc_, abort_on_failure);
+      return cur_oat_quick_method_header_->ToDexPc(
+          GetMethod(), cur_quick_frame_pc_, abort_on_failure);
     }
   } else {
     return 0;
@@ -160,7 +167,7 @@
   } else if (m->IsNative()) {
     if (cur_quick_frame_ != nullptr) {
       HandleScope* hs = reinterpret_cast<HandleScope*>(
-          reinterpret_cast<char*>(cur_quick_frame_) + m->GetHandleScopeOffset().SizeValue());
+          reinterpret_cast<char*>(cur_quick_frame_) + sizeof(ArtMethod*));
       return hs->GetReference(0);
     } else {
       return cur_shadow_frame_->GetVRegReference(0);
@@ -190,7 +197,7 @@
 
 size_t StackVisitor::GetNativePcOffset() const {
   DCHECK(!IsShadowFrame());
-  return GetMethod()->NativeQuickPcOffset(cur_quick_frame_pc_);
+  return GetCurrentOatQuickMethodHeader()->NativeQuickPcOffset(cur_quick_frame_pc_);
 }
 
 bool StackVisitor::IsReferenceVReg(ArtMethod* m, uint16_t vreg) {
@@ -199,10 +206,11 @@
   if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) {
     return false;
   }
-  if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+  if (method_header->IsOptimized()) {
     return true;  // TODO: Implement.
   }
-  const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
+  const uint8_t* native_gc_map = method_header->GetNativeGcMap();
   CHECK(native_gc_map != nullptr) << PrettyMethod(m);
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   // Can't be null or how would we compile its instructions?
@@ -211,9 +219,7 @@
   size_t num_regs = std::min(map.RegWidth() * 8, static_cast<size_t>(code_item->registers_size_));
   const uint8_t* reg_bitmap = nullptr;
   if (num_regs > 0) {
-    Runtime* runtime = Runtime::Current();
-    const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
-    uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
+    uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
     reg_bitmap = map.FindBitMap(native_pc_offset);
     DCHECK(reg_bitmap != nullptr);
   }
@@ -252,7 +258,7 @@
     if (GetVRegFromDebuggerShadowFrame(vreg, kind, val)) {
       return true;
     }
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (cur_oat_quick_method_header_->IsOptimized()) {
       return GetVRegFromOptimizedCode(m, vreg, kind, val);
     } else {
       return GetVRegFromQuickCode(m, vreg, kind, val);
@@ -266,10 +272,10 @@
 
 bool StackVisitor::GetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                         uint32_t* val) const {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  DCHECK_EQ(m, GetMethod());
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
+  const VmapTable vmap_table(method_header->GetVmapTable());
   uint32_t vmap_offset;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
@@ -289,19 +295,17 @@
 
 bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                             uint32_t* val) const {
-  ArtMethod* outer_method = GetOuterMethod();
-  const void* code_pointer = outer_method->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
   DCHECK_EQ(m, GetMethod());
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
                                                     // its instructions?
   uint16_t number_of_dex_registers = code_item->registers_size_;
   DCHECK_LT(vreg, code_item->registers_size_);
-  CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+  CodeInfo code_info = method_header->GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
 
-  uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
+  uint32_t native_pc_offset = method_header->NativeQuickPcOffset(cur_quick_frame_pc_);
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
   DCHECK(stack_map.IsValid());
   size_t depth_in_stack_map = current_inlining_depth_ - 1;
@@ -406,7 +410,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (cur_oat_quick_method_header_->IsOptimized()) {
       return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val);
     } else {
       return GetVRegPairFromQuickCode(m, vreg, kind_lo, kind_hi, val);
@@ -420,10 +424,10 @@
 
 bool StackVisitor::GetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind kind_lo,
                                             VRegKind kind_hi, uint64_t* val) const {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  DCHECK_EQ(m, GetMethod());
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
+  const VmapTable vmap_table(method_header->GetVmapTable());
   uint32_t vmap_offset_lo, vmap_offset_hi;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
@@ -482,7 +486,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (cur_oat_quick_method_header_->IsOptimized()) {
       return false;
     } else {
       return SetVRegFromQuickCode(m, vreg, new_value, kind);
@@ -497,10 +501,9 @@
                                         VRegKind kind) {
   DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
   DCHECK(m == GetMethod());
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
+  const VmapTable vmap_table(method_header->GetVmapTable());
   uint32_t vmap_offset;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
@@ -591,7 +594,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
+    if (cur_oat_quick_method_header_->IsOptimized()) {
       return false;
     } else {
       return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
@@ -605,10 +608,10 @@
 
 bool StackVisitor::SetVRegPairFromQuickCode(
     ArtMethod* m, uint16_t vreg, uint64_t new_value, VRegKind kind_lo, VRegKind kind_hi) {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-  QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+  DCHECK_EQ(m, GetMethod());
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
+  const VmapTable vmap_table(method_header->GetVmapTable());
   uint32_t vmap_offset_lo, vmap_offset_hi;
   // TODO: IsInContext stops before spotting floating point registers.
   if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
@@ -725,14 +728,14 @@
 uintptr_t StackVisitor::GetReturnPc() const {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   DCHECK(sp != nullptr);
-  uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue();
+  uint8_t* pc_addr = sp + GetCurrentQuickFrameInfo().GetReturnPcOffset();
   return *reinterpret_cast<uintptr_t*>(pc_addr);
 }
 
 void StackVisitor::SetReturnPc(uintptr_t new_ret_pc) {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   CHECK(sp != nullptr);
-  uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue();
+  uint8_t* pc_addr = sp + GetCurrentQuickFrameInfo().GetReturnPcOffset();
   *reinterpret_cast<uintptr_t*>(pc_addr) = new_ret_pc;
 }
 
@@ -829,6 +832,43 @@
   return thread->GetInstrumentationStack()->at(depth);
 }
 
+static void AssertPcIsWithinQuickCode(ArtMethod* method, uintptr_t pc)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (method->IsNative() || method->IsRuntimeMethod() || method->IsProxyMethod()) {
+    return;
+  }
+
+  if (pc == reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc())) {
+    return;
+  }
+
+  const void* code = method->GetEntryPointFromQuickCompiledCode();
+  if (code == GetQuickInstrumentationEntryPoint()) {
+    return;
+  }
+
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  if (class_linker->IsQuickToInterpreterBridge(code) ||
+      class_linker->IsQuickResolutionStub(code)) {
+    return;
+  }
+
+  // If we are the JIT then we may have just compiled the method after the
+  // IsQuickToInterpreterBridge check.
+  jit::Jit* const jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) {
+    return;
+  }
+
+  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->code_size_;
+  uintptr_t code_start = reinterpret_cast<uintptr_t>(code);
+  CHECK(code_start <= pc && pc <= (code_start + code_size))
+      << PrettyMethod(method)
+      << " pc=" << std::hex << pc
+      << " code=" << code
+      << " size=" << code_size;
+}
+
 void StackVisitor::SanityCheckFrame() const {
   if (kIsDebugBuild) {
     ArtMethod* method = GetMethod();
@@ -867,9 +907,9 @@
       }
     }
     if (cur_quick_frame_ != nullptr) {
-      method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
+      AssertPcIsWithinQuickCode(method, cur_quick_frame_pc_);
       // Frame sanity.
-      size_t frame_size = method->GetFrameSizeInBytes();
+      size_t frame_size = GetCurrentQuickFrameInfo().FrameSizeInBytes();
       CHECK_NE(frame_size, 0u);
       // A rough guess at an upper size we expect to see for a frame.
       // 256 registers
@@ -879,37 +919,101 @@
       // TODO: 083-compiler-regressions ManyFloatArgs shows this estimate is wrong.
       // const size_t kMaxExpectedFrameSize = (256 + 2 + 3 + 3) * sizeof(word);
       const size_t kMaxExpectedFrameSize = 2 * KB;
-      CHECK_LE(frame_size, kMaxExpectedFrameSize);
-      size_t return_pc_offset = method->GetReturnPcOffset().SizeValue();
+      CHECK_LE(frame_size, kMaxExpectedFrameSize) << PrettyMethod(method);
+      size_t return_pc_offset = GetCurrentQuickFrameInfo().GetReturnPcOffset();
       CHECK_LT(return_pc_offset, frame_size);
     }
   }
 }
 
+// Counts the number of references in the parameter list of the corresponding method.
+// Note: This does _not_ include "this" for non-static methods.
+static uint32_t GetNumberOfReferenceArgsWithoutReceiver(ArtMethod* method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint32_t shorty_len;
+  const char* shorty = method->GetShorty(&shorty_len);
+  uint32_t refs = 0;
+  for (uint32_t i = 1; i < shorty_len ; ++i) {
+    if (shorty[i] == 'L') {
+      refs++;
+    }
+  }
+  return refs;
+}
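The shorty rule the helper above relies on can be made concrete with a small, hedged sketch; the standalone function and the example shorty string below are illustrative assumptions and not part of the patch.

// Illustrative sketch only: counts 'L' (reference) entries in a dex shorty, skipping index 0,
// which encodes the return type - the same rule the helper above applies.
#include <cstdint>
#include <cstring>

static uint32_t CountReferenceArgs(const char* shorty) {
  uint32_t refs = 0;
  for (size_t i = 1; i < std::strlen(shorty); ++i) {
    if (shorty[i] == 'L') {
      ++refs;
    }
  }
  return refs;
}

// Example: a method with shorty "VLIL" (void return; Object, int, Object arguments) has two
// reference arguments, so CountReferenceArgs("VLIL") == 2. The generic JNI code below then adds
// one more handle for the receiver (or the declaring class for static natives).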
+
+QuickMethodFrameInfo StackVisitor::GetCurrentQuickFrameInfo() const {
+  if (cur_oat_quick_method_header_ != nullptr) {
+    return cur_oat_quick_method_header_->GetFrameInfo();
+  }
+
+  ArtMethod* method = GetMethod();
+  Runtime* runtime = Runtime::Current();
+
+  if (method->IsAbstract()) {
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  }
+
+  // This goes before IsProxyMethod since runtime methods have a null declaring class.
+  if (method->IsRuntimeMethod()) {
+    return runtime->GetRuntimeMethodFrameInfo(method);
+  }
+
+  if (method->IsProxyMethod()) {
+    // There is only one direct method of a proxy class: the constructor. That direct method is
+    // cloned from the original java.lang.reflect.Proxy class and is executed as a usual quick
+    // compiled method without any stubs. Therefore the method must have an OatQuickMethodHeader.
+    DCHECK(!method->IsDirect() && !method->IsConstructor())
+        << "Constructors of proxy classes must have an OatQuickMethodHeader";
+    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+  }
+
+  // The only remaining case is if the method is native and uses the generic JNI stub.
+  DCHECK(method->IsNative());
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method, sizeof(void*));
+  DCHECK(class_linker->IsQuickGenericJniStub(entry_point)) << PrettyMethod(method);
+  // Generic JNI frame.
+  uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method) + 1;
+  size_t scope_size = HandleScope::SizeOf(handle_refs);
+  QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
+
+  // Callee saves + handle scope + method ref + alignment
+  // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
+  size_t frame_size = RoundUp(
+      callee_info.FrameSizeInBytes() - sizeof(void*) + sizeof(ArtMethod*) + scope_size,
+      kStackAlignment);
+  return QuickMethodFrameInfo(frame_size, callee_info.CoreSpillMask(), callee_info.FpSpillMask());
+}
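As a hedged worked example of the generic JNI frame-size formula above: all concrete sizes below are assumptions chosen for illustration (the real values come from the target's kRefsAndArgs frame info and the HandleScope layout), and RoundUpTo merely mirrors the runtime's RoundUp.

// Illustrative only: recomputes the formula with made-up 64-bit numbers.
#include <cstddef>

constexpr size_t RoundUpTo(size_t x, size_t n) { return (x + n - 1) / n * n; }

int main() {
  constexpr size_t kAssumedStackAlignment = 16;  // assumed stack alignment
  constexpr size_t callee_save_frame = 96;       // hypothetical kRefsAndArgs frame size
  constexpr size_t handle_scope = 40;            // hypothetical HandleScope::SizeOf(3)
  // Callee saves + handle scope + method ref + alignment; the ArtMethod* slot replaces the
  // method pointer already counted in the callee-save frame, so the two pointer terms cancel.
  size_t frame_size = RoundUpTo(callee_save_frame - sizeof(void*) + sizeof(void*) + handle_scope,
                                kAssumedStackAlignment);
  return frame_size == 144 ? 0 : 1;  // 96 + 40 = 136, rounded up to 144 with these numbers
}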
+
 void StackVisitor::WalkStack(bool include_transitions) {
   DCHECK(thread_ == Thread::Current() || thread_->IsSuspended());
   CHECK_EQ(cur_depth_, 0U);
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
+  size_t inlined_frames_count = 0;
 
   for (const ManagedStack* current_fragment = thread_->GetManagedStack();
        current_fragment != nullptr; current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = 0;
+    cur_oat_quick_method_header_ = nullptr;
 
     if (cur_quick_frame_ != nullptr) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == nullptr);
       ArtMethod* method = *cur_quick_frame_;
       while (method != nullptr) {
+        cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_);
         SanityCheckFrame();
 
         if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
-            && method->IsOptimized(sizeof(void*))) {
-          CodeInfo code_info = method->GetOptimizedCodeInfo();
+            && (cur_oat_quick_method_header_ != nullptr)
+            && cur_oat_quick_method_header_->IsOptimized()) {
+          CodeInfo code_info = cur_oat_quick_method_header_->GetOptimizedCodeInfo();
           StackMapEncoding encoding = code_info.ExtractEncoding();
-          uint32_t native_pc_offset = method->NativeQuickPcOffset(cur_quick_frame_pc_);
+          uint32_t native_pc_offset =
+              cur_oat_quick_method_header_->NativeQuickPcOffset(cur_quick_frame_pc_);
           StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
           if (stack_map.IsValid() && stack_map.HasInlineInfo(encoding)) {
             InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
@@ -922,6 +1026,7 @@
                 return;
               }
               cur_depth_++;
+              inlined_frames_count++;
             }
           }
         }
@@ -931,14 +1036,16 @@
           return;
         }
 
+        QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
         if (context_ != nullptr) {
-          context_->FillCalleeSaves(*this);
+          context_->FillCalleeSaves(reinterpret_cast<uint8_t*>(cur_quick_frame_), frame_info);
         }
-        size_t frame_size = method->GetFrameSizeInBytes();
         // Compute PC for next stack frame from return PC.
-        size_t return_pc_offset = method->GetReturnPcOffset(frame_size).SizeValue();
+        size_t frame_size = frame_info.FrameSizeInBytes();
+        size_t return_pc_offset = frame_size - sizeof(void*);
         uint8_t* return_pc_addr = reinterpret_cast<uint8_t*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
+
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -952,28 +1059,35 @@
               ArtMethod* callee = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
               CHECK_EQ(GetMethod(), callee) << "Expected: " << PrettyMethod(callee) << " Found: "
                                             << PrettyMethod(GetMethod());
-            } else if (instrumentation_frame.method_ != GetMethod()) {
-              LOG(FATAL)  << "Expected: " << PrettyMethod(instrumentation_frame.method_)
-                          << " Found: " << PrettyMethod(GetMethod());
+            } else {
+              CHECK_EQ(instrumentation_frame.method_, GetMethod())
+                  << "Expected: " << PrettyMethod(instrumentation_frame.method_)
+                  << " Found: " << PrettyMethod(GetMethod());
             }
             if (num_frames_ != 0) {
               // Check agreement of frame Ids only if num_frames_ is computed to avoid infinite
               // recursion.
-              CHECK(instrumentation_frame.frame_id_ == GetFrameId())
-                    << "Expected: " << instrumentation_frame.frame_id_
-                    << " Found: " << GetFrameId();
+              size_t frame_id = instrumentation::Instrumentation::ComputeFrameId(
+                  thread_,
+                  cur_depth_,
+                  inlined_frames_count);
+              CHECK_EQ(instrumentation_frame.frame_id_, frame_id);
             }
             return_pc = instrumentation_frame.return_pc_;
           }
         }
+
         cur_quick_frame_pc_ = return_pc;
         uint8_t* next_frame = reinterpret_cast<uint8_t*>(cur_quick_frame_) + frame_size;
         cur_quick_frame_ = reinterpret_cast<ArtMethod**>(next_frame);
 
         if (kDebugStackWalk) {
           LOG(INFO) << PrettyMethod(method) << "@" << method << " size=" << frame_size
-              << " optimized=" << method->IsOptimized(sizeof(void*))
+              << std::boolalpha
+              << " optimized=" << (cur_oat_quick_method_header_ != nullptr &&
+                                   cur_oat_quick_method_header_->IsOptimized())
               << " native=" << method->IsNative()
+              << std::noboolalpha
               << " entrypoints=" << method->GetEntryPointFromQuickCompiledCode()
               << "," << method->GetEntryPointFromJni()
               << " next=" << *cur_quick_frame_;
@@ -1051,4 +1165,87 @@
   }
 }
 
+void LockCountData::AddMonitorInternal(Thread* self, mirror::Object* obj) {
+  if (obj == nullptr) {
+    return;
+  }
+
+  // If there's an error during enter, we won't have locked the monitor. So check there's no
+  // exception.
+  if (self->IsExceptionPending()) {
+    return;
+  }
+
+  if (monitors_ == nullptr) {
+    monitors_.reset(new std::vector<mirror::Object*>());
+  }
+  monitors_->push_back(obj);
+}
+
+void LockCountData::RemoveMonitorInternal(Thread* self, const mirror::Object* obj) {
+  if (obj == nullptr) {
+    return;
+  }
+  bool found_object = false;
+  if (monitors_ != nullptr) {
+    // We need to remove one pointer to ref, as duplicates are used for counting recursive locks.
+    // We arbitrarily choose the first one.
+    auto it = std::find(monitors_->begin(), monitors_->end(), obj);
+    if (it != monitors_->end()) {
+      monitors_->erase(it);
+      found_object = true;
+    }
+  }
+  if (!found_object) {
+    // The object wasn't found. Time for an IllegalMonitorStateException.
+    // The order here isn't fully clear. Assume that any other pending exception is swallowed.
+    // TODO: Maybe make already pending exception a suppressed exception.
+    self->ClearException();
+    self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
+                             "did not lock monitor on object of type '%s' before unlocking",
+                             PrettyTypeOf(const_cast<mirror::Object*>(obj)).c_str());
+  }
+}
+
+// Helper to unlock a monitor. Must be NO_THREAD_SAFETY_ANALYSIS, as we can't statically show
+// that the object was locked.
+void MonitorExitHelper(Thread* self, mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(self != nullptr);
+  DCHECK(obj != nullptr);
+  obj->MonitorExit(self);
+}
+
+bool LockCountData::CheckAllMonitorsReleasedInternal(Thread* self) {
+  DCHECK(self != nullptr);
+  if (monitors_ != nullptr) {
+    if (!monitors_->empty()) {
+      // There may be an exception pending, if the method is terminating abruptly. Clear it.
+      // TODO: Should we add this as a suppressed exception?
+      self->ClearException();
+
+      // OK, there are monitors that are still locked. To enforce structured locking (and avoid
+      // deadlocks) we unlock all of them before we raise the IllegalMonitorState exception.
+      for (mirror::Object* obj : *monitors_) {
+        MonitorExitHelper(self, obj);
+        // If this raised an exception, ignore it. TODO: Should we add it as a suppressed
+        // exception?
+        if (self->IsExceptionPending()) {
+          self->ClearException();
+        }
+      }
+      // Raise an exception, just give the first object as the sample.
+      mirror::Object* first = (*monitors_)[0];
+      self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
+                               "did not unlock monitor on object of type '%s'",
+                               PrettyTypeOf(first).c_str());
+
+      // To make sure this path is not triggered again, clean out the monitors.
+      monitors_->clear();
+
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace art
diff --git a/runtime/stack.h b/runtime/stack.h
index 31acf0e..aa7b616 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -21,9 +21,12 @@
 #include <string>
 
 #include "arch/instruction_set.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "mirror/object_reference.h"
+#include "quick/quick_method_frame_info.h"
 #include "read_barrier.h"
 #include "verify_object.h"
 
@@ -37,6 +40,7 @@
 class Context;
 class HandleScope;
 class InlineInfo;
+class OatQuickMethodHeader;
 class ScopedObjectAccess;
 class ShadowFrame;
 class StackVisitor;
@@ -66,6 +70,72 @@
 struct ShadowFrameDeleter;
 using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
 
+// Counting locks by storing object pointers into a vector. Duplicate entries mark recursive locks.
+// The vector will be visited with the ShadowFrame during GC (so all the locked-on objects are
+// thread roots).
+// Note: implementation is split so that the call sites may be optimized to no-ops in case no
+//       lock counting is necessary. The actual implementation is in the cc file to avoid
+//       dependencies.
+class LockCountData {
+ public:
+  // Add the given object to the list of monitors, that is, objects that have been locked. This
+  // will not throw (but be skipped if there is an exception pending on entry).
+  template <bool kLockCounting>
+  void AddMonitor(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return;
+    }
+    AddMonitorInternal(self, obj);
+  }
+
+  // Try to remove the given object from the monitor list, indicating an unlock operation.
+  // This will throw an IllegalMonitorStateException (clearing any already pending exception) if
+  // there wasn't a lock recorded for the object.
+  template <bool kLockCounting>
+  void RemoveMonitorOrThrow(Thread* self,
+                            const mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return;
+    }
+    RemoveMonitorInternal(self, obj);
+  }
+
+  // Check whether all acquired monitors have been released. This will potentially throw an
+  // IllegalMonitorStateException, clearing any already pending exception. Returns true if the
+  // check shows that everything is OK wrt/ lock counting, false otherwise.
+  template <bool kLockCounting>
+  bool CheckAllMonitorsReleasedOrThrow(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return true;
+    }
+    return CheckAllMonitorsReleasedInternal(self);
+  }
+
+  template <typename T, typename... Args>
+  void VisitMonitors(T visitor, Args&&... args) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (monitors_ != nullptr) {
+      // Visitors may change the Object*. Be careful with the foreach loop.
+      for (mirror::Object*& obj : *monitors_) {
+        visitor(/* inout */ &obj, std::forward<Args>(args)...);
+      }
+    }
+  }
+
+ private:
+  // Internal implementations.
+  void AddMonitorInternal(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
+  void RemoveMonitorInternal(Thread* self, const mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool CheckAllMonitorsReleasedInternal(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Stores references to the locked-on objects. As noted, this should be visited during thread
+  // marking.
+  std::unique_ptr<std::vector<mirror::Object*>> monitors_;
+};
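A minimal usage sketch of the lock-counting API declared above, illustrative only: it assumes the surrounding runtime headers, omits the SHARED_REQUIRES annotations, and the kLockCounting flag and the OnMonitor* call-site names are hypothetical rather than taken from the interpreter.

// Hypothetical interpreter call sites; not part of the patch.
constexpr bool kLockCounting = true;  // assumed compile-time switch of the interpreter flavor

void OnMonitorEnter(Thread* self, ShadowFrame* frame, mirror::Object* obj) {
  obj->MonitorEnter(self);  // assumed runtime call
  frame->GetLockCountData().AddMonitor<kLockCounting>(self, obj);
}

void OnMonitorExit(Thread* self, ShadowFrame* frame, mirror::Object* obj) {
  frame->GetLockCountData().RemoveMonitorOrThrow<kLockCounting>(self, obj);
  obj->MonitorExit(self);
}

void OnMethodExit(Thread* self, ShadowFrame* frame) {
  // Unlocks any leftover monitors and throws IllegalMonitorStateException if something was
  // still held, matching the structured-locking check described above.
  frame->GetLockCountData().CheckAllMonitorsReleasedOrThrow<kLockCounting>(self);
}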
+
 // ShadowFrame has 2 possible layouts:
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
 //  - JNI - just VRegs, but where every VReg holds a reference.
@@ -272,6 +342,10 @@
     }
   }
 
+  LockCountData& GetLockCountData() {
+    return lock_count_data_;
+  }
+
   static size_t LinkOffset() {
     return OFFSETOF_MEMBER(ShadowFrame, link_);
   }
@@ -330,6 +404,7 @@
   ShadowFrame* link_;
   ArtMethod* method_;
   uint32_t dex_pc_;
+  LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
@@ -458,6 +533,9 @@
   StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool GetRegisterIfAccessible(uint32_t reg, VRegKind kind, uint32_t* val) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  public:
   virtual ~StackVisitor() {}
 
@@ -487,18 +565,6 @@
 
   size_t GetNativePcOffset() const SHARED_REQUIRES(Locks::mutator_lock_);
 
-  uintptr_t* CalleeSaveAddress(int num, size_t frame_size) const
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    // Callee saves are held at the top of the frame
-    DCHECK(GetMethod() != nullptr);
-    uint8_t* save_addr =
-        reinterpret_cast<uint8_t*>(cur_quick_frame_) + frame_size - ((num + 1) * sizeof(void*));
-#if defined(__i386__) || defined(__x86_64__)
-    save_addr -= sizeof(void*);  // account for return address
-#endif
-    return reinterpret_cast<uintptr_t*>(save_addr);
-  }
-
   // Returns the height of the stack in the managed stack frames, including transitions.
   size_t GetFrameHeight() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetNumFrames() - cur_depth_ - 1;
@@ -632,6 +698,10 @@
     return current_inlining_depth_ != 0;
   }
 
+  size_t GetCurrentInliningDepth() const {
+    return current_inlining_depth_;
+  }
+
   uintptr_t GetCurrentQuickFramePc() const {
     return cur_quick_frame_pc_;
   }
@@ -644,6 +714,10 @@
     return cur_shadow_frame_;
   }
 
+  bool IsCurrentFrameInInterpreter() const {
+    return cur_shadow_frame_ != nullptr;
+  }
+
   HandleScope* GetCurrentHandleScope(size_t pointer_size) const {
     ArtMethod** sp = GetCurrentQuickFrame();
     // Skip ArtMethod*; handle scope comes next;
@@ -657,6 +731,12 @@
 
   static void DescribeStack(Thread* thread) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  const OatQuickMethodHeader* GetCurrentOatQuickMethodHeader() const {
+    return cur_oat_quick_method_header_;
+  }
+
+  QuickMethodFrameInfo GetCurrentQuickFrameInfo() const SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Private constructor known in the case that num_frames_ has already been computed.
   StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind, size_t num_frames)
@@ -694,8 +774,6 @@
   bool GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                 uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool GetRegisterIfAccessible(uint32_t reg, VRegKind kind, uint32_t* val) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool GetVRegPairFromDebuggerShadowFrame(uint16_t vreg, VRegKind kind_lo, VRegKind kind_hi,
                                           uint64_t* val) const
@@ -733,6 +811,7 @@
   ShadowFrame* cur_shadow_frame_;
   ArtMethod** cur_quick_frame_;
   uintptr_t cur_quick_frame_pc_;
+  const OatQuickMethodHeader* cur_oat_quick_method_header_;
   // Lazily computed, number of frames in the stack.
   size_t num_frames_;
   // Depth of the frame we're currently at.
diff --git a/runtime/stride_iterator.h b/runtime/stride_iterator.h
index a9da51b..ac04c3b 100644
--- a/runtime/stride_iterator.h
+++ b/runtime/stride_iterator.h
@@ -19,6 +19,8 @@
 
 #include <iterator>
 
+#include "base/logging.h"
+
 namespace art {
 
 template<typename T>
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 65f71ef..b0cf418 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -57,6 +57,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
+#include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "quick_exception_handler.h"
 #include "quick/quick_method_frame_info.h"
@@ -66,6 +67,7 @@
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "stack.h"
+#include "stack_map.h"
 #include "thread_list.h"
 #include "thread-inl.h"
 #include "utils.h"
@@ -106,19 +108,17 @@
   UNIMPLEMENTED(FATAL);
 }
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints);
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints);
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
-  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.interpreter_entrypoints);
+  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints);
   uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
       sizeof(tlsPtr_.quick_entrypoints));
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
-  InitEntryPoints(&tlsPtr_.interpreter_entrypoints, &tlsPtr_.jni_entrypoints,
-                  &tlsPtr_.quick_entrypoints);
+  InitEntryPoints(&tlsPtr_.jni_entrypoints, &tlsPtr_.quick_entrypoints);
 }
 
 void Thread::InitStringEntryPoints() {
@@ -732,6 +732,18 @@
   // a native peer!
   if (create_peer) {
     self->CreatePeer(thread_name, as_daemon, thread_group);
+    if (self->IsExceptionPending()) {
+      // We cannot keep the exception around, as we're deleting self. Try to be helpful and log it.
+      {
+        ScopedObjectAccess soa(self);
+        LOG(ERROR) << "Exception creating thread peer:";
+        LOG(ERROR) << self->GetException()->Dump();
+        self->ClearException();
+      }
+      runtime->GetThreadList()->Unregister(self);
+      // Unregister deletes self, no need to do this here.
+      return nullptr;
+    }
   } else {
     // These aren't necessary, but they improve diagnostics for unit tests & command-line tools.
     if (thread_name != nullptr) {
@@ -790,7 +802,9 @@
                                 WellKnownClasses::java_lang_Thread,
                                 WellKnownClasses::java_lang_Thread_init,
                                 thread_group, thread_name.get(), thread_priority, thread_is_daemon);
-  AssertNoPendingException();
+  if (IsExceptionPending()) {
+    return;
+  }
 
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
@@ -905,9 +919,9 @@
      << "]";
 }
 
-void Thread::Dump(std::ostream& os) const {
+void Thread::Dump(std::ostream& os, BacktraceMap* backtrace_map) const {
   DumpState(os);
-  DumpStack(os);
+  DumpStack(os, backtrace_map);
 }
 
 mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
@@ -1466,7 +1480,7 @@
   }
 }
 
-void Thread::DumpStack(std::ostream& os) const {
+void Thread::DumpStack(std::ostream& os, BacktraceMap* backtrace_map) const {
   // TODO: we call this code when dying but may not have suspended the thread ourself. The
   //       IsSuspended check is therefore racy with the use for dumping (normally we inhibit
   //       the race with the thread_suspend_count_lock_).
@@ -1481,7 +1495,8 @@
     // If we're currently in native code, dump that stack before dumping the managed stack.
     if (dump_for_abort || ShouldShowNativeStack(this)) {
       DumpKernelStack(os, GetTid(), "  kernel: ", false);
-      DumpNativeStack(os, GetTid(), "  native: ", GetCurrentMethod(nullptr, !dump_for_abort));
+      ArtMethod* method = GetCurrentMethod(nullptr, !dump_for_abort);
+      DumpNativeStack(os, GetTid(), backtrace_map, "  native: ", method);
     }
     DumpJavaStack(os);
   } else {
@@ -1538,6 +1553,7 @@
   // Finish attaching the main thread.
   ScopedObjectAccess soa(Thread::Current());
   Thread::Current()->CreatePeer("main", false, runtime->GetMainThreadGroup());
+  Thread::Current()->AssertNoPendingException();
 
   Runtime::Current()->GetClassLinker()->RunRootClinits();
 }
@@ -2367,15 +2383,6 @@
   DO_THREAD_OFFSET(ThreadSuspendTriggerOffset<ptr_size>(), "suspend_trigger")
 #undef DO_THREAD_OFFSET
 
-#define INTERPRETER_ENTRY_POINT_INFO(x) \
-    if (INTERPRETER_ENTRYPOINT_OFFSET(ptr_size, x).Uint32Value() == offset) { \
-      os << #x; \
-      return; \
-    }
-  INTERPRETER_ENTRY_POINT_INFO(pInterpreterToInterpreterBridge)
-  INTERPRETER_ENTRY_POINT_INFO(pInterpreterToCompiledCodeBridge)
-#undef INTERPRETER_ENTRY_POINT_INFO
-
 #define JNI_ENTRY_POINT_INFO(x) \
     if (JNI_ENTRYPOINT_OFFSET(ptr_size, x).Uint32Value() == offset) { \
       os << #x; \
@@ -2632,41 +2639,20 @@
     VisitDeclaringClass(m);
     DCHECK(m != nullptr);
     size_t num_regs = shadow_frame->NumberOfVRegs();
-    if (m->IsNative() || shadow_frame->HasReferenceArray()) {
-      // handle scope for JNI or References for interpreter.
-      for (size_t reg = 0; reg < num_regs; ++reg) {
-        mirror::Object* ref = shadow_frame->GetVRegReference(reg);
-        if (ref != nullptr) {
-          mirror::Object* new_ref = ref;
-          visitor_(&new_ref, reg, this);
-          if (new_ref != ref) {
-            shadow_frame->SetVRegReference(reg, new_ref);
-          }
-        }
-      }
-    } else {
-      // Java method.
-      // Portable path use DexGcMap and store in Method.native_gc_map_.
-      const uint8_t* gc_map = m->GetNativeGcMap(sizeof(void*));
-      CHECK(gc_map != nullptr) << PrettyMethod(m);
-      verifier::DexPcToReferenceMap dex_gc_map(gc_map);
-      uint32_t dex_pc = shadow_frame->GetDexPC();
-      const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
-      DCHECK(reg_bitmap != nullptr);
-      num_regs = std::min(dex_gc_map.RegWidth() * 8, num_regs);
-      for (size_t reg = 0; reg < num_regs; ++reg) {
-        if (TestBitmap(reg, reg_bitmap)) {
-          mirror::Object* ref = shadow_frame->GetVRegReference(reg);
-          if (ref != nullptr) {
-            mirror::Object* new_ref = ref;
-            visitor_(&new_ref, reg, this);
-            if (new_ref != ref) {
-              shadow_frame->SetVRegReference(reg, new_ref);
-            }
-          }
+    DCHECK(m->IsNative() || shadow_frame->HasReferenceArray());
+    // handle scope for JNI or References for interpreter.
+    for (size_t reg = 0; reg < num_regs; ++reg) {
+      mirror::Object* ref = shadow_frame->GetVRegReference(reg);
+      if (ref != nullptr) {
+        mirror::Object* new_ref = ref;
+        visitor_(&new_ref, reg, this);
+        if (new_ref != ref) {
+          shadow_frame->SetVRegReference(reg, new_ref);
         }
       }
     }
+    // Mark lock count map required for structured locking checks.
+    shadow_frame->GetLockCountData().VisitMonitors(visitor_, -1, this);
   }
 
  private:
@@ -2692,13 +2678,12 @@
 
     // Process register map (which native and runtime methods don't have)
     if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
-      if (m->IsOptimized(sizeof(void*))) {
+      const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+      if (method_header->IsOptimized()) {
         auto* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
             reinterpret_cast<uintptr_t>(cur_quick_frame));
-        Runtime* runtime = Runtime::Current();
-        const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
-        uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
-        CodeInfo code_info = m->GetOptimizedCodeInfo();
+        uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
+        CodeInfo code_info = method_header->GetOptimizedCodeInfo();
         StackMapEncoding encoding = code_info.ExtractEncoding();
         StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
         DCHECK(map.IsValid());
@@ -2728,7 +2713,7 @@
           }
         }
       } else {
-        const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
+        const uint8_t* native_gc_map = method_header->GetNativeGcMap();
         CHECK(native_gc_map != nullptr) << PrettyMethod(m);
         const DexFile::CodeItem* code_item = m->GetCodeItem();
         // Can't be null or how would we compile its instructions?
@@ -2736,14 +2721,11 @@
         NativePcOffsetToReferenceMap map(native_gc_map);
         size_t num_regs = map.RegWidth() * 8;
         if (num_regs > 0) {
-          Runtime* runtime = Runtime::Current();
-          const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
-          uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
+          uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
           const uint8_t* reg_bitmap = map.FindBitMap(native_pc_offset);
           DCHECK(reg_bitmap != nullptr);
-          const void* code_pointer = ArtMethod::EntryPointToCodePointer(entry_point);
-          const VmapTable vmap_table(m->GetVmapTable(code_pointer, sizeof(void*)));
-          QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
+          const VmapTable vmap_table(method_header->GetVmapTable());
+          QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
           // For all dex registers in the bitmap
           DCHECK(cur_quick_frame != nullptr);
           for (size_t reg = 0; reg < num_regs; ++reg) {
diff --git a/runtime/thread.h b/runtime/thread.h
index d262c62..138c143 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -30,7 +30,6 @@
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "globals.h"
@@ -43,6 +42,8 @@
 #include "stack.h"
 #include "thread_state.h"
 
+class BacktraceMap;
+
 namespace art {
 
 namespace gc {
@@ -185,7 +186,7 @@
   void ShortDump(std::ostream& os) const;
 
   // Dumps the detailed thread state and the thread stack (used for SIGQUIT).
-  void Dump(std::ostream& os) const
+  void Dump(std::ostream& os, BacktraceMap* backtrace_map = nullptr) const
       REQUIRES(!Locks::thread_suspend_count_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -580,12 +581,6 @@
   }
 
   template<size_t pointer_size>
-  static ThreadOffset<pointer_size> InterpreterEntryPointOffset(size_t interp_entrypoint_offset) {
-    return ThreadOffsetFromTlsPtr<pointer_size>(
-        OFFSETOF_MEMBER(tls_ptr_sized_values, interpreter_entrypoints) + interp_entrypoint_offset);
-  }
-
-  template<size_t pointer_size>
   static ThreadOffset<pointer_size> JniEntryPointOffset(size_t jni_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, jni_entrypoints) + jni_entrypoint_offset);
@@ -633,6 +628,24 @@
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, thread_local_objects));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> RosAllocRunsOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                rosalloc_runs));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalAllocStackTopOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_alloc_stack_top));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalAllocStackEndOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_alloc_stack_end));
+  }
+
   // Size of stack less any space reserved for stack overflow
   size_t GetStackSize() const {
     return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);
@@ -1031,7 +1044,7 @@
   void VerifyStackImpl() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void DumpState(std::ostream& os) const SHARED_REQUIRES(Locks::mutator_lock_);
-  void DumpStack(std::ostream& os) const
+  void DumpStack(std::ostream& os, BacktraceMap* backtrace_map = nullptr) const
       REQUIRES(!Locks::thread_suspend_count_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1329,7 +1342,6 @@
 
     // Entrypoint function pointers.
     // TODO: move this to more of a global offset table model to avoid per-thread duplication.
-    InterpreterEntryPoints interpreter_entrypoints;
     JniEntryPoints jni_entrypoints;
     QuickEntryPoints quick_entrypoints;
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 6176acd..b09b87f 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -18,6 +18,7 @@
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 
+#include <backtrace/BacktraceMap.h>
 #include <cutils/trace.h>
 #include <dirent.h>
 #include <ScopedLocalRef.h>
@@ -59,8 +60,11 @@
 static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;
 
 ThreadList::ThreadList()
-    : suspend_all_count_(0), debug_suspend_all_count_(0), unregistering_count_(0),
-      suspend_all_historam_("suspend all histogram", 16, 64), long_suspend_(false) {
+    : suspend_all_count_(0),
+      debug_suspend_all_count_(0),
+      unregistering_count_(0),
+      suspend_all_historam_("suspend all histogram", 16, 64),
+      long_suspend_(false) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
@@ -109,9 +113,10 @@
 
 void ThreadList::DumpNativeStacks(std::ostream& os) {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
   for (const auto& thread : list_) {
     os << "DUMPING THREAD " << thread->GetTid() << "\n";
-    DumpNativeStack(os, thread->GetTid(), "\t");
+    DumpNativeStack(os, thread->GetTid(), map.get(), "\t");
     os << "\n";
   }
 }
@@ -138,7 +143,7 @@
   // TODO: Reenable this when the native code in system_server can handle it.
   // Currently "adb shell kill -3 `pid system_server`" will cause it to exit.
   if (false) {
-    DumpNativeStack(os, tid, "  native: ");
+    DumpNativeStack(os, tid, nullptr, "  native: ");
   }
   os << "\n";
 }
@@ -175,7 +180,8 @@
 // A closure used by Thread::Dump.
 class DumpCheckpoint FINAL : public Closure {
  public:
-  explicit DumpCheckpoint(std::ostream* os) : os_(os), barrier_(0) {}
+  explicit DumpCheckpoint(std::ostream* os)
+      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(GetTid())) {}
 
   void Run(Thread* thread) OVERRIDE {
     // Note thread and self may not be equal if thread was already suspended at the point of the
@@ -184,7 +190,7 @@
     std::ostringstream local_os;
     {
       ScopedObjectAccess soa(self);
-      thread->Dump(local_os);
+      thread->Dump(local_os, backtrace_map_.get());
     }
     local_os << "\n";
     {
@@ -192,9 +198,7 @@
       MutexLock mu(self, *Locks::logging_lock_);
       *os_ << local_os.str();
     }
-    if (thread->GetState() == kRunnable) {
-      barrier_.Pass(self);
-    }
+    barrier_.Pass(self);
   }
 
   void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
@@ -213,6 +217,8 @@
   std::ostream* const os_;
   // The barrier to be passed through and for the requestor to wait upon.
   Barrier barrier_;
+  // A backtrace map shared by all threads, so the map info is acquired and parsed only once.
+  std::unique_ptr<BacktraceMap> backtrace_map_;
 };
 
 void ThreadList::Dump(std::ostream& os) {
@@ -269,9 +275,6 @@
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
-  if (kDebugLocking && gAborting == 0) {
-    CHECK_NE(self->GetState(), kRunnable);
-  }
 
   std::vector<Thread*> suspended_count_modified_threads;
   size_t count = 0;
@@ -280,12 +283,12 @@
     // manually called.
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    count = list_.size();
     for (const auto& thread : list_) {
       if (thread != self) {
         while (true) {
           if (thread->RequestCheckpoint(checkpoint_function)) {
             // This thread will run its checkpoint some time in the near future.
-            count++;
             break;
           } else {
             // We are probably suspended, try to make sure that we stay suspended.
@@ -378,7 +381,8 @@
 // from-space to to-space refs. Used to synchronize threads at a point
 // to mark the initiation of marking while maintaining the to-space
 // invariant.
-size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
+                                   Closure* flip_callback,
                                    gc::collector::GarbageCollector* collector) {
   TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
   const uint64_t start_time = NanoTime();
@@ -506,7 +510,9 @@
 // Debugger thread might be set to kRunnable for a short period of time after the
 // SuspendAllInternal. This is safe because it will be set back to suspended state before
 // the SuspendAll returns.
-void ThreadList::SuspendAllInternal(Thread* self, Thread* ignore1, Thread* ignore2,
+void ThreadList::SuspendAllInternal(Thread* self,
+                                    Thread* ignore1,
+                                    Thread* ignore2,
                                     bool debug_suspend) {
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
@@ -695,12 +701,14 @@
   VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
 }
 
-static void ThreadSuspendByPeerWarning(Thread* self, LogSeverity severity, const char* message,
+static void ThreadSuspendByPeerWarning(Thread* self,
+                                       LogSeverity severity,
+                                       const char* message,
                                        jobject peer) {
   JNIEnvExt* env = self->GetJniEnv();
   ScopedLocalRef<jstring>
-      scoped_name_string(env, (jstring)env->GetObjectField(
-          peer, WellKnownClasses::java_lang_Thread_name));
+      scoped_name_string(env, static_cast<jstring>(env->GetObjectField(
+          peer, WellKnownClasses::java_lang_Thread_name)));
   ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
   if (scoped_name_chars.c_str() == nullptr) {
       LOG(severity) << message << ": " << peer;
@@ -710,8 +718,10 @@
   }
 }
 
-Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
-                                        bool debug_suspension, bool* timed_out) {
+Thread* ThreadList::SuspendThreadByPeer(jobject peer,
+                                        bool request_suspension,
+                                        bool debug_suspension,
+                                        bool* timed_out) {
   const uint64_t start_time = NanoTime();
   useconds_t sleep_us = kThreadSuspendInitialSleepUs;
   *timed_out = false;
@@ -808,12 +818,14 @@
   }
 }
 
-static void ThreadSuspendByThreadIdWarning(LogSeverity severity, const char* message,
+static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
+                                           const char* message,
                                            uint32_t thread_id) {
   LOG(severity) << StringPrintf("%s: %d", message, thread_id);
 }
 
-Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension,
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
+                                            bool debug_suspension,
                                             bool* timed_out) {
   const uint64_t start_time = NanoTime();
   useconds_t sleep_us = kThreadSuspendInitialSleepUs;
@@ -1217,7 +1229,7 @@
       std::string thread_name;
       self->GetThreadName(thread_name);
       std::ostringstream os;
-      DumpNativeStack(os, GetTid(), "  native: ", nullptr);
+      DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
       LOG(ERROR) << "Request to unregister unattached thread " << thread_name << "\n" << os.str();
       break;
     } else {
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index c727432..07ea10d 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -55,8 +55,8 @@
 
   // Thread suspension support.
   void ResumeAll()
-      UNLOCK_FUNCTION(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
+      UNLOCK_FUNCTION(Locks::mutator_lock_);
   void Resume(Thread* thread, bool for_debugger = false)
       REQUIRES(!Locks::thread_suspend_count_lock_);
 
@@ -76,7 +76,8 @@
   // is set to true.
   Thread* SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension,
                               bool* timed_out)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
   // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
@@ -84,14 +85,16 @@
   // the thread terminating. Note that as thread ids are recycled this may not suspend the expected
   // thread, that may be terminating. If the suspension times out then *timeout is set to true.
   Thread* SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
   // Find an already suspended thread (or self) by its id.
   Thread* FindThreadByThreadId(uint32_t thin_lock_id);
 
   // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
-  // of the suspend check. Returns how many checkpoints we should expect to run.
+  // of the suspend check. Returns how many checkpoints are expected to run, including for
+  // already suspended threads (b/24191051).
   size_t RunCheckpoint(Closure* checkpoint_function)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
@@ -100,14 +103,17 @@
 
   // Flip thread roots from from-space refs to to-space refs. Used by
   // the concurrent copying collector.
-  size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+  size_t FlipThreadRoots(Closure* thread_flip_visitor,
+                         Closure* flip_callback,
                          gc::collector::GarbageCollector* collector)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
   // Suspends all threads
   void SuspendAllForDebugger()
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
   void SuspendSelfForDebugger()
@@ -126,10 +132,14 @@
 
   // Add/remove current thread from list.
   void Register(Thread* self)
-      REQUIRES(Locks::runtime_shutdown_lock_, !Locks::mutator_lock_, !Locks::thread_list_lock_,
+      REQUIRES(Locks::runtime_shutdown_lock_)
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
-  void Unregister(Thread* self) REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
-                                         !Locks::thread_suspend_count_lock_);
+  void Unregister(Thread* self)
+      REQUIRES(!Locks::mutator_lock_,
+               !Locks::thread_list_lock_,
+               !Locks::thread_suspend_count_lock_);
 
   void VisitRoots(RootVisitor* visitor) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -159,7 +169,9 @@
   void WaitForOtherNonDaemonThreadsToExit()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
-  void SuspendAllInternal(Thread* self, Thread* ignore1, Thread* ignore2 = nullptr,
+  void SuspendAllInternal(Thread* self,
+                          Thread* ignore1,
+                          Thread* ignore2 = nullptr,
                           bool debug_suspend = false)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
@@ -200,8 +212,8 @@
               !Locks::mutator_lock_);
   // No REQUIRES(mutator_lock_) since the unlock function already asserts this.
   ~ScopedSuspendAll()
-      UNLOCK_FUNCTION(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
+      UNLOCK_FUNCTION(Locks::mutator_lock_);
 };
 
 }  // namespace art
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 745aa63..ab342aa 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -752,26 +752,31 @@
   }
 }
 
-void Trace::DexPcMoved(Thread* thread, mirror::Object* this_object,
-                       ArtMethod* method, uint32_t new_dex_pc) {
-  UNUSED(thread, this_object, method, new_dex_pc);
+void Trace::DexPcMoved(Thread* thread ATTRIBUTE_UNUSED,
+                       mirror::Object* this_object ATTRIBUTE_UNUSED,
+                       ArtMethod* method,
+                       uint32_t new_dex_pc) {
   // We're not recorded to listen to this kind of event, so complain.
   LOG(ERROR) << "Unexpected dex PC event in tracing " << PrettyMethod(method) << " " << new_dex_pc;
 }
 
-void Trace::FieldRead(Thread* thread, mirror::Object* this_object,
-                       ArtMethod* method, uint32_t dex_pc, ArtField* field)
+void Trace::FieldRead(Thread* thread ATTRIBUTE_UNUSED,
+                      mirror::Object* this_object ATTRIBUTE_UNUSED,
+                      ArtMethod* method,
+                      uint32_t dex_pc,
+                      ArtField* field ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  UNUSED(thread, this_object, method, dex_pc, field);
   // We're not recorded to listen to this kind of event, so complain.
   LOG(ERROR) << "Unexpected field read event in tracing " << PrettyMethod(method) << " " << dex_pc;
 }
 
-void Trace::FieldWritten(Thread* thread, mirror::Object* this_object,
-                          ArtMethod* method, uint32_t dex_pc, ArtField* field,
-                          const JValue& field_value)
+void Trace::FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
+                         mirror::Object* this_object ATTRIBUTE_UNUSED,
+                         ArtMethod* method,
+                         uint32_t dex_pc,
+                         ArtField* field ATTRIBUTE_UNUSED,
+                         const JValue& field_value ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  UNUSED(thread, this_object, method, dex_pc, field, field_value);
   // We're not recorded to listen to this kind of event, so complain.
   LOG(ERROR) << "Unexpected field write event in tracing " << PrettyMethod(method) << " " << dex_pc;
 }
@@ -804,9 +809,9 @@
                       thread_clock_diff, wall_clock_diff);
 }
 
-void Trace::ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
+void Trace::ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED,
+                            mirror::Throwable* exception_object ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  UNUSED(thread, exception_object);
   LOG(ERROR) << "Unexpected exception caught event in tracing";
 }
 
diff --git a/runtime/type_lookup_table.cc b/runtime/type_lookup_table.cc
new file mode 100644
index 0000000..0d40bb7
--- /dev/null
+++ b/runtime/type_lookup_table.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "type_lookup_table.h"
+
+#include "dex_file-inl.h"
+#include "utf-inl.h"
+#include "utils.h"
+
+#include <memory>
+#include <cstring>
+
+namespace art {
+
+static uint16_t MakeData(uint16_t class_def_idx, uint32_t hash, uint32_t mask) {
+  uint16_t hash_mask = static_cast<uint16_t>(~mask);
+  return (static_cast<uint16_t>(hash) & hash_mask) | class_def_idx;
+}
+
+TypeLookupTable::~TypeLookupTable() {
+  if (!owns_entries_) {
+    // We don't actually own the entries, don't let the unique_ptr release them.
+    entries_.release();
+  }
+}
+
+uint32_t TypeLookupTable::RawDataLength() const {
+  return RawDataLength(dex_file_);
+}
+
+uint32_t TypeLookupTable::RawDataLength(const DexFile& dex_file) {
+  return RoundUpToPowerOfTwo(dex_file.NumClassDefs()) * sizeof(Entry);
+}
+
+TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file) {
+  const uint32_t num_class_defs = dex_file.NumClassDefs();
+  return (num_class_defs == 0 || num_class_defs > std::numeric_limits<uint16_t>::max())
+      ? nullptr
+      : new TypeLookupTable(dex_file);
+}
+
+TypeLookupTable* TypeLookupTable::Open(const uint8_t* raw_data, const DexFile& dex_file) {
+  return new TypeLookupTable(raw_data, dex_file);
+}
+
+TypeLookupTable::TypeLookupTable(const DexFile& dex_file)
+    : dex_file_(dex_file),
+      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
+      entries_(new Entry[mask_ + 1]),
+      owns_entries_(true) {
+  std::vector<uint16_t> conflict_class_defs;
+  // The first stage. Put elements on their initial positions. If an initial position is already
+  // occupied then delay the insertion of the element to the second stage to reduce probing
+  // distance.
+  for (size_t i = 0; i < dex_file.NumClassDefs(); ++i) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(i);
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(class_def.class_idx_);
+    const DexFile::StringId& str_id = dex_file.GetStringId(type_id.descriptor_idx_);
+    const uint32_t hash = ComputeModifiedUtf8Hash(dex_file.GetStringData(str_id));
+    Entry entry;
+    entry.str_offset = str_id.string_data_off_;
+    entry.data = MakeData(i, hash, GetSizeMask());
+    if (!SetOnInitialPos(entry, hash)) {
+      conflict_class_defs.push_back(i);
+    }
+  }
+  // The second stage. The initial position of these elements had a collision. Put these elements
+  // into the nearest free cells and link them together by updating next_pos_delta.
+  for (uint16_t class_def_idx : conflict_class_defs) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx);
+    const DexFile::TypeId& type_id = dex_file.GetTypeId(class_def.class_idx_);
+    const DexFile::StringId& str_id = dex_file.GetStringId(type_id.descriptor_idx_);
+    const uint32_t hash = ComputeModifiedUtf8Hash(dex_file.GetStringData(str_id));
+    Entry entry;
+    entry.str_offset = str_id.string_data_off_;
+    entry.data = MakeData(class_def_idx, hash, GetSizeMask());
+    Insert(entry, hash);
+  }
+}
+
+TypeLookupTable::TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file)
+    : dex_file_(dex_file),
+      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
+      entries_(reinterpret_cast<Entry*>(const_cast<uint8_t*>(raw_data))),
+      owns_entries_(false) {}
+
+bool TypeLookupTable::SetOnInitialPos(const Entry& entry, uint32_t hash) {
+  const uint32_t pos = hash & GetSizeMask();
+  if (!entries_[pos].IsEmpty()) {
+    return false;
+  }
+  entries_[pos] = entry;
+  entries_[pos].next_pos_delta = 0;
+  return true;
+}
+
+void TypeLookupTable::Insert(const Entry& entry, uint32_t hash) {
+  uint32_t pos = FindLastEntryInBucket(hash & GetSizeMask());
+  uint32_t next_pos = (pos + 1) & GetSizeMask();
+  while (!entries_[next_pos].IsEmpty()) {
+    next_pos = (next_pos + 1) & GetSizeMask();
+  }
+  const uint32_t delta = (next_pos >= pos) ? (next_pos - pos) : (next_pos + Size() - pos);
+  entries_[pos].next_pos_delta = delta;
+  entries_[next_pos] = entry;
+  entries_[next_pos].next_pos_delta = 0;
+}
+
+uint32_t TypeLookupTable::FindLastEntryInBucket(uint32_t pos) const {
+  const Entry* entry = &entries_[pos];
+  while (!entry->IsLast()) {
+    pos = (pos + entry->next_pos_delta) & GetSizeMask();
+    entry = &entries_[pos];
+  }
+  return pos;
+}
+
+}  // namespace art
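To make the two-stage construction above concrete, here is a small self-contained toy model; the table size, keys and bucket assignments are made-up assumptions, and ToyEntry only mimics the Entry layout rather than reusing it.

// Toy model of the bucket-chaining scheme above (illustrative only, independent of ART types).
#include <cassert>
#include <cstdint>
#include <vector>

struct ToyEntry {
  uint32_t key;             // stands in for str_offset; 0 means the slot is empty
  uint16_t next_pos_delta;  // 0 means this is the last entry of its bucket
};

int main() {
  const uint32_t mask = 3;                 // 4 buckets, as if RoundUpToPowerOfTwo(3) - 1
  std::vector<ToyEntry> table(mask + 1);   // value-initialized, i.e. all slots empty

  // Stage 1: keys 10, 11, 12 hash to buckets 2, 3, 2 (assumed hashes); 12 collides and is delayed.
  table[2] = {10, 0};
  table[3] = {11, 0};

  // Stage 2: key 12 also hashes to bucket 2. Walk bucket 2 to its last entry, then scan forward
  // (wrapping) for the nearest free slot and link it via next_pos_delta.
  uint32_t pos = 2;
  while (table[pos].next_pos_delta != 0) {
    pos = (pos + table[pos].next_pos_delta) & mask;
  }
  uint32_t next_pos = (pos + 1) & mask;
  while (table[next_pos].key != 0) {
    next_pos = (next_pos + 1) & mask;
  }
  table[pos].next_pos_delta = static_cast<uint16_t>(
      next_pos >= pos ? next_pos - pos : next_pos + table.size() - pos);
  table[next_pos] = {12, 0};

  assert(next_pos == 0);                 // slot 3 was taken, so the scan wrapped around to slot 0
  assert(table[2].next_pos_delta == 2);  // delta 2 links bucket 2 to slot 0 ((2 + 2) & mask == 0)
  return 0;
}

A lookup for key 12 would then start at bucket 2, see key 10, follow next_pos_delta to slot 0 and find key 12 there, which is the same chain the runtime Lookup() walks.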
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
new file mode 100644
index 0000000..3c2295c
--- /dev/null
+++ b/runtime/type_lookup_table.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_TYPE_LOOKUP_TABLE_H_
+#define ART_RUNTIME_TYPE_LOOKUP_TABLE_H_
+
+#include "dex_file.h"
+#include "leb128.h"
+#include "utf.h"
+
+namespace art {
+
+/**
+ * TypeLookupTable is used to find a class_def_idx by class descriptor quickly.
+ * The implementation is based on a hash table.
+ * The table is built at compile time by calling Create() and is written into the OAT file.
+ * At runtime the raw data is read from the memory-mapped file by calling Open(), so the mapped
+ * table pages remain clean (they are never written to).
+ */
+class TypeLookupTable {
+ public:
+  ~TypeLookupTable();
+
+  // Return the number of buckets in the lookup table.
+  uint32_t Size() const {
+    return mask_ + 1;
+  }
+
+  // Searches for the class_def_idx by class descriptor and its hash.
+  // If no match is found, the method returns DexFile::kDexNoIndex.
+  ALWAYS_INLINE uint32_t Lookup(const char* str, uint32_t hash) const {
+    uint32_t pos = hash & GetSizeMask();
+    // Thanks to the special insertion algorithm, the element at position pos is either empty or
+    // the start of a bucket.
+    const Entry* entry = &entries_[pos];
+    while (!entry->IsEmpty()) {
+      if (CmpHashBits(entry->data, hash) && IsStringsEquals(str, entry->str_offset)) {
+        return GetClassDefIdx(entry->data);
+      }
+      if (entry->IsLast()) {
+        return DexFile::kDexNoIndex;
+      }
+      pos = (pos + entry->next_pos_delta) & GetSizeMask();
+      entry = &entries_[pos];
+    }
+    return DexFile::kDexNoIndex;
+  }
+
+  // Creates a lookup table for the given dex file.
+  static TypeLookupTable* Create(const DexFile& dex_file);
+
+  // Opens a lookup table from existing binary data. The lookup table does not own the binary data.
+  static TypeLookupTable* Open(const uint8_t* raw_data, const DexFile& dex_file);
+
+  // Returns a pointer to the lookup table's binary data. Used by the oat writer.
+  const uint8_t* RawData() const {
+    return reinterpret_cast<const uint8_t*>(entries_.get());
+  }
+
+  // Returns the length of the binary data. Used by the oat writer.
+  uint32_t RawDataLength() const;
+
+  // Returns the length of the binary data for the specified dex file.
+  static uint32_t RawDataLength(const DexFile& dex_file);
+
+ private:
+  /**
+   * To find an element we need to compare strings.
+   * It is faster to compare hashes first and only then the strings themselves, but we do not
+   * store the full hash of an element in the table. Two observations help:
+   * 1. The low bits of the hash are the same for all elements in one bucket.
+   * 2. If the dex file contains N classes and the hash table size is 2^n (where N <= 2^n),
+   *    then 16 - n bits of the data field are free, so part of the element's hash can be
+   *    encoded into them.
+   * The hash of an element can therefore be divided into three parts:
+   * XXXX XXXX XXXX YYYY YZZZ ZZZZ ZZZZZ
+   * Z - the part of the hash implied by the bucket (these bits are the same for all elements
+   *     in the bucket) - n bits
+   * Y - the part of the hash that can be written into the free 16 - n bits (only n bits are
+   *     used to store class_def_idx)
+   * X - the part of the hash that cannot be used without increasing the size of the entry
+   * So the data field of Entry stores class_def_idx together with part of the entry's hash.
+   */
+  struct Entry {
+    uint32_t str_offset;
+    uint16_t data;
+    uint16_t next_pos_delta;
+
+    Entry() : str_offset(0), data(0), next_pos_delta(0) {}
+
+    bool IsEmpty() const {
+      return str_offset == 0;
+    }
+
+    bool IsLast() const {
+      return next_pos_delta == 0;
+    }
+  };
+
+  // Construct from a dex file.
+  explicit TypeLookupTable(const DexFile& dex_file);
+
+  // Construct from a dex file with existing data.
+  TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file);
+
+  bool IsStringsEquals(const char* str, uint32_t str_offset) const {
+    const uint8_t* ptr = dex_file_.Begin() + str_offset;
+    // Skip string length.
+    DecodeUnsignedLeb128(&ptr);
+    return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
+        str, reinterpret_cast<const char*>(ptr)) == 0;
+  }
+
+  // Extracts the hash bits from the element's data and compares them with
+  // the corresponding bits of the specified hash.
+  bool CmpHashBits(uint32_t data, uint32_t hash) const {
+    uint32_t mask = static_cast<uint16_t>(~GetSizeMask());
+    return (hash & mask) == (data & mask);
+  }
+
+  uint32_t GetClassDefIdx(uint32_t data) const {
+    return data & mask_;
+  }
+
+  uint32_t GetSizeMask() const {
+    return mask_;
+  }
+
+  // Attempt to set an entry in the slot determined by its hash. If there is already something
+  // there, return false. Otherwise return true.
+  bool SetOnInitialPos(const Entry& entry, uint32_t hash);
+
+  // Insert an entry, probing until there is an empty slot.
+  void Insert(const Entry& entry, uint32_t hash);
+
+  // Find the last entry in a chain.
+  uint32_t FindLastEntryInBucket(uint32_t cur_pos) const;
+
+  const DexFile& dex_file_;
+  const uint32_t mask_;
+  std::unique_ptr<Entry[]> entries_;
+  // owns_entries_ specifies if the lookup table owns the entries_ array.
+  const bool owns_entries_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(TypeLookupTable);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_TYPE_LOOKUP_TABLE_H_
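
As a companion to the comment above, a hedged sketch of how the 16-bit data field could be packed and compared. MakeDataSketch() is an assumption reconstructed from GetClassDefIdx() and CmpHashBits(); the real MakeData() is defined in type_lookup_table.cc and is not part of this hunk.

    #include <cstdint>

    // Assumed packing of the 16-bit 'data' field: the low bits (covered by the
    // size mask) hold class_def_idx, the remaining high bits hold the next slice
    // of the hash (the "Y" bits in the comment above).
    static uint16_t MakeDataSketch(uint16_t class_def_idx, uint32_t hash, uint32_t mask) {
      uint32_t hash_bits = static_cast<uint16_t>(hash & ~mask);  // Bits just above the bucket index.
      return static_cast<uint16_t>(hash_bits | (class_def_idx & mask));
    }

    // Mirrors CmpHashBits(): compare only the hash bits stored above the mask;
    // the bits below it are already implied by the bucket the entry lives in.
    static bool CmpHashBitsSketch(uint16_t data, uint32_t hash, uint32_t mask) {
      uint32_t hash_mask = static_cast<uint16_t>(~mask);
      return (hash & hash_mask) == (data & hash_mask);
    }
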
diff --git a/runtime/type_lookup_table_test.cc b/runtime/type_lookup_table_test.cc
new file mode 100644
index 0000000..7f500cc
--- /dev/null
+++ b/runtime/type_lookup_table_test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <memory>
+
+#include "common_runtime_test.h"
+#include "dex_file-inl.h"
+#include "scoped_thread_state_change.h"
+#include "type_lookup_table.h"
+#include "utf-inl.h"
+
+namespace art {
+
+class TypeLookupTableTest : public CommonRuntimeTest {
+ public:
+  size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make copy to prevent linking errors.
+};
+
+TEST_F(TypeLookupTableTest, CreateLookupTable) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
+  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
+  ASSERT_NE(nullptr, table.get());
+  ASSERT_NE(nullptr, table->RawData());
+  ASSERT_EQ(32U, table->RawDataLength());
+}
+
+TEST_F(TypeLookupTableTest, FindNonExistingClassWithoutCollisions) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
+  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
+  ASSERT_NE(nullptr, table.get());
+  const char* descriptor = "LBA;";
+  size_t hash = ComputeModifiedUtf8Hash(descriptor);
+  uint32_t class_def_idx = table->Lookup(descriptor, hash);
+  ASSERT_EQ(kDexNoIndex, class_def_idx);
+}
+
+TEST_F(TypeLookupTableTest, FindNonExistingClassWithCollisions) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
+  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
+  ASSERT_NE(nullptr, table.get());
+  const char* descriptor = "LDA;";
+  size_t hash = ComputeModifiedUtf8Hash(descriptor);
+  uint32_t class_def_idx = table->Lookup(descriptor, hash);
+  ASSERT_EQ(kDexNoIndex, class_def_idx);
+}
+
+TEST_F(TypeLookupTableTest, FindClassNoCollisions) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
+  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
+  ASSERT_NE(nullptr, table.get());
+  const char* descriptor = "LC;";
+  size_t hash = ComputeModifiedUtf8Hash(descriptor);
+  uint32_t class_def_idx = table->Lookup(descriptor, hash);
+  ASSERT_EQ(2U, class_def_idx);
+}
+
+TEST_F(TypeLookupTableTest, FindClassWithCollisions) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex_file(OpenTestDexFile("Lookup"));
+  std::unique_ptr<TypeLookupTable> table(TypeLookupTable::Create(*dex_file));
+  ASSERT_NE(nullptr, table.get());
+  const char* descriptor = "LAB;";
+  size_t hash = ComputeModifiedUtf8Hash(descriptor);
+  uint32_t class_def_idx = table->Lookup(descriptor, hash);
+  ASSERT_EQ(1U, class_def_idx);
+}
+
+}  // namespace art
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 27dacea..48dce63 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -36,6 +36,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string.h"
+#include "oat_quick_method_header.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "utf-inl.h"
@@ -45,7 +46,9 @@
 #include <sys/syscall.h>
 #endif
 
-#include <backtrace/Backtrace.h>  // For DumpNativeStack.
+// For DumpNativeStack.
+#include <backtrace/Backtrace.h>
+#include <backtrace/BacktraceMap.h>
 
 #if defined(__linux__)
 #include <linux/unistd.h>
@@ -1089,9 +1092,19 @@
                                    map_src.c_str(), offset));
   RunCommand(cmdline.c_str(), &os, prefix);
 }
+
+static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
+  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
+      method->GetEntryPointFromQuickCompiledCode()));
+  if (code == 0) {
+    return pc == 0;
+  }
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return code <= pc && pc <= (code + code_size);
+}
 #endif
 
-void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix,
+void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
     ArtMethod* current_method, void* ucontext_ptr) {
 #if __linux__
   // b/18119146
@@ -1099,7 +1112,13 @@
     return;
   }
 
-  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
+  BacktraceMap* map = existing_map;
+  std::unique_ptr<BacktraceMap> tmp_map;
+  if (map == nullptr) {
+    tmp_map.reset(BacktraceMap::Create(tid));
+    map = tmp_map.get();
+  }
+  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
   if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
     os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
     return;
@@ -1146,9 +1165,9 @@
           os << "+" << it->func_offset;
         }
         try_addr2line = true;
-      } else if (
-          current_method != nullptr && Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
-          current_method->PcIsWithinQuickCode(it->pc)) {
+      } else if (current_method != nullptr &&
+          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
+          PcIsWithinQuickCode(current_method, it->pc)) {
         const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
         os << JniLongName(current_method) << "+"
            << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
@@ -1163,7 +1182,7 @@
     }
   }
 #else
-  UNUSED(os, tid, prefix, current_method, ucontext_ptr);
+  UNUSED(os, tid, existing_map, prefix, current_method, ucontext_ptr);
 #endif
 }
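
A sketch of the caller-side pattern the new existing_map parameter enables: build the BacktraceMap once and pass it to every per-thread dump instead of re-reading the maps on each call. The DumpAllNativeStacks() helper and its arguments are hypothetical, not part of this change.

    #include <memory>
    #include <ostream>
    #include <vector>

    #include <unistd.h>

    #include <backtrace/BacktraceMap.h>

    #include "utils.h"  // runtime/utils.h, for the new DumpNativeStack() signature.

    namespace art {

    // Build one BacktraceMap for the whole process and reuse it for every thread.
    void DumpAllNativeStacks(std::ostream& os, const std::vector<pid_t>& tids) {
      std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
      for (pid_t tid : tids) {
        DumpNativeStack(os, tid, map.get(), "  native: ");
      }
    }

    }  // namespace art
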
 
@@ -1816,4 +1835,43 @@
   os << "Something went wrong, didn't find the method in the class data.";
 }
 
+static void ParseStringAfterChar(const std::string& s,
+                                 char c,
+                                 std::string* parsed_value,
+                                 UsageFn Usage) {
+  std::string::size_type colon = s.find(c);
+  if (colon == std::string::npos) {
+    Usage("Missing char %c in option %s\n", c, s.c_str());
+  }
+  // Add one to remove the char we were trimming until.
+  *parsed_value = s.substr(colon + 1);
+}
+
+void ParseDouble(const std::string& option,
+                 char after_char,
+                 double min,
+                 double max,
+                 double* parsed_value,
+                 UsageFn Usage) {
+  std::string substring;
+  ParseStringAfterChar(option, after_char, &substring, Usage);
+  bool sane_val = true;
+  double value;
+  if ((false)) {
+    // TODO: this doesn't seem to work on the emulator.  b/15114595
+    std::stringstream iss(substring);
+    iss >> value;
+    // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
+    sane_val = iss.eof() && (value >= min) && (value <= max);
+  } else {
+    char* end = nullptr;
+    value = strtod(substring.c_str(), &end);
+    sane_val = *end == '\0' && value >= min && value <= max;
+  }
+  if (!sane_val) {
+    Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
+  }
+  *parsed_value = value;
+}
+
 }  // namespace art
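
A hedged usage sketch for the new ParseDouble() helper defined above; the option name and the Usage callback are invented for illustration.

    #include <string>

    #include "base/macros.h"  // For ATTRIBUTE_UNUSED.
    #include "utils.h"        // runtime/utils.h, for ParseDouble() and UsageFn.

    namespace art {

    // Stand-in Usage callback; a real one would print the message and bail out.
    static void ExampleUsage(const char* fmt ATTRIBUTE_UNUSED, ...) {}

    void ParseThresholdOption(const std::string& option) {
      double threshold = 0.0;
      // Take everything after ':' and require it to lie in [0.0, 100.0].
      ParseDouble(option, ':', 0.0, 100.0, &threshold, ExampleUsage);
      // e.g. "-Xexample-threshold:90.0" leaves threshold == 90.0; malformed or
      // out-of-range input calls ExampleUsage instead.
    }

    }  // namespace art
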
diff --git a/runtime/utils.h b/runtime/utils.h
index 3e61824..3690f86 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -26,11 +26,15 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "base/stringpiece.h"
 #include "globals.h"
 #include "primitive.h"
 
+class BacktraceMap;
+
 namespace art {
 
 class ArtField;
@@ -220,12 +224,19 @@
 void SetThreadName(const char* thread_name);
 
 // Dumps the native stack for thread 'tid' to 'os'.
-void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix = "",
-    ArtMethod* current_method = nullptr, void* ucontext = nullptr)
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* map = nullptr,
+                     const char* prefix = "",
+                     ArtMethod* current_method = nullptr,
+                     void* ucontext = nullptr)
     NO_THREAD_SAFETY_ANALYSIS;
 
 // Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
-void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix = "", bool include_count = true);
+void DumpKernelStack(std::ostream& os,
+                     pid_t tid,
+                     const char* prefix = "",
+                     bool include_count = true);
 
 // Find $ANDROID_ROOT, /system, or abort.
 const char* GetAndroidRoot();
@@ -271,23 +282,21 @@
 class VoidFunctor {
  public:
   template <typename A>
-  inline void operator() (A a) const {
-    UNUSED(a);
+  inline void operator() (A a ATTRIBUTE_UNUSED) const {
   }
 
   template <typename A, typename B>
-  inline void operator() (A a, B b) const {
-    UNUSED(a, b);
+  inline void operator() (A a ATTRIBUTE_UNUSED, B b ATTRIBUTE_UNUSED) const {
   }
 
   template <typename A, typename B, typename C>
-  inline void operator() (A a, B b, C c) const {
-    UNUSED(a, b, c);
+  inline void operator() (A a ATTRIBUTE_UNUSED, B b ATTRIBUTE_UNUSED, C c ATTRIBUTE_UNUSED) const {
   }
 };
 
-template <typename Alloc>
-void Push32(std::vector<uint8_t, Alloc>* buf, int32_t data) {
+template <typename Vector>
+void Push32(Vector* buf, int32_t data) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   buf->push_back(data & 0xff);
   buf->push_back((data >> 8) & 0xff);
   buf->push_back((data >> 16) & 0xff);
@@ -305,6 +314,42 @@
 void DumpMethodCFG(ArtMethod* method, std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_);
 void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os);
 
+static inline const void* EntryPointToCodePointer(const void* entry_point) {
+  uintptr_t code = reinterpret_cast<uintptr_t>(entry_point);
+  // TODO: Make this Thumb2 specific. It is benign on other architectures as code is always at
+  //       least 2 byte aligned.
+  code &= ~0x1;
+  return reinterpret_cast<const void*>(code);
+}
+
+using UsageFn = void (*)(const char*, ...);
+
+template <typename T>
+static void ParseUintOption(const StringPiece& option,
+                            const std::string& option_name,
+                            T* out,
+                            UsageFn Usage,
+                            bool is_long_option = true) {
+  std::string option_prefix = option_name + (is_long_option ? "=" : "");
+  DCHECK(option.starts_with(option_prefix));
+  const char* value_string = option.substr(option_prefix.size()).data();
+  int64_t parsed_integer_value = 0;
+  if (!ParseInt(value_string, &parsed_integer_value)) {
+    Usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
+  }
+  if (parsed_integer_value < 0) {
+    Usage("%s passed a negative value %d", option_name.c_str(), parsed_integer_value);
+  }
+  *out = dchecked_integral_cast<T>(parsed_integer_value);
+}
+
+void ParseDouble(const std::string& option,
+                 char after_char,
+                 double min,
+                 double max,
+                 double* parsed_value,
+                 UsageFn Usage);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
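
Similarly, a hedged sketch of how the ParseUintOption() template above might be called; the flag name and Usage callback are invented for illustration, not existing tool flags.

    #include "base/macros.h"       // For ATTRIBUTE_UNUSED.
    #include "base/stringpiece.h"  // For StringPiece.
    #include "utils.h"             // runtime/utils.h, for ParseUintOption() and UsageFn.

    namespace art {

    // Stand-in Usage callback; a real one would print the message and exit.
    static void ToolUsage(const char* fmt ATTRIBUTE_UNUSED, ...) {}

    void ParseThreadsOption(const StringPiece& option) {
      unsigned int thread_count = 0u;
      // Expects the long-option form "--example-threads=<n>"; negative or
      // unparsable values are reported through ToolUsage, otherwise the parsed
      // value is narrowed into thread_count via dchecked_integral_cast.
      ParseUintOption(option, "--example-threads", &thread_count, ToolUsage);
    }

    }  // namespace art
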
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index 4f662d5..90e24b9 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -27,20 +27,25 @@
 
 namespace art {
 
-inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file)
+inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size,
+                                                  const DexFile::Header& header)
     : pointer_size_(pointer_size),
       /* types_offset_ is always 0u, so it's constexpr */
       methods_offset_(types_offset_ +
-                      RoundUp(TypesSize(dex_file->NumTypeIds()), MethodsAlignment())),
+                      RoundUp(TypesSize(header.type_ids_size_), MethodsAlignment())),
       strings_offset_(methods_offset_ +
-                      RoundUp(MethodsSize(dex_file->NumMethodIds()), StringsAlignment())),
+                      RoundUp(MethodsSize(header.method_ids_size_), StringsAlignment())),
       fields_offset_(strings_offset_ +
-                     RoundUp(StringsSize(dex_file->NumStringIds()), FieldsAlignment())),
+                     RoundUp(StringsSize(header.string_ids_size_), FieldsAlignment())),
       size_(fields_offset_ +
-            RoundUp(FieldsSize(dex_file->NumFieldIds()), Alignment())) {
+            RoundUp(FieldsSize(header.field_ids_size_), Alignment())) {
   DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
 }
 
+inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file)
+    : DexCacheArraysLayout(pointer_size, dex_file->GetHeader()) {
+}
+
 inline size_t DexCacheArraysLayout::Alignment() const {
   // GcRoot<> alignment is 4, i.e. lower than or equal to the pointer alignment.
   static_assert(alignof(GcRoot<mirror::Class>) == 4, "Expecting alignof(GcRoot<>) == 4");
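
A small sketch of what the new header-based constructor makes possible; the helper function and the 8-byte pointer size are assumptions for illustration, and Size() is the class's existing accessor.

    #include "dex_file.h"
    #include "utils/dex_cache_arrays_layout-inl.h"

    namespace art {

    // Compute the total dex cache arrays size straight from a DexFile::Header,
    // without needing a DexFile instance. 8u is an example 64-bit pointer size;
    // Size() returns the summed, rounded-up sizes of the types/methods/strings/
    // fields arrays laid out by the constructor above.
    size_t DexCacheArraysSizeFor(const DexFile::Header& header) {
      DexCacheArraysLayout layout(8u, header);
      return layout.Size();
    }

    }  // namespace art
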
diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h
index d50be5a..cd84460 100644
--- a/runtime/utils/dex_cache_arrays_layout.h
+++ b/runtime/utils/dex_cache_arrays_layout.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_
 #define ART_RUNTIME_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_
 
+#include "dex_file.h"
+
 namespace art {
 
 /**
@@ -36,6 +38,9 @@
         size_(0u) {
   }
 
+  // Construct a layout for a particular dex file header.
+  DexCacheArraysLayout(size_t pointer_size, const DexFile::Header& header);
+
   // Construct a layout for a particular dex file.
   DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file);
 
diff --git a/runtime/verifier/method_verifier-inl.h b/runtime/verifier/method_verifier-inl.h
index 2d9fd53..f52d011 100644
--- a/runtime/verifier/method_verifier-inl.h
+++ b/runtime/verifier/method_verifier-inl.h
@@ -38,6 +38,10 @@
   return insn_flags_[index];
 }
 
+inline InstructionFlags& MethodVerifier::GetInstructionFlags(size_t index) {
+  return insn_flags_[index];
+}
+
 inline mirror::ClassLoader* MethodVerifier::GetClassLoader() {
   return class_loader_.Get();
 }
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index eed3e22..2db79ab 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -30,6 +30,7 @@
 #include "dex_instruction-inl.h"
 #include "dex_instruction_utils.h"
 #include "dex_instruction_visitor.h"
+#include "experimental_flags.h"
 #include "gc/accounting/card_table-inl.h"
 #include "indenter.h"
 #include "intern_table.h"
@@ -57,12 +58,14 @@
 // On VLOG(verifier), should we dump the whole state when we run into a hard failure?
 static constexpr bool kDumpRegLinesOnHardFailureIfVLOG = true;
 
+PcToRegisterLineTable::PcToRegisterLineTable(ScopedArenaAllocator& arena)
+    : register_lines_(arena.Adapter(kArenaAllocVerifier)) {}
+
 void PcToRegisterLineTable::Init(RegisterTrackingMode mode, InstructionFlags* flags,
                                  uint32_t insns_size, uint16_t registers_size,
                                  MethodVerifier* verifier) {
   DCHECK_GT(insns_size, 0U);
-  register_lines_.reset(new RegisterLine*[insns_size]());
-  size_ = insns_size;
+  register_lines_.resize(insns_size);
   for (uint32_t i = 0; i < insns_size; i++) {
     bool interesting = false;
     switch (mode) {
@@ -79,19 +82,12 @@
         break;
     }
     if (interesting) {
-      register_lines_[i] = RegisterLine::Create(registers_size, verifier);
+      register_lines_[i].reset(RegisterLine::Create(registers_size, verifier));
     }
   }
 }
 
-PcToRegisterLineTable::~PcToRegisterLineTable() {
-  for (size_t i = 0; i < size_; i++) {
-    delete register_lines_[i];
-    if (kIsDebugBuild) {
-      register_lines_[i] = nullptr;
-    }
-  }
-}
+PcToRegisterLineTable::~PcToRegisterLineTable() {}
 
 // Note: returns true on failure.
 ALWAYS_INLINE static inline bool FailOrAbort(MethodVerifier* verifier, bool condition,
@@ -397,7 +393,10 @@
                                bool need_precise_constants, bool verify_to_dump,
                                bool allow_thread_suspension)
     : self_(self),
-      reg_types_(can_load_classes),
+      arena_stack_(Runtime::Current()->GetArenaPool()),
+      arena_(&arena_stack_),
+      reg_types_(can_load_classes, arena_),
+      reg_table_(arena_),
       work_insn_idx_(DexFile::kDexNoIndex),
       dex_method_idx_(dex_method_idx),
       mirror_method_(method),
@@ -560,6 +559,7 @@
 bool MethodVerifier::Verify() {
   // Some older code doesn't correctly mark constructors as such. Test for this case by looking at
   // the name.
+  Runtime* runtime = Runtime::Current();
   const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_);
   const char* method_name = dex_file_->StringDataByIdx(method_id.name_idx_);
   bool instance_constructor_by_name = strcmp("<init>", method_name) == 0;
@@ -628,9 +628,13 @@
         }
       }
       if ((class_def_->GetJavaAccessFlags() & kAccInterface) != 0) {
-        // Interface methods must be public and abstract.
-        if ((method_access_flags_ & (kAccPublic | kAccAbstract)) != (kAccPublic | kAccAbstract)) {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be public and abstract";
+        // Interface methods must be public and abstract (if default methods are disabled).
+        bool default_methods_supported =
+            runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
+        uint32_t kRequired = kAccPublic | (default_methods_supported ? 0 : kAccAbstract);
+        if ((method_access_flags_ & kRequired) != kRequired) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be public"
+                                            << (default_methods_supported ? "" : " and abstract");
           return false;
         }
         // In addition to the above, interface methods must not be protected.
@@ -657,12 +661,26 @@
       return false;
     }
 
-    // Only the static initializer may have code in an interface.
     if ((class_def_->GetJavaAccessFlags() & kAccInterface) != 0) {
-      // Interfaces may have static initializers for their fields.
-      if (!IsConstructor() || !IsStatic()) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be abstract";
-        return false;
+      // Interfaces may always have static initializers for their fields. If we are running with
+      // default methods enabled, we also allow other public, non-final methods (static or not) to
+      // have code. Otherwise the static initializer is the only method allowed to have code.
+      if (!(IsConstructor() && IsStatic())) {
+        if (runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods)) {
+          if (IsInstanceConstructor()) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-static constructor";
+            return false;
+          } else if (method_access_flags_ & kAccFinal) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have final methods";
+            return false;
+          } else if (!(method_access_flags_ & kAccPublic)) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-public members";
+            return false;
+          }
+        } else {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be abstract";
+          return false;
+        }
       }
     }
 
@@ -682,8 +700,13 @@
                                       << " regs=" << code_item_->registers_size_;
     return false;
   }
+
   // Allocate and initialize an array to hold instruction data.
-  insn_flags_.reset(new InstructionFlags[code_item_->insns_size_in_code_units_]());
+  insn_flags_.reset(arena_.AllocArray<InstructionFlags>(code_item_->insns_size_in_code_units_));
+  DCHECK(insn_flags_ != nullptr);
+  std::uninitialized_fill_n(insn_flags_.get(),
+                            code_item_->insns_size_in_code_units_,
+                            InstructionFlags());
   // Run through the instructions and see if the width checks out.
   bool result = ComputeWidthsAndCountOps();
   // Flag instructions guarded by a "try" block and check exception handlers.
@@ -693,8 +716,8 @@
   // Perform code-flow analysis and return.
   result = result && VerifyCodeFlow();
   // Compute information for compiler.
-  if (result && Runtime::Current()->IsCompiler()) {
-    result = Runtime::Current()->GetCompilerCallbacks()->MethodVerified(this);
+  if (result && runtime->IsCompiler()) {
+    result = runtime->GetCompilerCallbacks()->MethodVerified(this);
   }
   return result;
 }
@@ -829,7 +852,7 @@
         break;
     }
     size_t inst_size = inst->SizeInCodeUnits();
-    insn_flags_[dex_pc].SetIsOpcode();
+    GetInstructionFlags(dex_pc).SetIsOpcode();
     dex_pc += inst_size;
     inst = inst->RelativeAt(inst_size);
   }
@@ -862,7 +885,7 @@
                                         << " endAddr=" << end << " (size=" << insns_size << ")";
       return false;
     }
-    if (!insn_flags_[start].IsOpcode()) {
+    if (!GetInstructionFlags(start).IsOpcode()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD)
           << "'try' block starts inside an instruction (" << start << ")";
       return false;
@@ -870,7 +893,7 @@
     uint32_t dex_pc = start;
     const Instruction* inst = Instruction::At(code_item_->insns_ + dex_pc);
     while (dex_pc < end) {
-      insn_flags_[dex_pc].SetInTry();
+      GetInstructionFlags(dex_pc).SetInTry();
       size_t insn_size = inst->SizeInCodeUnits();
       dex_pc += insn_size;
       inst = inst->RelativeAt(insn_size);
@@ -884,7 +907,7 @@
     CatchHandlerIterator iterator(handlers_ptr);
     for (; iterator.HasNext(); iterator.Next()) {
       uint32_t dex_pc= iterator.GetHandlerAddress();
-      if (!insn_flags_[dex_pc].IsOpcode()) {
+      if (!GetInstructionFlags(dex_pc).IsOpcode()) {
         Fail(VERIFY_ERROR_BAD_CLASS_HARD)
             << "exception handler starts at bad address (" << dex_pc << ")";
         return false;
@@ -894,7 +917,7 @@
             << "exception handler begins with move-result* (" << dex_pc << ")";
         return false;
       }
-      insn_flags_[dex_pc].SetBranchTarget();
+      GetInstructionFlags(dex_pc).SetBranchTarget();
       // Ensure exception types are resolved so that they don't need resolution to be delivered,
       // unresolved exception types will be ignored by exception delivery
       if (iterator.GetHandlerTypeIndex() != DexFile::kDexNoIndex16) {
@@ -916,8 +939,8 @@
   const Instruction* inst = Instruction::At(code_item_->insns_);
 
   /* Flag the start of the method as a branch target, and a GC point due to stack overflow errors */
-  insn_flags_[0].SetBranchTarget();
-  insn_flags_[0].SetCompileTimeInfoPoint();
+  GetInstructionFlags(0).SetBranchTarget();
+  GetInstructionFlags(0).SetCompileTimeInfoPoint();
 
   uint32_t insns_size = code_item_->insns_size_in_code_units_;
   for (uint32_t dex_pc = 0; dex_pc < insns_size;) {
@@ -929,18 +952,18 @@
     // All invoke points are marked as "Throw" points already.
     // We are relying on this to also count all the invokes as interesting.
     if (inst->IsBranch()) {
-      insn_flags_[dex_pc].SetCompileTimeInfoPoint();
+      GetInstructionFlags(dex_pc).SetCompileTimeInfoPoint();
       // The compiler also needs safepoints for fall-through to loop heads.
       // Such a loop head must be a target of a branch.
       int32_t offset = 0;
       bool cond, self_ok;
       bool target_ok = GetBranchOffset(dex_pc, &offset, &cond, &self_ok);
       DCHECK(target_ok);
-      insn_flags_[dex_pc + offset].SetCompileTimeInfoPoint();
+      GetInstructionFlags(dex_pc + offset).SetCompileTimeInfoPoint();
     } else if (inst->IsSwitch() || inst->IsThrow()) {
-      insn_flags_[dex_pc].SetCompileTimeInfoPoint();
+      GetInstructionFlags(dex_pc).SetCompileTimeInfoPoint();
     } else if (inst->IsReturn()) {
-      insn_flags_[dex_pc].SetCompileTimeInfoPointAndReturn();
+      GetInstructionFlags(dex_pc).SetCompileTimeInfoPointAndReturn();
     }
     dex_pc += inst->SizeInCodeUnits();
     inst = inst->Next();
@@ -1183,7 +1206,7 @@
   }
   // Make sure the array-data is marked as an opcode. This ensures that it was reached when
   // traversing the code item linearly. It is an approximation for a by-spec padding value.
-  if (!insn_flags_[cur_offset + array_data_offset].IsOpcode()) {
+  if (!GetInstructionFlags(cur_offset + array_data_offset).IsOpcode()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "array data table at " << cur_offset
                                       << ", data offset " << array_data_offset
                                       << " not correctly visited, probably bad padding.";
@@ -1226,13 +1249,13 @@
   int32_t abs_offset = cur_offset + offset;
   if (abs_offset < 0 ||
       (uint32_t) abs_offset >= insn_count ||
-      !insn_flags_[abs_offset].IsOpcode()) {
+      !GetInstructionFlags(abs_offset).IsOpcode()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid branch target " << offset << " (-> "
                                       << reinterpret_cast<void*>(abs_offset) << ") at "
                                       << reinterpret_cast<void*>(cur_offset);
     return false;
   }
-  insn_flags_[abs_offset].SetBranchTarget();
+  GetInstructionFlags(abs_offset).SetBranchTarget();
   return true;
 }
 
@@ -1296,7 +1319,7 @@
   }
   // Make sure the switch data is marked as an opcode. This ensures that it was reached when
   // traversing the code item linearly. It is an approximation for a by-spec padding value.
-  if (!insn_flags_[cur_offset + switch_offset].IsOpcode()) {
+  if (!GetInstructionFlags(cur_offset + switch_offset).IsOpcode()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "switch table at " << cur_offset
                                       << ", switch offset " << switch_offset
                                       << " not correctly visited, probably bad padding.";
@@ -1368,14 +1391,14 @@
     int32_t abs_offset = cur_offset + offset;
     if (abs_offset < 0 ||
         abs_offset >= static_cast<int32_t>(insn_count) ||
-        !insn_flags_[abs_offset].IsOpcode()) {
+        !GetInstructionFlags(abs_offset).IsOpcode()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid switch target " << offset
                                         << " (-> " << reinterpret_cast<void*>(abs_offset) << ") at "
                                         << reinterpret_cast<void*>(cur_offset)
                                         << "[" << targ << "]";
       return false;
     }
-    insn_flags_[abs_offset].SetBranchTarget();
+    GetInstructionFlags(abs_offset).SetBranchTarget();
   }
   return true;
 }
@@ -1416,7 +1439,6 @@
                   registers_size,
                   this);
 
-
   work_line_.reset(RegisterLine::Create(registers_size, this));
   saved_line_.reset(RegisterLine::Create(registers_size, this));
 
@@ -1472,7 +1494,7 @@
       vios->Stream() << reg_line->Dump(this) << "\n";
     }
     vios->Stream()
-        << StringPrintf("0x%04zx", dex_pc) << ": " << insn_flags_[dex_pc].ToString() << " ";
+        << StringPrintf("0x%04zx", dex_pc) << ": " << GetInstructionFlags(dex_pc).ToString() << " ";
     const bool kDumpHexOfInstruction = false;
     if (kDumpHexOfInstruction) {
       vios->Stream() << inst->DumpHex(5) << " ";
@@ -1658,7 +1680,7 @@
   const uint32_t insns_size = code_item_->insns_size_in_code_units_;
 
   /* Begin by marking the first instruction as "changed". */
-  insn_flags_[0].SetChanged();
+  GetInstructionFlags(0).SetChanged();
   uint32_t start_guess = 0;
 
   /* Continue until no instructions are marked "changed". */
@@ -1669,7 +1691,7 @@
     // Find the first marked one. Use "start_guess" as a way to find one quickly.
     uint32_t insn_idx = start_guess;
     for (; insn_idx < insns_size; insn_idx++) {
-      if (insn_flags_[insn_idx].IsChanged())
+      if (GetInstructionFlags(insn_idx).IsChanged())
         break;
     }
     if (insn_idx == insns_size) {
@@ -1689,7 +1711,7 @@
     // situation where we have a stray "changed" flag set on an instruction that isn't a branch
     // target.
     work_insn_idx_ = insn_idx;
-    if (insn_flags_[insn_idx].IsBranchTarget()) {
+    if (GetInstructionFlags(insn_idx).IsBranchTarget()) {
       work_line_->CopyFromLine(reg_table_.GetLine(insn_idx));
     } else if (kIsDebugBuild) {
       /*
@@ -1715,8 +1737,8 @@
       return false;
     }
     /* Clear "changed" and mark as visited. */
-    insn_flags_[insn_idx].SetVisited();
-    insn_flags_[insn_idx].ClearChanged();
+    GetInstructionFlags(insn_idx).SetVisited();
+    GetInstructionFlags(insn_idx).ClearChanged();
   }
 
   if (gDebugVerify) {
@@ -1743,10 +1765,10 @@
            (insns[insn_idx + 1] == Instruction::kPackedSwitchSignature ||
             insns[insn_idx + 1] == Instruction::kSparseSwitchSignature ||
             insns[insn_idx + 1] == Instruction::kArrayDataSignature))) {
-        insn_flags_[insn_idx].SetVisited();
+        GetInstructionFlags(insn_idx).SetVisited();
       }
 
-      if (!insn_flags_[insn_idx].IsVisited()) {
+      if (!GetInstructionFlags(insn_idx).IsVisited()) {
         if (dead_start < 0)
           dead_start = insn_idx;
       } else if (dead_start >= 0) {
@@ -1876,8 +1898,8 @@
   // We need to ensure the work line is consistent while performing validation. When we spot a
   // peephole pattern we compute a new line for either the fallthrough instruction or the
   // branch target.
-  std::unique_ptr<RegisterLine> branch_line;
-  std::unique_ptr<RegisterLine> fallthrough_line;
+  ArenaUniquePtr<RegisterLine> branch_line;
+  ArenaUniquePtr<RegisterLine> fallthrough_line;
 
   switch (inst->Opcode()) {
     case Instruction::NOP:
@@ -2125,9 +2147,9 @@
       work_line_->PushMonitor(this, inst->VRegA_11x(), work_insn_idx_);
       // Check whether the previous instruction is a move-object with vAA as a source, creating
       // untracked lock aliasing.
-      if (0 != work_insn_idx_ && !insn_flags_[work_insn_idx_].IsBranchTarget()) {
+      if (0 != work_insn_idx_ && !GetInstructionFlags(work_insn_idx_).IsBranchTarget()) {
         uint32_t prev_idx = work_insn_idx_ - 1;
-        while (0 != prev_idx && !insn_flags_[prev_idx].IsOpcode()) {
+        while (0 != prev_idx && !GetInstructionFlags(prev_idx).IsOpcode()) {
           prev_idx--;
         }
         const Instruction* prev_inst = Instruction::At(code_item_->insns_ + prev_idx);
@@ -2408,10 +2430,10 @@
       uint32_t instance_of_idx = 0;
       if (0 != work_insn_idx_) {
         instance_of_idx = work_insn_idx_ - 1;
-        while (0 != instance_of_idx && !insn_flags_[instance_of_idx].IsOpcode()) {
+        while (0 != instance_of_idx && !GetInstructionFlags(instance_of_idx).IsOpcode()) {
           instance_of_idx--;
         }
-        if (FailOrAbort(this, insn_flags_[instance_of_idx].IsOpcode(),
+        if (FailOrAbort(this, GetInstructionFlags(instance_of_idx).IsOpcode(),
                         "Unable to get previous instruction of if-eqz/if-nez for work index ",
                         work_insn_idx_)) {
           break;
@@ -2467,15 +2489,15 @@
           update_line->SetRegisterType<LockOp::kKeep>(this,
                                                       instance_of_inst->VRegB_22c(),
                                                       cast_type);
-          if (!insn_flags_[instance_of_idx].IsBranchTarget() && 0 != instance_of_idx) {
+          if (!GetInstructionFlags(instance_of_idx).IsBranchTarget() && 0 != instance_of_idx) {
             // See if instance-of was preceded by a move-object operation, common due to the small
             // register encoding space of instance-of, and propagate type information to the source
             // of the move-object.
             uint32_t move_idx = instance_of_idx - 1;
-            while (0 != move_idx && !insn_flags_[move_idx].IsOpcode()) {
+            while (0 != move_idx && !GetInstructionFlags(move_idx).IsOpcode()) {
               move_idx--;
             }
-            if (FailOrAbort(this, insn_flags_[move_idx].IsOpcode(),
+            if (FailOrAbort(this, GetInstructionFlags(move_idx).IsOpcode(),
                             "Unable to get previous instruction of if-eqz/if-nez for work index ",
                             work_insn_idx_)) {
               break;
@@ -2767,8 +2789,7 @@
         work_line_->MarkRefsAsInitialized(this, this_type, this_reg, work_insn_idx_);
       }
       if (return_type == nullptr) {
-        return_type = &reg_types_.FromDescriptor(GetClassLoader(), return_type_descriptor,
-                                                 false);
+        return_type = &reg_types_.FromDescriptor(GetClassLoader(), return_type_descriptor, false);
       }
       if (!return_type->IsLowHalf()) {
         work_line_->SetResultRegisterType(this, *return_type);
@@ -2841,7 +2862,7 @@
         uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
         uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
-        descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
+        descriptor = dex_file_->StringByTypeIdx(return_type_idx);
       } else {
         descriptor = abs_method->GetReturnTypeDescriptor();
       }
@@ -3290,7 +3311,7 @@
       return false;
     }
     /* update branch target, set "changed" if appropriate */
-    if (nullptr != branch_line.get()) {
+    if (nullptr != branch_line) {
       if (!UpdateRegisters(work_insn_idx_ + branch_target, branch_line.get(), false)) {
         return false;
       }
@@ -3345,7 +3366,7 @@
    * Handle instructions that can throw and that are sitting in a "try" block. (If they're not in a
    * "try" block when they throw, control transfers out of the method.)
    */
-  if ((opcode_flags & Instruction::kThrow) != 0 && insn_flags_[work_insn_idx_].IsInTry()) {
+  if ((opcode_flags & Instruction::kThrow) != 0 && GetInstructionFlags(work_insn_idx_).IsInTry()) {
     bool has_catch_all_handler = false;
     CatchHandlerIterator iterator(*code_item_, work_insn_idx_);
 
@@ -3415,11 +3436,11 @@
     if (!CheckNotMoveException(code_item_->insns_, next_insn_idx)) {
       return false;
     }
-    if (nullptr != fallthrough_line.get()) {
+    if (nullptr != fallthrough_line) {
       // Make workline consistent with fallthrough computed from peephole optimization.
       work_line_->CopyFromLine(fallthrough_line.get());
     }
-    if (insn_flags_[next_insn_idx].IsReturn()) {
+    if (GetInstructionFlags(next_insn_idx).IsReturn()) {
       // For returns we only care about the operand to the return, all other registers are dead.
       const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn_idx);
       AdjustReturnLine(this, ret_inst, work_line_.get());
@@ -3437,7 +3458,7 @@
        * We're not recording register data for the next instruction, so we don't know what the
        * prior state was. We have to assume that something has changed and re-evaluate it.
        */
-      insn_flags_[next_insn_idx].SetChanged();
+      GetInstructionFlags(next_insn_idx).SetChanged();
     }
   }
 
@@ -3461,7 +3482,7 @@
   }
 
   DCHECK_LT(*start_guess, code_item_->insns_size_in_code_units_);
-  DCHECK(insn_flags_[*start_guess].IsOpcode());
+  DCHECK(GetInstructionFlags(*start_guess).IsOpcode());
 
   if (have_pending_runtime_throw_failure_) {
     have_any_pending_runtime_throw_failure_ = true;
@@ -3472,30 +3493,55 @@
   return true;
 }  // NOLINT(readability/fn_size)
 
+void MethodVerifier::UninstantiableError(const char* descriptor) {
+  Fail(VerifyError::VERIFY_ERROR_NO_CLASS) << "Could not create precise reference for "
+                                           << "non-instantiable klass " << descriptor;
+}
+
+inline bool MethodVerifier::IsInstantiableOrPrimitive(mirror::Class* klass) {
+  return klass->IsInstantiable() || klass->IsPrimitive();
+}
+
 const RegType& MethodVerifier::ResolveClassAndCheckAccess(uint32_t class_idx) {
-  const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
-  const RegType& referrer = GetDeclaringClass();
   mirror::Class* klass = dex_cache_->GetResolvedType(class_idx);
-  const RegType& result = klass != nullptr ?
-      FromClass(descriptor, klass, klass->CannotBeAssignedFromOtherTypes()) :
-      reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
-  if (result.IsConflict()) {
-    Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "accessing broken descriptor '" << descriptor
-        << "' in " << referrer;
-    return result;
+  const RegType* result = nullptr;
+  if (klass != nullptr) {
+    bool precise = klass->CannotBeAssignedFromOtherTypes();
+    if (precise && !IsInstantiableOrPrimitive(klass)) {
+      const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
+      UninstantiableError(descriptor);
+      precise = false;
+    }
+    result = reg_types_.FindClass(klass, precise);
+    if (result == nullptr) {
+      const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
+      result = reg_types_.InsertClass(descriptor, klass, precise);
+    }
+  } else {
+    const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
+    result = &reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
   }
-  if (klass == nullptr && !result.IsUnresolvedTypes()) {
-    dex_cache_->SetResolvedType(class_idx, result.GetClass());
+  DCHECK(result != nullptr);
+  if (result->IsConflict()) {
+    const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
+    Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "accessing broken descriptor '" << descriptor
+        << "' in " << GetDeclaringClass();
+    return *result;
+  }
+  if (klass == nullptr && !result->IsUnresolvedTypes()) {
+    dex_cache_->SetResolvedType(class_idx, result->GetClass());
   }
   // Check if access is allowed. Unresolved types use xxxWithAccessCheck to
   // check at runtime if access is allowed and so pass here. If result is
   // primitive, skip the access check.
-  if (result.IsNonZeroReferenceTypes() && !result.IsUnresolvedTypes() &&
-      !referrer.IsUnresolvedTypes() && !referrer.CanAccess(result)) {
-    Fail(VERIFY_ERROR_ACCESS_CLASS) << "illegal class access: '"
-                                    << referrer << "' -> '" << result << "'";
+  if (result->IsNonZeroReferenceTypes() && !result->IsUnresolvedTypes()) {
+    const RegType& referrer = GetDeclaringClass();
+    if (!referrer.IsUnresolvedTypes() && !referrer.CanAccess(*result)) {
+      Fail(VERIFY_ERROR_ACCESS_CLASS) << "illegal class access: '"
+                                      << referrer << "' -> '" << result << "'";
+    }
   }
-  return result;
+  return *result;
 }
 
 const RegType& MethodVerifier::GetCaughtExceptionType() {
@@ -3618,8 +3664,15 @@
                                       << PrettyMethod(res_method);
     return nullptr;
   }
-  // Check that interface methods match interface classes.
-  if (klass->IsInterface() && method_type != METHOD_INTERFACE) {
+  // Check that interface methods are static or match interface classes.
+  // We only allow statics if we don't have default methods enabled.
+  Runtime* runtime = Runtime::Current();
+  const bool default_methods_supported =
+      runtime == nullptr ||
+      runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
+  if (klass->IsInterface() &&
+      method_type != METHOD_INTERFACE &&
+      (!default_methods_supported || method_type != METHOD_STATIC)) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "non-interface method " << PrettyMethod(res_method)
                                     << " is in an interface class " << PrettyClass(klass);
     return nullptr;
@@ -3701,9 +3754,10 @@
       } else {
         const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
         const uint16_t class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
-        res_method_class = &reg_types_.FromDescriptor(GetClassLoader(),
-                                                      dex_file_->StringByTypeIdx(class_idx),
-                                                      false);
+        res_method_class = &reg_types_.FromDescriptor(
+            GetClassLoader(),
+            dex_file_->StringByTypeIdx(class_idx),
+            false);
       }
       if (!res_method_class->IsAssignableFrom(actual_arg_type)) {
         Fail(actual_arg_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS:
@@ -4457,14 +4511,16 @@
         field->GetType<false>();
 
     if (field_type_class != nullptr) {
-      field_type = &FromClass(field->GetTypeDescriptor(), field_type_class,
+      field_type = &FromClass(field->GetTypeDescriptor(),
+                              field_type_class,
                               field_type_class->CannotBeAssignedFromOtherTypes());
     } else {
       Thread* self = Thread::Current();
       DCHECK(!can_load_classes_ || self->IsExceptionPending());
       self->ClearException();
       field_type = &reg_types_.FromDescriptor(field->GetDeclaringClass()->GetClassLoader(),
-                                              field->GetTypeDescriptor(), false);
+                                              field->GetTypeDescriptor(),
+                                              false);
     }
     if (field_type == nullptr) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Cannot infer field type from " << inst->Name();
@@ -4585,14 +4641,14 @@
                                      bool update_merge_line) {
   bool changed = true;
   RegisterLine* target_line = reg_table_.GetLine(next_insn);
-  if (!insn_flags_[next_insn].IsVisitedOrChanged()) {
+  if (!GetInstructionFlags(next_insn).IsVisitedOrChanged()) {
     /*
      * We haven't processed this instruction before, and we haven't touched the registers here, so
      * there's nothing to "merge". Copy the registers over and mark it as changed. (This is the
      * only way a register can transition out of "unknown", so this is not just an optimization.)
      */
     target_line->CopyFromLine(merge_line);
-    if (insn_flags_[next_insn].IsReturn()) {
+    if (GetInstructionFlags(next_insn).IsReturn()) {
       // Verify that the monitor stack is empty on return.
       merge_line->VerifyMonitorStackEmpty(this);
 
@@ -4602,10 +4658,9 @@
       AdjustReturnLine(this, ret_inst, target_line);
     }
   } else {
-    std::unique_ptr<RegisterLine> copy(gDebugVerify ?
-                                 RegisterLine::Create(target_line->NumRegs(), this) :
-                                 nullptr);
+    ArenaUniquePtr<RegisterLine> copy;
     if (gDebugVerify) {
+      copy.reset(RegisterLine::Create(target_line->NumRegs(), this));
       copy->CopyFromLine(target_line);
     }
     changed = target_line->MergeRegisters(this, merge_line);
@@ -4624,13 +4679,13 @@
     }
   }
   if (changed) {
-    insn_flags_[next_insn].SetChanged();
+    GetInstructionFlags(next_insn).SetChanged();
   }
   return true;
 }
 
 InstructionFlags* MethodVerifier::CurrentInsnFlags() {
-  return &insn_flags_[work_insn_idx_];
+  return &GetInstructionFlags(work_insn_idx_);
 }
 
 const RegType& MethodVerifier::GetMethodReturnType() {
@@ -4666,8 +4721,7 @@
         = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(method_id.class_idx_));
     if (mirror_method_ != nullptr) {
       mirror::Class* klass = mirror_method_->GetDeclaringClass();
-      declaring_class_ = &FromClass(descriptor, klass,
-                                    klass->CannotBeAssignedFromOtherTypes());
+      declaring_class_ = &FromClass(descriptor, klass, klass->CannotBeAssignedFromOtherTypes());
     } else {
       declaring_class_ = &reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
     }
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index ba694b7..7b51d6e 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -21,7 +21,10 @@
 #include <sstream>
 #include <vector>
 
+#include "base/arena_allocator.h"
 #include "base/macros.h"
+#include "base/scoped_arena_containers.h"
+#include "base/stl_util.h"
 #include "dex_file.h"
 #include "handle.h"
 #include "instruction_flags.h"
@@ -107,7 +110,7 @@
 // execution of that instruction.
 class PcToRegisterLineTable {
  public:
-  PcToRegisterLineTable() : size_(0) {}
+  explicit PcToRegisterLineTable(ScopedArenaAllocator& arena);
   ~PcToRegisterLineTable();
 
   // Initialize the RegisterTable. Every instruction address can have a different set of information
@@ -116,14 +119,12 @@
   void Init(RegisterTrackingMode mode, InstructionFlags* flags, uint32_t insns_size,
             uint16_t registers_size, MethodVerifier* verifier);
 
-  RegisterLine* GetLine(size_t idx) {
-    DCHECK_LT(idx, size_);
-    return register_lines_[idx];
+  RegisterLine* GetLine(size_t idx) const {
+    return register_lines_[idx].get();
   }
 
  private:
-  std::unique_ptr<RegisterLine*[]> register_lines_;
-  size_t size_;
+  ScopedArenaVector<ArenaUniquePtr<RegisterLine>> register_lines_;
 
   DISALLOW_COPY_AND_ASSIGN(PcToRegisterLineTable);
 };
@@ -240,7 +241,8 @@
   // Accessors used by the compiler via CompilerCallback
   const DexFile::CodeItem* CodeItem() const;
   RegisterLine* GetRegLine(uint32_t dex_pc);
-  const InstructionFlags& GetInstructionFlags(size_t index) const;
+  ALWAYS_INLINE const InstructionFlags& GetInstructionFlags(size_t index) const;
+  ALWAYS_INLINE InstructionFlags& GetInstructionFlags(size_t index);
   mirror::ClassLoader* GetClassLoader() SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::DexCache* GetDexCache() SHARED_REQUIRES(Locks::mutator_lock_);
   MethodReference GetMethodReference() const;
@@ -275,7 +277,14 @@
     return IsConstructor() && !IsStatic();
   }
 
+  ScopedArenaAllocator& GetArena() {
+    return arena_;
+  }
+
  private:
+  void UninstantiableError(const char* descriptor);
+  static bool IsInstantiableOrPrimitive(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Is the method being verified a constructor? See the comment on the field.
   bool IsConstructor() const {
     return is_constructor_;
@@ -687,19 +696,23 @@
   // The thread we're verifying on.
   Thread* const self_;
 
+  // Arena allocator.
+  ArenaStack arena_stack_;
+  ScopedArenaAllocator arena_;
+
   RegTypeCache reg_types_;
 
   PcToRegisterLineTable reg_table_;
 
   // Storage for the register status we're currently working on.
-  std::unique_ptr<RegisterLine> work_line_;
+  ArenaUniquePtr<RegisterLine> work_line_;
 
   // The address of the instruction we're currently working on, note that this is in 2 byte
   // quantities
   uint32_t work_insn_idx_;
 
   // Storage for the register status we're saving for later.
-  std::unique_ptr<RegisterLine> saved_line_;
+  ArenaUniquePtr<RegisterLine> saved_line_;
 
   const uint32_t dex_method_idx_;  // The method we're working on.
   // Its object representation if known.
@@ -715,7 +728,8 @@
   const DexFile::CodeItem* const code_item_;  // The code item containing the code for the method.
   const RegType* declaring_class_;  // Lazily computed reg type of the method's declaring class.
   // Instruction widths and flags, one entry per code unit.
-  std::unique_ptr<InstructionFlags[]> insn_flags_;
+  // Owned, but not unique_ptr since insn_flags_ are allocated in arenas.
+  ArenaUniquePtr<InstructionFlags[]> insn_flags_;
   // The dex PC of a FindLocksAtDexPc request, -1 otherwise.
   uint32_t interesting_dex_pc_;
   // The container into which FindLocksAtDexPc should write the registers containing held locks,
diff --git a/runtime/verifier/reg_type-inl.h b/runtime/verifier/reg_type-inl.h
index f445132..11a53e5 100644
--- a/runtime/verifier/reg_type-inl.h
+++ b/runtime/verifier/reg_type-inl.h
@@ -20,6 +20,7 @@
 #include "reg_type.h"
 
 #include "base/casts.h"
+#include "base/scoped_arena_allocator.h"
 #include "mirror/class.h"
 
 namespace art {
@@ -180,6 +181,10 @@
   return instance_;
 }
 
+inline void* RegType::operator new(size_t size, ScopedArenaAllocator* arena) {
+  return arena->Alloc(size, kArenaAllocMisc);
+}
+
 }  // namespace verifier
 }  // namespace art
 
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index b86a4c8..16cab03 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -16,6 +16,7 @@
 
 #include "reg_type-inl.h"
 
+#include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 #include "base/casts.h"
 #include "class_linker-inl.h"
@@ -46,20 +47,17 @@
 const DoubleHiType* DoubleHiType::instance_ = nullptr;
 const IntegerType* IntegerType::instance_ = nullptr;
 
-PrimitiveType::PrimitiveType(mirror::Class* klass, const std::string& descriptor, uint16_t cache_id)
-    SHARED_REQUIRES(Locks::mutator_lock_)
+PrimitiveType::PrimitiveType(mirror::Class* klass, const StringPiece& descriptor, uint16_t cache_id)
     : RegType(klass, descriptor, cache_id) {
   CHECK(klass != nullptr);
   CHECK(!descriptor.empty());
 }
 
-Cat1Type::Cat1Type(mirror::Class* klass, const std::string& descriptor, uint16_t cache_id)
-    SHARED_REQUIRES(Locks::mutator_lock_)
+Cat1Type::Cat1Type(mirror::Class* klass, const StringPiece& descriptor, uint16_t cache_id)
     : PrimitiveType(klass, descriptor, cache_id) {
 }
 
-Cat2Type::Cat2Type(mirror::Class* klass, const std::string& descriptor, uint16_t cache_id)
-    SHARED_REQUIRES(Locks::mutator_lock_)
+Cat2Type::Cat2Type(mirror::Class* klass, const StringPiece& descriptor, uint16_t cache_id)
     : PrimitiveType(klass, descriptor, cache_id) {
 }
 
@@ -121,11 +119,11 @@
 }
 
 std::string IntegerType::Dump() const {
-    return "Integer";
+  return "Integer";
 }
 
 const DoubleHiType* DoubleHiType::CreateInstance(mirror::Class* klass,
-                                                 const std::string& descriptor,
+                                                 const StringPiece& descriptor,
                                                  uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new DoubleHiType(klass, descriptor, cache_id);
@@ -140,7 +138,7 @@
 }
 
 const DoubleLoType* DoubleLoType::CreateInstance(mirror::Class* klass,
-                                                 const std::string& descriptor,
+                                                 const StringPiece& descriptor,
                                                  uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new DoubleLoType(klass, descriptor, cache_id);
@@ -154,14 +152,14 @@
   }
 }
 
-const LongLoType* LongLoType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const LongLoType* LongLoType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                              uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new LongLoType(klass, descriptor, cache_id);
   return instance_;
 }
 
-const LongHiType* LongHiType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const LongHiType* LongHiType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                              uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new LongHiType(klass, descriptor, cache_id);
@@ -182,7 +180,7 @@
   }
 }
 
-const FloatType* FloatType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const FloatType* FloatType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                            uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new FloatType(klass, descriptor, cache_id);
@@ -196,7 +194,7 @@
   }
 }
 
-const CharType* CharType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const CharType* CharType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                          uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new CharType(klass, descriptor, cache_id);
@@ -210,7 +208,7 @@
   }
 }
 
-const ShortType* ShortType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const ShortType* ShortType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                            uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new ShortType(klass, descriptor, cache_id);
@@ -224,7 +222,7 @@
   }
 }
 
-const ByteType* ByteType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const ByteType* ByteType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                          uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new ByteType(klass, descriptor, cache_id);
@@ -238,7 +236,7 @@
   }
 }
 
-const IntegerType* IntegerType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const IntegerType* IntegerType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                                uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new IntegerType(klass, descriptor, cache_id);
@@ -253,7 +251,7 @@
 }
 
 const ConflictType* ConflictType::CreateInstance(mirror::Class* klass,
-                                                 const std::string& descriptor,
+                                                 const StringPiece& descriptor,
                                                  uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new ConflictType(klass, descriptor, cache_id);
@@ -267,7 +265,7 @@
   }
 }
 
-const BooleanType* BooleanType::CreateInstance(mirror::Class* klass, const std::string& descriptor,
+const BooleanType* BooleanType::CreateInstance(mirror::Class* klass, const StringPiece& descriptor,
                                          uint16_t cache_id) {
   CHECK(BooleanType::instance_ == nullptr);
   instance_ = new BooleanType(klass, descriptor, cache_id);
@@ -286,7 +284,7 @@
 }
 
 const UndefinedType* UndefinedType::CreateInstance(mirror::Class* klass,
-                                                   const std::string& descriptor,
+                                                   const StringPiece& descriptor,
                                                    uint16_t cache_id) {
   CHECK(instance_ == nullptr);
   instance_ = new UndefinedType(klass, descriptor, cache_id);
@@ -300,7 +298,7 @@
   }
 }
 
-PreciseReferenceType::PreciseReferenceType(mirror::Class* klass, const std::string& descriptor,
+PreciseReferenceType::PreciseReferenceType(mirror::Class* klass, const StringPiece& descriptor,
                                            uint16_t cache_id)
     : RegType(klass, descriptor, cache_id) {
   // Note: no check for IsInstantiable() here. We may produce this in case an InstantiationError
@@ -335,14 +333,14 @@
 
 std::string UnresolvedReferenceType::Dump() const {
   std::stringstream result;
-  result << "Unresolved Reference" << ": " << PrettyDescriptor(GetDescriptor().c_str());
+  result << "Unresolved Reference" << ": " << PrettyDescriptor(GetDescriptor().as_string().c_str());
   return result.str();
 }
 
 std::string UnresolvedUninitializedRefType::Dump() const {
   std::stringstream result;
   result << "Unresolved And Uninitialized Reference" << ": "
-      << PrettyDescriptor(GetDescriptor().c_str())
+      << PrettyDescriptor(GetDescriptor().as_string().c_str())
       << " Allocation PC: " << GetAllocationPc();
   return result.str();
 }
@@ -350,7 +348,7 @@
 std::string UnresolvedUninitializedThisRefType::Dump() const {
   std::stringstream result;
   result << "Unresolved And Uninitialized This Reference"
-      << PrettyDescriptor(GetDescriptor().c_str());
+      << PrettyDescriptor(GetDescriptor().as_string().c_str());
   return result.str();
 }
 
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 2834a9a..80b751c 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -22,9 +22,11 @@
 #include <set>
 #include <string>
 
+#include "base/arena_object.h"
 #include "base/bit_vector.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "base/stringpiece.h"
 #include "gc_root.h"
 #include "handle_scope.h"
 #include "object_callbacks.h"
@@ -35,6 +37,9 @@
 class Class;
 }  // namespace mirror
 
+class ArenaBitVector;
+class ScopedArenaAllocator;
+
 namespace verifier {
 
 class RegTypeCache;
@@ -173,7 +178,7 @@
   bool IsJavaLangObjectArray() const
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsInstantiableTypes() const SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::string& GetDescriptor() const {
+  const StringPiece& GetDescriptor() const {
     DCHECK(HasClass() ||
            (IsUnresolvedTypes() && !IsUnresolvedMergedReference() &&
             !IsUnresolvedSuperClass()));
@@ -274,10 +279,20 @@
   void VisitRoots(RootVisitor* visitor, const RootInfo& root_info) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static void* operator new(size_t size) noexcept {
+    return ::operator new(size);
+  }
+
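+  // RegTypes created during verification are placed in the verifier's scoped arena; allocating
+  // them from a plain ArenaAllocator is intentionally disallowed.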
+  static void* operator new(size_t size, ArenaAllocator* arena) = delete;
+  static void* operator new(size_t size, ScopedArenaAllocator* arena);
+
  protected:
-  RegType(mirror::Class* klass, const std::string& descriptor,
+  RegType(mirror::Class* klass,
+          const StringPiece& descriptor,
           uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
-      : descriptor_(descriptor), klass_(klass), cache_id_(cache_id) {
+      : descriptor_(descriptor),
+        klass_(klass),
+        cache_id_(cache_id) {
     if (kIsDebugBuild) {
       CheckInvariants();
     }
@@ -285,9 +300,8 @@
 
   void CheckInvariants() const SHARED_REQUIRES(Locks::mutator_lock_);
 
-  const std::string descriptor_;
-  mutable GcRoot<mirror::Class>
-      klass_;  // Non-const only due to moving classes.
+  const StringPiece descriptor_;
+  mutable GcRoot<mirror::Class> klass_;  // Non-const only due to moving classes.
   const uint16_t cache_id_;
 
   friend class RegTypeCache;
@@ -311,7 +325,7 @@
 
   // Create the singleton instance.
   static const ConflictType* CreateInstance(mirror::Class* klass,
-                                            const std::string& descriptor,
+                                            const StringPiece& descriptor,
                                             uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -319,7 +333,7 @@
   static void Destroy();
 
  private:
-  ConflictType(mirror::Class* klass, const std::string& descriptor,
+  ConflictType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : RegType(klass, descriptor, cache_id) {}
 
@@ -340,7 +354,7 @@
 
   // Create the singleton instance.
   static const UndefinedType* CreateInstance(mirror::Class* klass,
-                                             const std::string& descriptor,
+                                             const StringPiece& descriptor,
                                              uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -348,7 +362,7 @@
   static void Destroy();
 
  private:
-  UndefinedType(mirror::Class* klass, const std::string& descriptor,
+  UndefinedType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : RegType(klass, descriptor, cache_id) {}
 
@@ -357,7 +371,7 @@
 
 class PrimitiveType : public RegType {
  public:
-  PrimitiveType(mirror::Class* klass, const std::string& descriptor,
+  PrimitiveType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool HasClassVirtual() const OVERRIDE { return true; }
@@ -365,7 +379,7 @@
 
 class Cat1Type : public PrimitiveType {
  public:
-  Cat1Type(mirror::Class* klass, const std::string& descriptor,
+  Cat1Type(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_);
 };
 
@@ -374,14 +388,14 @@
   bool IsInteger() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   static const IntegerType* CreateInstance(mirror::Class* klass,
-                                           const std::string& descriptor,
+                                           const StringPiece& descriptor,
                                            uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const IntegerType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  IntegerType(mirror::Class* klass, const std::string& descriptor,
+  IntegerType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat1Type(klass, descriptor, cache_id) {}
   static const IntegerType* instance_;
@@ -392,14 +406,14 @@
   bool IsBoolean() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   static const BooleanType* CreateInstance(mirror::Class* klass,
-                                           const std::string& descriptor,
+                                           const StringPiece& descriptor,
                                            uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const BooleanType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  BooleanType(mirror::Class* klass, const std::string& descriptor,
+  BooleanType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat1Type(klass, descriptor, cache_id) {}
 
@@ -411,14 +425,14 @@
   bool IsByte() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   static const ByteType* CreateInstance(mirror::Class* klass,
-                                        const std::string& descriptor,
+                                        const StringPiece& descriptor,
                                         uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const ByteType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  ByteType(mirror::Class* klass, const std::string& descriptor,
+  ByteType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat1Type(klass, descriptor, cache_id) {}
   static const ByteType* instance_;
@@ -429,14 +443,14 @@
   bool IsShort() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   static const ShortType* CreateInstance(mirror::Class* klass,
-                                         const std::string& descriptor,
+                                         const StringPiece& descriptor,
                                          uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const ShortType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  ShortType(mirror::Class* klass, const std::string& descriptor,
+  ShortType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat1Type(klass, descriptor, cache_id) {}
   static const ShortType* instance_;
@@ -447,14 +461,14 @@
   bool IsChar() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   static const CharType* CreateInstance(mirror::Class* klass,
-                                        const std::string& descriptor,
+                                        const StringPiece& descriptor,
                                         uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const CharType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  CharType(mirror::Class* klass, const std::string& descriptor,
+  CharType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat1Type(klass, descriptor, cache_id) {}
   static const CharType* instance_;
@@ -465,14 +479,14 @@
   bool IsFloat() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   static const FloatType* CreateInstance(mirror::Class* klass,
-                                         const std::string& descriptor,
+                                         const StringPiece& descriptor,
                                          uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const FloatType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  FloatType(mirror::Class* klass, const std::string& descriptor,
+  FloatType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat1Type(klass, descriptor, cache_id) {}
   static const FloatType* instance_;
@@ -480,7 +494,7 @@
 
 class Cat2Type : public PrimitiveType {
  public:
-  Cat2Type(mirror::Class* klass, const std::string& descriptor,
+  Cat2Type(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_);
 };
 
@@ -490,14 +504,14 @@
   bool IsLongLo() const OVERRIDE { return true; }
   bool IsLong() const OVERRIDE { return true; }
   static const LongLoType* CreateInstance(mirror::Class* klass,
-                                          const std::string& descriptor,
+                                          const StringPiece& descriptor,
                                           uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const LongLoType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  LongLoType(mirror::Class* klass, const std::string& descriptor,
+  LongLoType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat2Type(klass, descriptor, cache_id) {}
   static const LongLoType* instance_;
@@ -508,14 +522,14 @@
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsLongHi() const OVERRIDE { return true; }
   static const LongHiType* CreateInstance(mirror::Class* klass,
-                                          const std::string& descriptor,
+                                          const StringPiece& descriptor,
                                           uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const LongHiType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  LongHiType(mirror::Class* klass, const std::string& descriptor,
+  LongHiType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat2Type(klass, descriptor, cache_id) {}
   static const LongHiType* instance_;
@@ -527,14 +541,14 @@
   bool IsDoubleLo() const OVERRIDE { return true; }
   bool IsDouble() const OVERRIDE { return true; }
   static const DoubleLoType* CreateInstance(mirror::Class* klass,
-                                            const std::string& descriptor,
+                                            const StringPiece& descriptor,
                                             uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const DoubleLoType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  DoubleLoType(mirror::Class* klass, const std::string& descriptor,
+  DoubleLoType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat2Type(klass, descriptor, cache_id) {}
   static const DoubleLoType* instance_;
@@ -545,14 +559,14 @@
   std::string Dump() const OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   virtual bool IsDoubleHi() const OVERRIDE { return true; }
   static const DoubleHiType* CreateInstance(mirror::Class* klass,
-                                      const std::string& descriptor,
+                                      const StringPiece& descriptor,
                                       uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
   static const DoubleHiType* GetInstance() PURE;
   static void Destroy();
 
  private:
-  DoubleHiType(mirror::Class* klass, const std::string& descriptor,
+  DoubleHiType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : Cat2Type(klass, descriptor, cache_id) {}
   static const DoubleHiType* instance_;
@@ -677,7 +691,7 @@
 // instructions and must be passed to a constructor.
 class UninitializedType : public RegType {
  public:
-  UninitializedType(mirror::Class* klass, const std::string& descriptor,
+  UninitializedType(mirror::Class* klass, const StringPiece& descriptor,
                     uint32_t allocation_pc, uint16_t cache_id)
       : RegType(klass, descriptor, cache_id), allocation_pc_(allocation_pc) {}
 
@@ -697,7 +711,7 @@
 class UninitializedReferenceType FINAL : public UninitializedType {
  public:
   UninitializedReferenceType(mirror::Class* klass,
-                             const std::string& descriptor,
+                             const StringPiece& descriptor,
                              uint32_t allocation_pc, uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : UninitializedType(klass, descriptor, allocation_pc, cache_id) {}
@@ -713,7 +727,7 @@
 // constructor.
 class UnresolvedUninitializedRefType FINAL : public UninitializedType {
  public:
-  UnresolvedUninitializedRefType(const std::string& descriptor,
+  UnresolvedUninitializedRefType(const StringPiece& descriptor,
                                  uint32_t allocation_pc, uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : UninitializedType(nullptr, descriptor, allocation_pc, cache_id) {
@@ -737,7 +751,7 @@
 class UninitializedThisReferenceType FINAL : public UninitializedType {
  public:
   UninitializedThisReferenceType(mirror::Class* klass,
-                                 const std::string& descriptor,
+                                 const StringPiece& descriptor,
                                  uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : UninitializedType(klass, descriptor, 0, cache_id) {
@@ -758,7 +772,7 @@
 
 class UnresolvedUninitializedThisRefType FINAL : public UninitializedType {
  public:
-  UnresolvedUninitializedThisRefType(const std::string& descriptor,
+  UnresolvedUninitializedThisRefType(const StringPiece& descriptor,
                                      uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : UninitializedType(nullptr, descriptor, 0, cache_id) {
@@ -781,7 +795,7 @@
 // sub-class.
 class ReferenceType FINAL : public RegType {
  public:
-  ReferenceType(mirror::Class* klass, const std::string& descriptor,
+  ReferenceType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) SHARED_REQUIRES(Locks::mutator_lock_)
       : RegType(klass, descriptor, cache_id) {}
 
@@ -799,7 +813,7 @@
 // type.
 class PreciseReferenceType FINAL : public RegType {
  public:
-  PreciseReferenceType(mirror::Class* klass, const std::string& descriptor,
+  PreciseReferenceType(mirror::Class* klass, const StringPiece& descriptor,
                        uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -815,7 +829,7 @@
 // Common parent of unresolved types.
 class UnresolvedType : public RegType {
  public:
-  UnresolvedType(const std::string& descriptor, uint16_t cache_id)
+  UnresolvedType(const StringPiece& descriptor, uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : RegType(nullptr, descriptor, cache_id) {}
 
@@ -827,7 +841,7 @@
 // of this type must be conservative.
 class UnresolvedReferenceType FINAL : public UnresolvedType {
  public:
-  UnresolvedReferenceType(const std::string& descriptor, uint16_t cache_id)
+  UnresolvedReferenceType(const StringPiece& descriptor, uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : UnresolvedType(descriptor, cache_id) {
     if (kIsDebugBuild) {
@@ -882,8 +896,10 @@
 class UnresolvedMergedType FINAL : public UnresolvedType {
  public:
   // Note: the constructor will copy the unresolved BitVector, not use it directly.
-  UnresolvedMergedType(const RegType& resolved, const BitVector& unresolved,
-                       const RegTypeCache* reg_type_cache, uint16_t cache_id)
+  UnresolvedMergedType(const RegType& resolved,
+                       const BitVector& unresolved,
+                       const RegTypeCache* reg_type_cache,
+                       uint16_t cache_id)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // The resolved part. See description below.
diff --git a/runtime/verifier/reg_type_cache-inl.h b/runtime/verifier/reg_type_cache-inl.h
index b6f253b..68af62e 100644
--- a/runtime/verifier/reg_type_cache-inl.h
+++ b/runtime/verifier/reg_type_cache-inl.h
@@ -118,6 +118,18 @@
   }
 }
 
+template <class RegTypeType>
+inline RegTypeType& RegTypeCache::AddEntry(RegTypeType* new_entry) {
+  DCHECK(new_entry != nullptr);
+  entries_.push_back(new_entry);
+  if (new_entry->HasClass()) {
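+    // Also index entries that have a resolved class, so FindClass() can avoid scanning entries_.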
+    mirror::Class* klass = new_entry->GetClass();
+    DCHECK(!klass->IsPrimitive());
+    klass_entries_.push_back(std::make_pair(GcRoot<mirror::Class>(klass), new_entry));
+  }
+  return *new_entry;
+}
+
 }  // namespace verifier
 }  // namespace art
 #endif  // ART_RUNTIME_VERIFIER_REG_TYPE_CACHE_INL_H_
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index bb756e9..71ed4a2 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -16,7 +16,9 @@
 
 #include "reg_type_cache-inl.h"
 
+#include "base/arena_bit_vector.h"
 #include "base/casts.h"
+#include "base/scoped_arena_allocator.h"
 #include "base/stl_util.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -29,9 +31,10 @@
 
 bool RegTypeCache::primitive_initialized_ = false;
 uint16_t RegTypeCache::primitive_count_ = 0;
-const PreciseConstType* RegTypeCache::small_precise_constants_[kMaxSmallConstant - kMinSmallConstant + 1];
+const PreciseConstType* RegTypeCache::small_precise_constants_[kMaxSmallConstant -
+                                                               kMinSmallConstant + 1];
 
-static bool MatchingPrecisionForClass(const RegType* entry, bool precise)
+ALWAYS_INLINE static inline bool MatchingPrecisionForClass(const RegType* entry, bool precise)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (entry->IsPreciseReference() == precise) {
     // We were or weren't looking for a precise reference and we found what we need.
@@ -67,7 +70,8 @@
   DCHECK_EQ(entries_.size(), primitive_count_);
 }
 
-const RegType& RegTypeCache::FromDescriptor(mirror::ClassLoader* loader, const char* descriptor,
+const RegType& RegTypeCache::FromDescriptor(mirror::ClassLoader* loader,
+                                            const char* descriptor,
                                             bool precise) {
   DCHECK(RegTypeCache::primitive_initialized_);
   if (descriptor[1] == '\0') {
@@ -159,13 +163,20 @@
   return klass;
 }
 
-const RegType& RegTypeCache::From(mirror::ClassLoader* loader, const char* descriptor,
+StringPiece RegTypeCache::AddString(const StringPiece& string_piece) {
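+  // Copy the characters into the verifier's arena so the returned piece outlives the caller's
+  // buffer. Note: the copy is not null-terminated.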
+  char* ptr = arena_.AllocArray<char>(string_piece.length());
+  memcpy(ptr, string_piece.data(), string_piece.length());
+  return StringPiece(ptr, string_piece.length());
+}
+
+const RegType& RegTypeCache::From(mirror::ClassLoader* loader,
+                                  const char* descriptor,
                                   bool precise) {
+  StringPiece sp_descriptor(descriptor);
   // Try looking up the class in the cache first. We use a StringPiece to avoid continual strlen
   // operations on the descriptor.
-  StringPiece descriptor_sp(descriptor);
   for (size_t i = primitive_count_; i < entries_.size(); i++) {
-    if (MatchDescriptor(i, descriptor_sp, precise)) {
+    if (MatchDescriptor(i, sp_descriptor, precise)) {
       return *(entries_[i]);
     }
   }
@@ -186,12 +197,11 @@
     if (klass->CannotBeAssignedFromOtherTypes() || precise) {
       DCHECK(!(klass->IsAbstract()) || klass->IsArrayClass());
       DCHECK(!klass->IsInterface());
-      entry = new PreciseReferenceType(klass, descriptor_sp.as_string(), entries_.size());
+      entry = new (&arena_) PreciseReferenceType(klass, AddString(sp_descriptor), entries_.size());
     } else {
-      entry = new ReferenceType(klass, descriptor_sp.as_string(), entries_.size());
+      entry = new (&arena_) ReferenceType(klass, AddString(sp_descriptor), entries_.size());
     }
-    AddEntry(entry);
-    return *entry;
+    return AddEntry(entry);
   } else {  // Class not resolved.
     // We tried loading the class and failed, this might get an exception raised
     // so we want to clear it before we go on.
@@ -202,9 +212,8 @@
       DCHECK(!Thread::Current()->IsExceptionPending());
     }
     if (IsValidDescriptor(descriptor)) {
-      RegType* entry = new UnresolvedReferenceType(descriptor_sp.as_string(), entries_.size());
-      AddEntry(entry);
-      return *entry;
+      return AddEntry(
+          new (&arena_) UnresolvedReferenceType(AddString(sp_descriptor), entries_.size()));
     } else {
       // The descriptor is broken return the unknown type as there's nothing sensible that
       // could be done at runtime
@@ -213,50 +222,65 @@
   }
 }
 
-const RegType& RegTypeCache::FromClass(const char* descriptor, mirror::Class* klass, bool precise) {
+const RegType* RegTypeCache::FindClass(mirror::Class* klass, bool precise) const {
   DCHECK(klass != nullptr);
   if (klass->IsPrimitive()) {
     // Note: precise isn't used for primitive classes. A char is assignable to an int. All
     // primitive classes are final.
-    return RegTypeFromPrimitiveType(klass->GetPrimitiveType());
-  } else {
-    // Look for the reference in the list of entries to have.
-    for (size_t i = primitive_count_; i < entries_.size(); i++) {
-      const RegType* cur_entry = entries_[i];
-      if (cur_entry->klass_.Read() == klass && MatchingPrecisionForClass(cur_entry, precise)) {
-        return *cur_entry;
+    return &RegTypeFromPrimitiveType(klass->GetPrimitiveType());
+  }
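+  // Scan the class-keyed side table; only entries with a resolved class are recorded there.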
+  for (auto& pair : klass_entries_) {
+    mirror::Class* const reg_klass = pair.first.Read();
+    if (reg_klass == klass) {
+      const RegType* reg_type = pair.second;
+      if (MatchingPrecisionForClass(reg_type, precise)) {
+        return reg_type;
       }
     }
-    // No reference to the class was found, create new reference.
-    RegType* entry;
-    if (precise) {
-      entry = new PreciseReferenceType(klass, descriptor, entries_.size());
-    } else {
-      entry = new ReferenceType(klass, descriptor, entries_.size());
-    }
-    AddEntry(entry);
-    return *entry;
   }
+  return nullptr;
 }
 
-RegTypeCache::RegTypeCache(bool can_load_classes) : can_load_classes_(can_load_classes) {
+const RegType* RegTypeCache::InsertClass(const StringPiece& descriptor,
+                                         mirror::Class* klass,
+                                         bool precise) {
+  // No reference to the class was found, create new reference.
+  DCHECK(FindClass(klass, precise) == nullptr);
+  RegType* const reg_type = precise
+      ? static_cast<RegType*>(
+          new (&arena_) PreciseReferenceType(klass, descriptor, entries_.size()))
+      : new (&arena_) ReferenceType(klass, descriptor, entries_.size());
+  return &AddEntry(reg_type);
+}
+
+const RegType& RegTypeCache::FromClass(const char* descriptor, mirror::Class* klass, bool precise) {
+  DCHECK(klass != nullptr);
+  const RegType* reg_type = FindClass(klass, precise);
+  if (reg_type == nullptr) {
+    reg_type = InsertClass(AddString(StringPiece(descriptor)), klass, precise);
+  }
+  return *reg_type;
+}
+
+RegTypeCache::RegTypeCache(bool can_load_classes, ScopedArenaAllocator& arena)
+    : entries_(arena.Adapter(kArenaAllocVerifier)),
+      klass_entries_(arena.Adapter(kArenaAllocVerifier)),
+      can_load_classes_(can_load_classes),
+      arena_(arena) {
   if (kIsDebugBuild) {
     Thread::Current()->AssertThreadSuspensionIsAllowable(gAborting == 0);
   }
-  entries_.reserve(64);
+  // The klass_entries_ array does not have primitives or small constants.
+  static constexpr size_t kNumReserveEntries = 32;
+  klass_entries_.reserve(kNumReserveEntries);
+  // We want to have room for additional entries after inserting primitives and small
+  // constants.
+  entries_.reserve(kNumReserveEntries + kNumPrimitivesAndSmallConstants);
   FillPrimitiveAndSmallConstantTypes();
 }
 
 RegTypeCache::~RegTypeCache() {
-  CHECK_LE(primitive_count_, entries_.size());
-  // Delete only the non primitive types.
-  if (entries_.size() == kNumPrimitivesAndSmallConstants) {
-    // All entries are from the global pool, nothing to delete.
-    return;
-  }
-  std::vector<const RegType*>::iterator non_primitive_begin = entries_.begin();
-  std::advance(non_primitive_begin, kNumPrimitivesAndSmallConstants);
-  STLDeleteContainerPointers(non_primitive_begin, entries_.end());
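+  // All non-primitive entries are allocated in the verifier's scoped arena, so there is nothing
+  // to delete here anymore.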
+  DCHECK_LE(primitive_count_, entries_.size());
 }
 
 void RegTypeCache::ShutDown() {
@@ -318,9 +342,9 @@
 }
 
 const RegType& RegTypeCache::FromUnresolvedMerge(const RegType& left, const RegType& right) {
-  BitVector types(1,                                    // Allocate at least a word.
-                  true,                                 // Is expandable.
-                  Allocator::GetMallocAllocator());     // TODO: Arenas in the verifier.
+  ArenaBitVector types(&arena_,
+                       kDefaultArenaBitVectorBytes * kBitsPerByte,  // Allocate at least 8 bytes.
+                       true);                                       // Is expandable.
   const RegType* left_resolved;
   if (left.IsUnresolvedMergedReference()) {
     const UnresolvedMergedType* left_merge = down_cast<const UnresolvedMergedType*>(&left);
@@ -361,20 +385,15 @@
       const BitVector& unresolved_part = cmp_type->GetUnresolvedTypes();
       // Use SameBitsSet. "types" is expandable to allow merging in the components, but the
       // BitVector in the final RegType will be made non-expandable.
-      if (&resolved_part == &resolved_parts_merged &&
-              types.SameBitsSet(&unresolved_part)) {
+      if (&resolved_part == &resolved_parts_merged && types.SameBitsSet(&unresolved_part)) {
         return *cur_entry;
       }
     }
   }
-
-  // Create entry.
-  RegType* entry = new UnresolvedMergedType(resolved_parts_merged,
-                                            types,
-                                            this,
-                                            entries_.size());
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(new (&arena_) UnresolvedMergedType(resolved_parts_merged,
+                                                     types,
+                                                     this,
+                                                     entries_.size()));
 }
 
 const RegType& RegTypeCache::FromUnresolvedSuperClass(const RegType& child) {
@@ -391,14 +410,12 @@
       }
     }
   }
-  RegType* entry = new UnresolvedSuperClass(child.GetId(), this, entries_.size());
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(new (&arena_) UnresolvedSuperClass(child.GetId(), this, entries_.size()));
 }
 
 const UninitializedType& RegTypeCache::Uninitialized(const RegType& type, uint32_t allocation_pc) {
   UninitializedType* entry = nullptr;
-  const std::string& descriptor(type.GetDescriptor());
+  const StringPiece& descriptor(type.GetDescriptor());
   if (type.IsUnresolvedTypes()) {
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
       const RegType* cur_entry = entries_[i];
@@ -409,7 +426,9 @@
         return *down_cast<const UnresolvedUninitializedRefType*>(cur_entry);
       }
     }
-    entry = new UnresolvedUninitializedRefType(descriptor, allocation_pc, entries_.size());
+    entry = new (&arena_) UnresolvedUninitializedRefType(descriptor,
+                                                         allocation_pc,
+                                                         entries_.size());
   } else {
     mirror::Class* klass = type.GetClass();
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
@@ -421,17 +440,19 @@
         return *down_cast<const UninitializedReferenceType*>(cur_entry);
       }
     }
-    entry = new UninitializedReferenceType(klass, descriptor, allocation_pc, entries_.size());
+    entry = new (&arena_) UninitializedReferenceType(klass,
+                                                     descriptor,
+                                                     allocation_pc,
+                                                     entries_.size());
   }
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(entry);
 }
 
 const RegType& RegTypeCache::FromUninitialized(const RegType& uninit_type) {
   RegType* entry;
 
   if (uninit_type.IsUnresolvedTypes()) {
-    const std::string& descriptor(uninit_type.GetDescriptor());
+    const StringPiece& descriptor(uninit_type.GetDescriptor());
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
       const RegType* cur_entry = entries_[i];
       if (cur_entry->IsUnresolvedReference() &&
@@ -439,7 +460,7 @@
         return *cur_entry;
       }
     }
-    entry = new UnresolvedReferenceType(descriptor, entries_.size());
+    entry = new (&arena_) UnresolvedReferenceType(descriptor, entries_.size());
   } else {
     mirror::Class* klass = uninit_type.GetClass();
     if (uninit_type.IsUninitializedThisReference() && !klass->IsFinal()) {
@@ -450,7 +471,7 @@
           return *cur_entry;
         }
       }
-      entry = new ReferenceType(klass, "", entries_.size());
+      entry = new (&arena_) ReferenceType(klass, "", entries_.size());
     } else if (!klass->IsPrimitive()) {
       // We're uninitialized because of allocation, look or create a precise type as allocations
       // may only create objects of that type.
@@ -469,18 +490,19 @@
           return *cur_entry;
         }
       }
-      entry = new PreciseReferenceType(klass, uninit_type.GetDescriptor(), entries_.size());
+      entry = new (&arena_) PreciseReferenceType(klass,
+                                                 uninit_type.GetDescriptor(),
+                                                 entries_.size());
     } else {
       return Conflict();
     }
   }
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(entry);
 }
 
 const UninitializedType& RegTypeCache::UninitializedThisArgument(const RegType& type) {
   UninitializedType* entry;
-  const std::string& descriptor(type.GetDescriptor());
+  const StringPiece& descriptor(type.GetDescriptor());
   if (type.IsUnresolvedTypes()) {
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
       const RegType* cur_entry = entries_[i];
@@ -489,7 +511,7 @@
         return *down_cast<const UninitializedType*>(cur_entry);
       }
     }
-    entry = new UnresolvedUninitializedThisRefType(descriptor, entries_.size());
+    entry = new (&arena_) UnresolvedUninitializedThisRefType(descriptor, entries_.size());
   } else {
     mirror::Class* klass = type.GetClass();
     for (size_t i = primitive_count_; i < entries_.size(); i++) {
@@ -498,10 +520,9 @@
         return *down_cast<const UninitializedType*>(cur_entry);
       }
     }
-    entry = new UninitializedThisReferenceType(klass, descriptor, entries_.size());
+    entry = new (&arena_) UninitializedThisReferenceType(klass, descriptor, entries_.size());
   }
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(entry);
 }
 
 const ConstantType& RegTypeCache::FromCat1NonSmallConstant(int32_t value, bool precise) {
@@ -515,12 +536,11 @@
   }
   ConstantType* entry;
   if (precise) {
-    entry = new PreciseConstType(value, entries_.size());
+    entry = new (&arena_) PreciseConstType(value, entries_.size());
   } else {
-    entry = new ImpreciseConstType(value, entries_.size());
+    entry = new (&arena_) ImpreciseConstType(value, entries_.size());
   }
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(entry);
 }
 
 const ConstantType& RegTypeCache::FromCat2ConstLo(int32_t value, bool precise) {
@@ -533,12 +553,11 @@
   }
   ConstantType* entry;
   if (precise) {
-    entry = new PreciseConstLoType(value, entries_.size());
+    entry = new (&arena_) PreciseConstLoType(value, entries_.size());
   } else {
-    entry = new ImpreciseConstLoType(value, entries_.size());
+    entry = new (&arena_) ImpreciseConstLoType(value, entries_.size());
   }
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(entry);
 }
 
 const ConstantType& RegTypeCache::FromCat2ConstHi(int32_t value, bool precise) {
@@ -551,32 +570,30 @@
   }
   ConstantType* entry;
   if (precise) {
-    entry = new PreciseConstHiType(value, entries_.size());
+    entry = new (&arena_) PreciseConstHiType(value, entries_.size());
   } else {
-    entry = new ImpreciseConstHiType(value, entries_.size());
+    entry = new (&arena_) ImpreciseConstHiType(value, entries_.size());
   }
-  AddEntry(entry);
-  return *entry;
+  return AddEntry(entry);
 }
 
 const RegType& RegTypeCache::GetComponentType(const RegType& array, mirror::ClassLoader* loader) {
   if (!array.IsArrayTypes()) {
     return Conflict();
   } else if (array.IsUnresolvedTypes()) {
-    const std::string& descriptor(array.GetDescriptor());
-    const std::string component(descriptor.substr(1, descriptor.size() - 1));
-    return FromDescriptor(loader, component.c_str(), false);
+    const std::string descriptor(array.GetDescriptor().as_string());
+    return FromDescriptor(loader, descriptor.c_str() + 1, false);
   } else {
     mirror::Class* klass = array.GetClass()->GetComponentType();
     std::string temp;
+    const char* descriptor = klass->GetDescriptor(&temp);
     if (klass->IsErroneous()) {
       // Arrays may have erroneous component types, use unresolved in that case.
       // We assume that the primitive classes are not erroneous, so we know it is a
       // reference type.
-      return FromDescriptor(loader, klass->GetDescriptor(&temp), false);
+      return FromDescriptor(loader, descriptor, false);
     } else {
-      return FromClass(klass->GetDescriptor(&temp), klass,
-                       klass->CannotBeAssignedFromOtherTypes());
+      return FromClass(descriptor, klass, klass->CannotBeAssignedFromOtherTypes());
     }
   }
 }
@@ -618,10 +635,10 @@
   for (size_t i = primitive_count_; i < entries_.size(); ++i) {
     entries_[i]->VisitRoots(visitor, root_info);
   }
-}
-
-void RegTypeCache::AddEntry(RegType* new_entry) {
-  entries_.push_back(new_entry);
+  for (auto& pair : klass_entries_) {
+    GcRoot<mirror::Class>& root = pair.first;
+    root.VisitRoot(visitor, root_info);
+  }
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 93948a1..6f9a04e 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -19,6 +19,7 @@
 
 #include "base/casts.h"
 #include "base/macros.h"
+#include "base/scoped_arena_containers.h"
 #include "object_callbacks.h"
 #include "reg_type.h"
 #include "runtime.h"
@@ -31,15 +32,19 @@
   class Class;
   class ClassLoader;
 }  // namespace mirror
+class ScopedArenaAllocator;
 class StringPiece;
 
 namespace verifier {
 
 class RegType;
 
+// Use 8 bytes since that is the default arena allocator alignment.
+static constexpr size_t kDefaultArenaBitVectorBytes = 8;
+
 class RegTypeCache {
  public:
-  explicit RegTypeCache(bool can_load_classes);
+  explicit RegTypeCache(bool can_load_classes, ScopedArenaAllocator& arena);
   ~RegTypeCache();
   static void Init() SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!RegTypeCache::primitive_initialized_) {
@@ -53,6 +58,13 @@
   const art::verifier::RegType& GetFromId(uint16_t id) const;
   const RegType& From(mirror::ClassLoader* loader, const char* descriptor, bool precise)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  // Find a RegType, returns null if not found.
+  const RegType* FindClass(mirror::Class* klass, bool precise) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  // Insert a new class with a specified descriptor, must not already be in the cache.
+  const RegType* InsertClass(const StringPiece& descriptor, mirror::Class* klass, bool precise)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  // Get or insert a reg type for a descriptor, klass, and precision.
   const RegType& FromClass(const char* descriptor, mirror::Class* klass, bool precise)
       SHARED_REQUIRES(Locks::mutator_lock_);
   const ConstantType& FromCat1Const(int32_t value, bool precise)
@@ -150,7 +162,13 @@
   const ConstantType& FromCat1NonSmallConstant(int32_t value, bool precise)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void AddEntry(RegType* new_entry);
+  // Returns the passed-in RegType.
+  template <class RegTypeType>
+  RegTypeType& AddEntry(RegTypeType* new_entry) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Add a string piece to the arena allocator so that it stays live for the lifetime of the
+  // verifier.
+  StringPiece AddString(const StringPiece& string_piece);
 
   template <class Type>
   static const Type* CreatePrimitiveTypeInstance(const std::string& descriptor)
@@ -160,7 +178,8 @@
   // A quick look up for popular small constants.
   static constexpr int32_t kMinSmallConstant = -1;
   static constexpr int32_t kMaxSmallConstant = 4;
-  static const PreciseConstType* small_precise_constants_[kMaxSmallConstant - kMinSmallConstant + 1];
+  static const PreciseConstType* small_precise_constants_[kMaxSmallConstant -
+                                                          kMinSmallConstant + 1];
 
   static constexpr size_t kNumPrimitivesAndSmallConstants =
       12 + (kMaxSmallConstant - kMinSmallConstant + 1);
@@ -172,11 +191,17 @@
   static uint16_t primitive_count_;
 
   // The actual storage for the RegTypes.
-  std::vector<const RegType*> entries_;
+  ScopedArenaVector<const RegType*> entries_;
+
+  // Fast lookup table for finding entries that have a matching class.
+  ScopedArenaVector<std::pair<GcRoot<mirror::Class>, const RegType*>> klass_entries_;
 
   // Whether or not we're allowed to load classes.
   const bool can_load_classes_;
 
+  // Arena allocator backing the RegType entries and their descriptor strings.
+  ScopedArenaAllocator& arena_;
+
   DISALLOW_COPY_AND_ASSIGN(RegTypeCache);
 };
 
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 971b1f5..22ac7e4 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -20,6 +20,7 @@
 
 #include "base/bit_vector.h"
 #include "base/casts.h"
+#include "base/scoped_arena_allocator.h"
 #include "common_runtime_test.h"
 #include "reg_type_cache-inl.h"
 #include "reg_type-inl.h"
@@ -29,12 +30,23 @@
 namespace art {
 namespace verifier {
 
-class RegTypeTest : public CommonRuntimeTest {};
+class BaseRegTypeTest : public CommonRuntimeTest {
+ public:
+  void PostRuntimeCreate() OVERRIDE {
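+    // RegTypeCache now requires a ScopedArenaAllocator; create one from the runtime's arena pool
+    // once the runtime exists.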
+    stack.reset(new ArenaStack(Runtime::Current()->GetArenaPool()));
+    allocator.reset(new ScopedArenaAllocator(stack.get()));
+  }
+
+  std::unique_ptr<ArenaStack> stack;
+  std::unique_ptr<ScopedArenaAllocator> allocator;
+};
+
+class RegTypeTest : public BaseRegTypeTest {};
 
 TEST_F(RegTypeTest, ConstLoHi) {
   // Tests creating primitive types types.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   const RegType& ref_type_const_0 = cache.FromCat1Const(10, true);
   const RegType& ref_type_const_1 = cache.FromCat1Const(10, true);
   const RegType& ref_type_const_2 = cache.FromCat1Const(30, true);
@@ -56,7 +68,7 @@
 
 TEST_F(RegTypeTest, Pairs) {
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   int64_t val = static_cast<int32_t>(1234);
   const RegType& precise_lo = cache.FromCat2ConstLo(static_cast<int32_t>(val), true);
   const RegType& precise_hi = cache.FromCat2ConstHi(static_cast<int32_t>(val >> 32), true);
@@ -80,7 +92,7 @@
 
 TEST_F(RegTypeTest, Primitives) {
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
 
   const RegType& bool_reg_type = cache.Boolean();
   EXPECT_FALSE(bool_reg_type.IsUndefined());
@@ -347,13 +359,13 @@
   EXPECT_TRUE(double_reg_type.HasClass());
 }
 
-class RegTypeReferenceTest : public CommonRuntimeTest {};
+class RegTypeReferenceTest : public BaseRegTypeTest {};
 
 TEST_F(RegTypeReferenceTest, JavalangObjectImprecise) {
   // Tests matching precisions. A reference type that was created precise doesn't
   // match the one that is imprecise.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   const RegType& imprecise_obj = cache.JavaLangObject(false);
   const RegType& precise_obj = cache.JavaLangObject(true);
   const RegType& precise_obj_2 = cache.FromDescriptor(nullptr, "Ljava/lang/Object;", true);
@@ -368,7 +380,7 @@
   // Tests creating unresolved types. Miss for the first time asking the cache and
   // a hit second time.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   const RegType& ref_type_0 = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   EXPECT_TRUE(ref_type_0.IsUnresolvedReference());
   EXPECT_TRUE(ref_type_0.IsNonZeroReferenceTypes());
@@ -384,7 +396,7 @@
 TEST_F(RegTypeReferenceTest, UnresolvedUnintializedType) {
   // Tests creating types uninitialized types from unresolved types.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   const RegType& ref_type_0 = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   EXPECT_TRUE(ref_type_0.IsUnresolvedReference());
   const RegType& ref_type = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
@@ -406,7 +418,7 @@
 TEST_F(RegTypeReferenceTest, Dump) {
   // Tests types for proper Dump messages.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   const RegType& unresolved_ref = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExist;", true);
   const RegType& unresolved_ref_another = cache.FromDescriptor(nullptr, "Ljava/lang/DoesNotExistEither;", true);
   const RegType& resolved_ref = cache.JavaLangString();
@@ -431,7 +443,7 @@
   // Hit the second time. Then check for the same effect when using
   // The JavaLangObject method instead of FromDescriptor. String class is final.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   const RegType& ref_type = cache.JavaLangString();
   const RegType& ref_type_2 = cache.JavaLangString();
   const RegType& ref_type_3 = cache.FromDescriptor(nullptr, "Ljava/lang/String;", true);
@@ -451,7 +463,7 @@
   // Hit the second time. Then I am checking for the same effect when using
   // The JavaLangObject method instead of FromDescriptor. Object Class in not final.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache(true);
+  RegTypeCache cache(true, *allocator);
   const RegType& ref_type = cache.JavaLangObject(true);
   const RegType& ref_type_2 = cache.JavaLangObject(true);
   const RegType& ref_type_3 = cache.FromDescriptor(nullptr, "Ljava/lang/Object;", true);
@@ -464,7 +476,7 @@
   // Tests merging logic
   // String and object , LUB is object.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true);
+  RegTypeCache cache_new(true, *allocator);
   const RegType& string = cache_new.JavaLangString();
   const RegType& Object = cache_new.JavaLangObject(true);
   EXPECT_TRUE(string.Merge(Object, &cache_new).IsJavaLangObject());
@@ -487,7 +499,7 @@
 TEST_F(RegTypeTest, MergingFloat) {
   // Testing merging logic with float and float constants.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true);
+  RegTypeCache cache_new(true, *allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& float_type = cache_new.Float();
@@ -518,7 +530,7 @@
 TEST_F(RegTypeTest, MergingLong) {
   // Testing merging logic with long and long constants.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true);
+  RegTypeCache cache_new(true, *allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& long_lo_type = cache_new.LongLo();
@@ -572,7 +584,7 @@
 TEST_F(RegTypeTest, MergingDouble) {
   // Testing merging logic with double and double constants.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true);
+  RegTypeCache cache_new(true, *allocator);
 
   constexpr int32_t kTestConstantValue = 10;
   const RegType& double_lo_type = cache_new.DoubleLo();
@@ -626,7 +638,7 @@
 TEST_F(RegTypeTest, ConstPrecision) {
   // Tests creating primitive types types.
   ScopedObjectAccess soa(Thread::Current());
-  RegTypeCache cache_new(true);
+  RegTypeCache cache_new(true, *allocator);
   const RegType& imprecise_const = cache_new.FromCat1Const(10, false);
   const RegType& precise_const = cache_new.FromCat1Const(10, true);
 
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 1df2428..57fb701 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -182,6 +182,21 @@
   }
 }
 
+inline RegisterLine* RegisterLine::Create(size_t num_regs, MethodVerifier* verifier) {
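+  // line_ is a trailing variable-length array, so allocate the fixed part plus one uint16_t
+  // slot per register from the verifier's arena.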
+  void* memory = verifier->GetArena().Alloc(OFFSETOF_MEMBER(RegisterLine, line_) +
+                                                (num_regs * sizeof(uint16_t)));
+  return new (memory) RegisterLine(num_regs, verifier);
+}
+
+inline RegisterLine::RegisterLine(size_t num_regs, MethodVerifier* verifier)
+    : num_regs_(num_regs),
+      monitors_(verifier->GetArena().Adapter(kArenaAllocVerifier)),
+      reg_to_lock_depths_(std::less<uint32_t>(), verifier->GetArena().Adapter(kArenaAllocVerifier)),
+      this_initialized_(false) {
+  std::uninitialized_fill_n(line_, num_regs_, 0u);
+  SetResultTypeToUnknown(verifier);
+}
+
 }  // namespace verifier
 }  // namespace art
 
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 33c90e3..37343b5 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -338,6 +338,8 @@
   }
 }
 
+// Pseudo-register outside the range addressable by dex bytecode, used to track locks taken on
+// the null (zero) constant; see PushMonitor() and PopMonitor() below.
+static constexpr uint32_t kVirtualNullRegister = std::numeric_limits<uint32_t>::max();
+
 void RegisterLine::PushMonitor(MethodVerifier* verifier, uint32_t reg_idx, int32_t insn_idx) {
   const RegType& reg_type = GetRegisterType(verifier, reg_idx);
   if (!reg_type.IsReferenceTypes()) {
@@ -352,6 +354,12 @@
     }
   } else {
     if (SetRegToLockDepth(reg_idx, monitors_.size())) {
+      // Null literals can establish aliases that we can't easily track. As such, handle the zero
+      // case as the 2^32-1 register (which isn't available in dex bytecode).
+      if (reg_type.IsZero()) {
+        SetRegToLockDepth(kVirtualNullRegister, monitors_.size());
+      }
+
       monitors_.push_back(insn_idx);
     } else {
       verifier->Fail(VERIFY_ERROR_LOCKING);
@@ -377,7 +385,19 @@
     }
   } else {
     monitors_.pop_back();
-    if (!IsSetLockDepth(reg_idx, monitors_.size())) {
+
+    bool success = IsSetLockDepth(reg_idx, monitors_.size());
+
+    if (!success && reg_type.IsZero()) {
+      // Null literals can establish aliases that we can't easily track. As such, handle the zero
+      // case as the 2^32-1 register (which isn't available in dex bytecode).
+      success = IsSetLockDepth(kVirtualNullRegister, monitors_.size());
+      if (success) {
+        reg_idx = kVirtualNullRegister;
+      }
+    }
+
+    if (!success) {
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
         LOG(WARNING) << "monitor-exit not unlocking the top of the monitor stack while verifying "
@@ -385,12 +405,38 @@
                                      *verifier->GetMethodReference().dex_file);
       }
     } else {
-      // Record the register was unlocked
+      // Record that the register was unlocked. This clears all aliases, thus it will also clear the
+      // null lock, if necessary.
       ClearRegToLockDepth(reg_idx, monitors_.size());
     }
   }
 }
 
+// Returns true if |src| is not locked in |src_map|, or if some other register in |search_map|
+// holds exactly the same lock levels (i.e. an alias of the lock still exists).
+bool FindLockAliasedRegister(uint32_t src,
+                             const RegisterLine::RegToLockDepthsMap& src_map,
+                             const RegisterLine::RegToLockDepthsMap& search_map) {
+  auto it = src_map.find(src);
+  if (it == src_map.end()) {
+    // "Not locked" is trivially aliased.
+    return true;
+  }
+  uint32_t src_lock_levels = it->second;
+  if (src_lock_levels == 0) {
+    // "Not locked" is trivially aliased.
+    return true;
+  }
+
+  // Scan the map for the same value.
+  for (const std::pair<uint32_t, uint32_t>& pair : search_map) {
+    if (pair.first != src && pair.second == src_lock_levels) {
+      return true;
+    }
+  }
+
+  // Nothing found, no alias.
+  return false;
+}
+
 bool RegisterLine::MergeRegisters(MethodVerifier* verifier, const RegisterLine* incoming_line) {
   bool changed = false;
   DCHECK(incoming_line != nullptr);
@@ -417,9 +463,29 @@
         size_t depths = reg_to_lock_depths_.count(idx);
         size_t incoming_depths = incoming_line->reg_to_lock_depths_.count(idx);
         if (depths != incoming_depths) {
-          if (depths == 0 || incoming_depths == 0) {
-            reg_to_lock_depths_.erase(idx);
-          } else {
+          // Stack levels aren't matching. This is potentially bad, as we don't do a
+          // flow-sensitive analysis.
+          // However, this could be an alias of something locked in one path, and the alias was
+          // destroyed in another path. It is fine to drop this as long as there's another alias
+          // for the lock around. The last vanishing alias will then report that things would be
+          // left unlocked. We need to check for aliases for both lock levels.
+          //
+          // Example (lock status in curly braces as pair of register and lock levels):
+          //
+          //                            lock v1 {v1=1}
+          //                        |                    |
+          //              v0 = v1 {v0=1, v1=1}       v0 = v2 {v1=1}
+          //                        |                    |
+          //                                 {v1=1}
+                                         // Dropping v0, as the status can't be merged
+                                         // but the lock info ("locked at depth 1" and
+                                         // "not locked at all") is available.
+          if (!FindLockAliasedRegister(idx,
+                                       reg_to_lock_depths_,
+                                       reg_to_lock_depths_) ||
+              !FindLockAliasedRegister(idx,
+                                       incoming_line->reg_to_lock_depths_,
+                                       reg_to_lock_depths_)) {
             verifier->Fail(VERIFY_ERROR_LOCKING);
             if (kDumpLockFailures) {
               LOG(WARNING) << "mismatched stack depths for register v" << idx
@@ -429,20 +495,51 @@
             }
             break;
           }
+          // We found aliases, set this to zero.
+          reg_to_lock_depths_.erase(idx);
         } else if (depths > 0) {
           // Check whether they're actually the same levels.
           uint32_t locked_levels = reg_to_lock_depths_.find(idx)->second;
           uint32_t incoming_locked_levels = incoming_line->reg_to_lock_depths_.find(idx)->second;
           if (locked_levels != incoming_locked_levels) {
-            verifier->Fail(VERIFY_ERROR_LOCKING);
-            if (kDumpLockFailures) {
-              LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
-                  << std::hex << locked_levels << std::dec  << " != "
-                  << std::hex << incoming_locked_levels << std::dec << " in "
-                  << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                  *verifier->GetMethodReference().dex_file);
+            // Lock levels aren't matching. This is potentially bad, as we don't do a
+            // flow-sensitive analysis.
+            // However, this could be an alias of something locked in one path, and the alias was
+            // destroyed in another path. It is fine to drop this as long as there's another alias
+            // for the lock around. The last vanishing alias will then report that things would be
+            // left unlocked. We need to check for aliases for both lock levels.
+            //
+            // Example (lock status in curly braces as pairs of register and lock level):
+            //
+            //                          lock v1 {v1=1}
+            //                          lock v2 {v1=1, v2=2}
+            //                        |                      |
+            //         v0 = v1 {v0=1, v1=1, v2=2}  v0 = v2 {v0=2, v1=1, v2=2}
+            //                        |                      |
+            //                             {v1=1, v2=2}
+            //                                           // Dropping v0, as the status can't be
+            //                                           // merged but the lock info ("locked at
+            //                                           // depth 1" and "locked at depth 2") is
+            //                                           // available.
+            if (!FindLockAliasedRegister(idx,
+                                         reg_to_lock_depths_,
+                                         reg_to_lock_depths_) ||
+                !FindLockAliasedRegister(idx,
+                                         incoming_line->reg_to_lock_depths_,
+                                         reg_to_lock_depths_)) {
+              // No aliases for both current and incoming, we'll lose information.
+              verifier->Fail(VERIFY_ERROR_LOCKING);
+              if (kDumpLockFailures) {
+                LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
+                    << std::hex << locked_levels << std::dec  << " != "
+                    << std::hex << incoming_locked_levels << std::dec << " in "
+                    << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                    *verifier->GetMethodReference().dex_file);
+              }
+              break;
             }
-            break;
+            // We found aliases, set this to zero.
+            reg_to_lock_depths_.erase(idx);
           }
         }
       }
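
As a standalone illustration of the alias scan introduced above, the first merge example can be replayed: v0 and v1 both carry the depth-1 lock, so dropping v0 is safe while v1 is still around, and only the last vanishing alias reports a problem. This is a minimal sketch using a plain std::map in place of the arena-backed RegToLockDepthsMap; HasLockAlias is a hypothetical name, not ART code.

#include <cstdint>
#include <iostream>
#include <map>

// Same scan as FindLockAliasedRegister above, over a plain std::map for illustration.
static bool HasLockAlias(uint32_t src, const std::map<uint32_t, uint32_t>& map) {
  auto it = map.find(src);
  if (it == map.end() || it->second == 0) {
    return true;  // "Not locked" is trivially aliased.
  }
  for (const auto& pair : map) {
    if (pair.first != src && pair.second == it->second) {
      return true;  // Another register carries the same lock-depth bits.
    }
  }
  return false;
}

int main() {
  // Left branch of the first example above: v0 = v1 while v1 holds the depth-1 lock.
  std::map<uint32_t, uint32_t> line = {{0u, 1u}, {1u, 1u}};
  std::cout << std::boolalpha << HasLockAlias(0u, line) << "\n";  // true: v1 still aliases the lock.
  line.erase(1u);  // Destroy the alias; v0 is now the last register holding the lock.
  std::cout << HasLockAlias(0u, line) << "\n";                    // false: dropping v0 would lose it.
}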
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 46db1c6..b2f5555 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <vector>
 
+#include "base/scoped_arena_containers.h"
 #include "safe_map.h"
 
 namespace art {
@@ -58,11 +59,11 @@
 // stack of entered monitors (identified by code unit offset).
 class RegisterLine {
  public:
-  static RegisterLine* Create(size_t num_regs, MethodVerifier* verifier) {
-    void* memory = operator new(sizeof(RegisterLine) + (num_regs * sizeof(uint16_t)));
-    RegisterLine* rl = new (memory) RegisterLine(num_regs, verifier);
-    return rl;
-  }
+  // A map from register to a bit vector of indices into the monitors_ stack.
+  using RegToLockDepthsMap = ScopedArenaSafeMap<uint32_t, uint32_t>;
+
+  // Create a register line of num_regs registers.
+  static RegisterLine* Create(size_t num_regs, MethodVerifier* verifier);
 
   // Implement category-1 "move" instructions. Copy a 32-bit value from "vsrc" to "vdst".
   void CopyRegister1(MethodVerifier* verifier, uint32_t vdst, uint32_t vsrc, TypeCategory cat)
@@ -311,11 +312,11 @@
   // Write a bit at each register location that holds a reference.
   void WriteReferenceBitMap(MethodVerifier* verifier, std::vector<uint8_t>* data, size_t max_bytes);
 
-  size_t GetMonitorEnterCount() {
+  size_t GetMonitorEnterCount() const {
     return monitors_.size();
   }
 
-  uint32_t GetMonitorEnterDexPc(size_t i) {
+  uint32_t GetMonitorEnterDexPc(size_t i) const {
     return monitors_[i];
   }
 
@@ -375,11 +376,7 @@
     reg_to_lock_depths_.erase(reg);
   }
 
-  RegisterLine(size_t num_regs, MethodVerifier* verifier)
-      : num_regs_(num_regs), this_initialized_(false) {
-    memset(&line_, 0, num_regs_ * sizeof(uint16_t));
-    SetResultTypeToUnknown(verifier);
-  }
+  RegisterLine(size_t num_regs, MethodVerifier* verifier);
 
   // Storage for the result register's type, valid after an invocation.
   uint16_t result_[2];
@@ -388,17 +385,18 @@
   const uint32_t num_regs_;
 
   // A stack of monitor enter locations.
-  std::vector<uint32_t, TrackingAllocator<uint32_t, kAllocatorTagVerifier>> monitors_;
+  ScopedArenaVector<uint32_t> monitors_;
+
   // A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
   // stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
   // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5.
-  AllocationTrackingSafeMap<uint32_t, uint32_t, kAllocatorTagVerifier> reg_to_lock_depths_;
+  RegToLockDepthsMap reg_to_lock_depths_;
 
   // Whether "this" initialization (a constructor supercall) has happened.
   bool this_initialized_;
 
   // An array of RegType Ids associated with each dex register.
-  uint16_t line_[0];
+  uint16_t line_[1];
 
   DISALLOW_COPY_AND_ASSIGN(RegisterLine);
 };
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index c984b17..b76555b 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -150,10 +150,15 @@
   // Do we have a managed handler? If so, run it first.
   SpecialSignalHandlerFn managed = user_sigactions[sig].GetSpecialHandler();
   if (managed != nullptr) {
+    sigset_t mask, old_mask;
+    sigfillset(&mask);
+    sigprocmask(SIG_BLOCK, &mask, &old_mask);
     // Call the handler. If it succeeds, we're done.
     if (managed(sig, info, context)) {
+      sigprocmask(SIG_SETMASK, &old_mask, nullptr);
       return;
     }
+    sigprocmask(SIG_SETMASK, &old_mask, nullptr);
   }
 
   const struct sigaction& action = user_sigactions[sig].GetAction();
@@ -166,7 +171,10 @@
     }
   } else {
     if (action.sa_sigaction != nullptr) {
+      sigset_t old_mask;
+      sigprocmask(SIG_BLOCK, &action.sa_mask, &old_mask);
       action.sa_sigaction(sig, info, context);
+      sigprocmask(SIG_SETMASK, &old_mask, nullptr);
     } else {
       signal(sig, SIG_DFL);
       raise(sig);
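
The change above wraps both the managed handler and the chained sa_sigaction in a block/restore of the signal mask. A minimal sketch of that save-and-restore pattern with plain POSIX calls follows; RunWithSignalsBlocked is an illustrative helper, not part of sigchainlib, and multithreaded code would normally prefer pthread_sigmask.

#include <signal.h>
#include <stdio.h>

// Run a callback with all blockable signals masked, then restore the previous mask.
template <typename Fn>
void RunWithSignalsBlocked(Fn fn) {
  sigset_t mask, old_mask;
  sigfillset(&mask);                             // Block everything blockable (SIGKILL/SIGSTOP are exempt).
  sigprocmask(SIG_BLOCK, &mask, &old_mask);      // Remember what was blocked before.
  fn();
  sigprocmask(SIG_SETMASK, &old_mask, nullptr);  // Restore exactly the previous mask.
}

int main() {
  RunWithSignalsBlocked([] { puts("handler body runs with signals blocked"); });
  return 0;
}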
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 285df18..2dbd7e8 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -19,15 +19,17 @@
 
 namespace art {
 
-#define CHECK_REGS_CONTAIN_REFS(dex_pc, abort_if_not_found, ...) do { \
-  int t[] = {__VA_ARGS__}; \
-  int t_size = sizeof(t) / sizeof(*t); \
-  uintptr_t native_quick_pc = m->ToNativeQuickPc(dex_pc, \
-                                                 /* is_catch_handler */ false, \
-                                                 abort_if_not_found); \
-  if (native_quick_pc != UINTPTR_MAX) { \
-    CheckReferences(t, t_size, m->NativeQuickPcOffset(native_quick_pc)); \
-  } \
+#define CHECK_REGS_CONTAIN_REFS(dex_pc, abort_if_not_found, ...) do {                 \
+  int t[] = {__VA_ARGS__};                                                            \
+  int t_size = sizeof(t) / sizeof(*t);                                                \
+  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();       \
+  uintptr_t native_quick_pc = method_header->ToNativeQuickPc(GetMethod(),             \
+                                                 dex_pc,                              \
+                                                 /* is_catch_handler */ false,        \
+                                                 abort_if_not_found);                 \
+  if (native_quick_pc != UINTPTR_MAX) {                                               \
+    CheckReferences(t, t_size, method_header->NativeQuickPcOffset(native_quick_pc));  \
+  }                                                                                   \
 } while (false);
 
 struct ReferenceMap2Visitor : public CheckReferenceMapVisitor {
@@ -47,9 +49,11 @@
     if (m_name.compare("f") == 0) {
       CHECK_REGS_CONTAIN_REFS(0x03U, true, 8);  // v8: this
       CHECK_REGS_CONTAIN_REFS(0x06U, true, 8, 1);  // v8: this, v1: x
-      CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
+      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
+      }
       CHECK_REGS_CONTAIN_REFS(0x0cU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      if (!m->IsOptimized(sizeof(void*))) {
+      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       }
       CHECK_REGS_CONTAIN_REFS(0x10U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
@@ -64,8 +68,9 @@
       CHECK_REGS_CONTAIN_REFS(0x13U, false, 3);  // v3: y
       // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
       CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
-      CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-      if (!m->IsOptimized(sizeof(void*))) {
+      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
+        CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);
         // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
         CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);
         // v5 is removed from the root set because there is a "merge" operation.
@@ -74,7 +79,7 @@
       }
       CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
 
-      if (!m->IsOptimized(sizeof(void*))) {
+      if (!GetCurrentOatQuickMethodHeader()->IsOptimized()) {
         CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       }
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index c93db50..5b22e88 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -129,13 +129,36 @@
         System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
     }
 
-    if (unsafe.compareAndSwapObject(t, objectOffset, null, new Object())) {
+    // We do not use `null` as argument to sun.misc.Unsafe.compareAndSwapObject
+    // in those tests, as this value is not affected by heap poisoning
+    // (which uses address negation to poison and unpoison heap object
+    // references).  This way, when heap poisoning is enabled, we can
+    // better exercise its implementation within that method.
+    if (unsafe.compareAndSwapObject(t, objectOffset, new Object(), new Object())) {
         System.out.println("Unexpectedly succeeding compareAndSwapObject...");
     }
-    if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue, null)) {
+    Object objectValue2 = new Object();
+    if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue, objectValue2)) {
         System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
     }
-    if (!unsafe.compareAndSwapObject(t, objectOffset, null, new Object())) {
+    Object objectValue3 = new Object();
+    if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue2, objectValue3)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+    }
+
+    // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
+    // object (`t`) for the `obj` and `newValue` arguments.
+    if (!unsafe.compareAndSwapObject(t, objectOffset, objectValue3, t)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+    }
+    // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
+    // object (`t`) for the `obj`, `expectedValue` and `newValue` arguments.
+    if (!unsafe.compareAndSwapObject(t, objectOffset, t, t)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
+    }
+    // Exercise sun.misc.Unsafe.compareAndSwapObject using the same
+    // object (`t`) for the `obj` and `expectedValue` arguments.
+    if (!unsafe.compareAndSwapObject(t, objectOffset, t, new Object())) {
         System.out.println("Unexpectedly not succeeding compareAndSwapObject...");
     }
   }
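
The reworked test drives sun.misc.Unsafe.compareAndSwapObject through failing and succeeding cases. The compare-and-swap semantics being exercised can be sketched with std::atomic on a pointer field; this is an illustrative stand-in, not the Unsafe implementation.

#include <atomic>
#include <iostream>

int main() {
  int a = 1;
  int b = 2;
  std::atomic<int*> field{&a};

  int* expected = &b;  // Wrong expected value: the swap must fail, as in the first case above.
  bool swapped = field.compare_exchange_strong(expected, &a);
  std::cout << std::boolalpha << swapped << "\n";  // false

  expected = &a;       // Correct expected value: the swap succeeds.
  swapped = field.compare_exchange_strong(expected, &b);
  std::cout << swapped << " " << (field.load() == &b) << "\n";  // true true
}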
diff --git a/test/079-phantom/src/Bitmap.java b/test/079-phantom/src/Bitmap.java
index 85eb3cc..ff43749 100644
--- a/test/079-phantom/src/Bitmap.java
+++ b/test/079-phantom/src/Bitmap.java
@@ -125,7 +125,6 @@
  */
 class BitmapWatcher extends Thread {
     ReferenceQueue<PhantomWrapper> mQueue;
-    volatile boolean mQuit = false;
 
     BitmapWatcher(ReferenceQueue<PhantomWrapper> queue) {
         mQueue = queue;
@@ -133,7 +132,7 @@
     }
 
     public void run() {
-        while (!mQuit) {
+        while (true) {
             try {
                 PhantomWrapper ref = (PhantomWrapper) mQueue.remove();
                 //System.out.println("dequeued ref " + ref.mNativeData +
@@ -142,12 +141,12 @@
                 //ref.clear();
             } catch (InterruptedException ie) {
                 System.out.println("intr");
+                break;
             }
         }
     }
 
     public void shutDown() {
-        mQuit = true;
         interrupt();
     }
 }
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index 2f6d496..7c47e99 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -91,6 +91,7 @@
                      * is an error we can't recover from.
                      */
                     meth.invoke(dexFile, name, this);
+                    System.out.println("Unreachable");
                 } finally {
                     if (dexFile != null) {
                         /* close the DexFile to make CloseGuard happy */
diff --git a/test/088-monitor-verification/smali/NullLocks.smali b/test/088-monitor-verification/smali/NullLocks.smali
new file mode 100644
index 0000000..8262f19
--- /dev/null
+++ b/test/088-monitor-verification/smali/NullLocks.smali
@@ -0,0 +1,28 @@
+.class public LNullLocks;
+
+.super Ljava/lang/Object;
+
+.method public static run(Z)V
+   .registers 3
+
+   invoke-static {}, LMain;->assertIsManaged()V
+
+   if-eqz v2, :Lfalse
+
+   const v0, 0           # Null.
+   monitor-enter v0
+   const v1, 0           # Another null. This should be detected as an alias, such that the exit
+                         # will not fail verification.
+   monitor-exit v1
+
+   monitor-enter v0
+   monitor-exit v1
+
+   monitor-enter v1
+   monitor-exit v0
+
+:Lfalse
+
+   return-void
+
+.end method
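
This smali relies on the verifier folding every known-null register into one shared slot (kVirtualNullRegister in the register_line.cc hunk above), so a monitor-enter on one null constant pairs with a monitor-exit on another. A minimal sketch of that normalisation, with kNullSlot as an illustrative sentinel rather than the real constant:

#include <cstdint>
#include <iostream>
#include <set>

// Illustrative sentinel: one shared slot for every register known to hold null.
constexpr uint32_t kNullSlot = 0xFFFFu;

// Map a register to the slot used for lock bookkeeping; all null references share one slot.
uint32_t LockSlotFor(uint32_t reg, const std::set<uint32_t>& known_null_regs) {
  return known_null_regs.count(reg) != 0 ? kNullSlot : reg;
}

int main() {
  std::set<uint32_t> null_regs = {0u, 1u};  // v0 and v1 both hold the null constant.
  // monitor-enter v0 / monitor-exit v1 resolve to the same slot, so the pair balances.
  std::cout << std::boolalpha
            << (LockSlotFor(0u, null_regs) == LockSlotFor(1u, null_regs)) << "\n";  // true
}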
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 2188055..212c894 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -27,6 +27,13 @@
      */
     public static void main(String[] args) {
         System.loadLibrary(args[0]);
+        if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) {
+            // Some tests ensure that the verifier was able to guarantee balanced locking by
+            // asserting that the test function is running as compiled code. But skip this now,
+            // as this seems to be a non-compiled code test configuration.
+            disableStackFrameAsserts();
+        }
+
         Main m = new Main();
 
         m.recursiveSync(0);
@@ -49,7 +56,7 @@
         Object obj1 = new Object();
         Object obj2 = new Object();
 
-        m.twoPath(obj1, obj2, 0);
+        TwoPath.twoPath(obj1, obj2, 0);
         System.out.println("twoPath ok");
 
         m.triplet(obj1, obj2, 0);
@@ -62,6 +69,7 @@
      * Recursive synchronized method.
      */
     synchronized void recursiveSync(int iter) {
+        assertIsManaged();
         if (iter < 40) {
             recursiveSync(iter+1);
         } else {
@@ -73,6 +81,7 @@
      * Tests simple nesting, with and without a throw.
      */
     void nestedMayThrow(boolean doThrow) {
+        assertIsManaged();
         synchronized (this) {
             synchronized (Main.class) {
                 synchronized (new Object()) {
@@ -90,6 +99,7 @@
      * Exercises bug 3215458.
      */
     void constantLock() {
+        assertIsManaged();
         Class thing = Thread.class;
         synchronized (Thread.class) {}
     }
@@ -98,6 +108,7 @@
      * Confirms that we can have 32 nested monitors on one method.
      */
     void notExcessiveNesting() {
+        assertIsManaged();
         synchronized (this) {   // 1
         synchronized (this) {   // 2
         synchronized (this) {   // 3
@@ -138,6 +149,7 @@
      * method.
      */
     void notNested() {
+        assertIsManaged();
         synchronized (this) {}  // 1
         synchronized (this) {}  // 2
         synchronized (this) {}  // 3
@@ -178,25 +190,6 @@
     private void doNothing(Object obj) {}
 
     /**
-     * Conditionally uses one of the synchronized objects.
-     */
-    public void twoPath(Object obj1, Object obj2, int x) {
-        Object localObj;
-
-        synchronized (obj1) {
-            synchronized(obj2) {
-                if (x == 0) {
-                    localObj = obj2;
-                } else {
-                    localObj = obj1;
-                }
-            }
-        }
-
-        doNothing(localObj);
-    }
-
-    /**
      * Lock the monitor two or three times, and make use of the locked or
      * unlocked object.
      */
@@ -220,19 +213,16 @@
 
     // Smali testing code.
     private static void runSmaliTests() {
-        if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) {
-            // Skip test, this seems to be a non-compiled code test configuration.
-            return;
-        }
-
         runTest("OK", new Object[] { new Object(), new Object() }, null);
         runTest("TooDeep", new Object[] { new Object() }, null);
         runTest("NotStructuredOverUnlock", new Object[] { new Object() },
                 IllegalMonitorStateException.class);
-        runTest("NotStructuredUnderUnlock", new Object[] { new Object() }, null);
-                // TODO: new IllegalMonitorStateException());
+        runTest("NotStructuredUnderUnlock", new Object[] { new Object() },
+                IllegalMonitorStateException.class);
         runTest("UnbalancedJoin", new Object[] { new Object(), new Object() }, null);
         runTest("UnbalancedStraight", new Object[] { new Object(), new Object() }, null);
+        runTest("NullLocks", new Object[] { false }, null);
+        runTest("NullLocks", new Object[] { true }, NullPointerException.class);
     }
 
     private static void runTest(String className, Object[] parameters, Class<?> excType) {
@@ -282,4 +272,5 @@
     public static native boolean hasOatFile();
     public static native boolean runtimeIsSoftFail();
     public static native boolean isInterpreted();
+    public static native void disableStackFrameAsserts();
 }
diff --git a/test/088-monitor-verification/src/TwoPath.java b/test/088-monitor-verification/src/TwoPath.java
new file mode 100644
index 0000000..bdc15ad
--- /dev/null
+++ b/test/088-monitor-verification/src/TwoPath.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+/*
+ * Test case for conditionally using one of two synchronized objects.
+ *
+ * This code cannot be verified at the moment, as the join point merges a register with two
+ * different lock options. Do not put it into Main to avoid the whole class being run in the
+ * interpreter.
+ */
+public class TwoPath {
+
+    /**
+     * Conditionally uses one of the synchronized objects.
+     */
+    public static void twoPath(Object obj1, Object obj2, int x) {
+        Main.assertIsManaged();
+
+        Object localObj;
+
+        synchronized (obj1) {
+            synchronized(obj2) {
+                if (x == 0) {
+                    localObj = obj2;
+                } else {
+                    localObj = obj1;
+                }
+            }
+        }
+
+        doNothing(localObj);
+    }
+
+    private static void doNothing(Object o) {
+    }
+}
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index 948273a..e9946c8 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -390,6 +390,20 @@
 #endif
 #endif
 
+static bool cannot_be_blocked(int signum) {
+  // These two sigs cannot be blocked anywhere.
+  if ((signum == SIGKILL) || (signum == SIGSTOP)) {
+      return true;
+  }
+
+  // Invalid real-time signals (in the libc-reserved range or above SIGRTMAX) cannot be blocked.
+  if (((signum >= 32) && (signum < SIGRTMIN)) || (signum > SIGRTMAX)) {
+      return true;
+  }
+
+  return false;
+}
+
 // A dummy special handler, continuing after the faulting location. This code comes from
 // 004-SignalTest.
 static bool nb_signalhandler(int sig, siginfo_t* info ATTRIBUTE_UNUSED, void* context) {
@@ -413,6 +427,23 @@
     UNUSED(context);
 #endif
   }
+
+  // Before invoking this handler, all other unclaimed signals must be blocked.
+  // Check the current signal mask to verify that this is the case.
+  sigset_t tmpset;
+  sigemptyset(&tmpset);
+  sigprocmask(SIG_SETMASK, nullptr, &tmpset);
+  int other_claimed = (sig == SIGSEGV) ? SIGILL : SIGSEGV;
+  for (int signum = 0; signum < NSIG; ++signum) {
+    if (cannot_be_blocked(signum)) {
+        continue;
+    } else if ((sigismember(&tmpset, signum)) && (signum == other_claimed)) {
+      printf("ERROR: The claimed signal %d is blocked\n", signum);
+    } else if ((!sigismember(&tmpset, signum)) && (signum != other_claimed)) {
+      printf("ERROR: The unclaimed signal %d is not blocked\n", signum);
+    }
+  }
+
   // We handled this...
   return true;
 }
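
The check added above queries the current signal mask from inside the handler. Passing a null new set to sigprocmask only reads the mask, which can then be probed with sigismember; a minimal sketch under that assumption (IsBlocked is an illustrative helper, not test code):

#include <signal.h>
#include <stdio.h>

// Report whether a signal is currently blocked for the calling thread.
static bool IsBlocked(int signum) {
  sigset_t current;
  sigemptyset(&current);
  sigprocmask(SIG_SETMASK, nullptr, &current);  // A null new set only reads the current mask.
  return sigismember(&current, signum) == 1;
}

int main() {
  printf("SIGUSR1 blocked: %d\n", IsBlocked(SIGUSR1));  // Typically 0 at startup.
  sigset_t mask;
  sigemptyset(&mask);
  sigaddset(&mask, SIGUSR1);
  sigprocmask(SIG_BLOCK, &mask, nullptr);
  printf("SIGUSR1 blocked: %d\n", IsBlocked(SIGUSR1));  // Now 1.
  return 0;
}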
diff --git a/test/131-structural-change/expected.txt b/test/131-structural-change/expected.txt
index cc7713d..1d19278 100644
--- a/test/131-structural-change/expected.txt
+++ b/test/131-structural-change/expected.txt
@@ -1,2 +1,3 @@
+JNI_OnLoad called
 Should really reach here.
 Done.
diff --git a/test/131-structural-change/src/Main.java b/test/131-structural-change/src/Main.java
index 6cbbd12..c748899 100644
--- a/test/131-structural-change/src/Main.java
+++ b/test/131-structural-change/src/Main.java
@@ -35,7 +35,7 @@
             e.printStackTrace(System.out);
         }
 
-        boolean haveOatFile = hasOat();
+        boolean haveOatFile = hasOatFile();
         boolean gotError = false;
         try {
             Class<?> bClass = getClass().getClassLoader().loadClass("B");
@@ -45,10 +45,10 @@
             e.printStackTrace(System.out);
         }
         if (haveOatFile ^ gotError) {
-            System.out.println("Did not get expected error.");
+            System.out.println("Did not get expected error. " + haveOatFile + " " + gotError);
         }
         System.out.println("Done.");
     }
 
-    private native static boolean hasOat();
+    private native static boolean hasOatFile();
 }
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 53d7abe..11de660 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -21,3 +21,4 @@
 JNI_OnLoad called
 class null false test
 JNI_OnUnload called
+Number of loaded unload-ex maps 0
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 3cc43ac..0640b36 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
 import java.lang.ref.WeakReference;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
@@ -43,11 +46,28 @@
             testStackTrace(constructor);
             // Stress test to make sure we don't leak memory.
             stressTest(constructor);
+            // Test that the oat files are unloaded.
+            testOatFilesUnloaded(getPid());
         } catch (Exception e) {
             System.out.println(e);
         }
     }
 
+    private static void testOatFilesUnloaded(int pid) throws Exception {
+        BufferedReader reader = new BufferedReader(new FileReader ("/proc/" + pid + "/maps"));
+        String line;
+        int count = 0;
+        Runtime.getRuntime().gc();
+        System.runFinalization();
+        while ((line = reader.readLine()) != null) {
+            if (line.contains("@141-class-unload-ex.jar")) {
+                System.out.println(line);
+                ++count;
+            }
+        }
+        System.out.println("Number of loaded unload-ex maps " + count);
+    }
+
     private static void stressTest(Constructor constructor) throws Exception {
         for (int i = 0; i <= 100; ++i) {
             setUpUnloadLoader(constructor, false);
@@ -163,4 +183,8 @@
         loadLibrary.invoke(intHolder, nativeLibraryName);
         return new WeakReference(loader);
     }
+
+    private static int getPid() throws Exception {
+      return Integer.parseInt(new File("/proc/self").getCanonicalFile().getName());
+    }
 }
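
testOatFilesUnloaded counts mappings of the unloaded jar by scanning /proc/<pid>/maps after forcing a GC. The same scan can be sketched in isolation, assuming a Linux /proc filesystem; the substring here is only illustrative, the test itself looks for the 141-class-unload-ex jar.

#include <fstream>
#include <iostream>
#include <string>

// Count lines in the current process's memory map that mention a given file name.
static int CountMappings(const std::string& needle) {
  std::ifstream maps("/proc/self/maps");  // /proc/self avoids resolving the pid by hand.
  std::string line;
  int count = 0;
  while (std::getline(maps, line)) {
    if (line.find(needle) != std::string::npos) {
      ++count;
    }
  }
  return count;
}

int main() {
  // Illustrative substring; the test above checks for "@141-class-unload-ex.jar" after a GC.
  std::cout << "mappings: " << CountMappings("libc") << "\n";
}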
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 22829cd..ffeae7d 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -624,12 +624,13 @@
       constantIndexing2(new int[3]);
     } catch (ArrayIndexOutOfBoundsException e) {
       assertIsManaged();  // This is to ensure that single-frame deoptimization works.
-                                // Will need to be updated if constantIndexing2 is inlined.
+                          // Will need to be updated if constantIndexing2 is inlined.
       try {
         // This will cause AIOOBE.
         constantIndexingForward6(new int[3]);
       } catch (ArrayIndexOutOfBoundsException e2) {
-        assertIsManaged();
+        // Having deopted, we expect to be running interpreted at this point.
+        // Does not apply to debuggable, however, since we do not inline.
         return 99;
       }
     }
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 134abd1..f1885de 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -537,6 +537,17 @@
     return ((SubclassA)a).toString();
   }
 
+
+  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) reference_type_propagation (after)
+  /// CHECK:      ParameterValue klass:Main can_be_null:false exact:false
+  /// CHECK:      ParameterValue klass:Super can_be_null:true exact:false
+  /// CHECK:      ParameterValue
+  /// CHECK:      ParameterValue klass:SubclassA can_be_null:true exact:false
+  /// CHECK:      ParameterValue klass:Final can_be_null:true exact:true
+  /// CHECK-NOT:  ParameterValue
+  private void argumentCheck(Super s, double d, SubclassA a, Final f) {
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 9facfdb..30f9954 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -17,6 +17,7 @@
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "jni.h"
+#include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
 #include "thread.h"
@@ -45,10 +46,14 @@
       CHECK_EQ(value, 42u);
 
       bool success = GetVReg(m, 1, kIntVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        CHECK(!success);
+      }
 
       success = GetVReg(m, 2, kIntVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        CHECK(!success);
+      }
 
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
@@ -78,10 +83,14 @@
       CHECK_EQ(value, 42u);
 
       bool success = GetVRegPair(m, 2, kLongLoVReg, kLongHiVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        CHECK(!success);
+      }
 
       success = GetVRegPair(m, 4, kLongLoVReg, kLongHiVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        CHECK(!success);
+      }
 
       uint32_t value32 = 0;
       CHECK(GetVReg(m, 6, kReferenceVReg, &value32));
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index c21168b..64b2336 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -17,6 +17,7 @@
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "jni.h"
+#include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
 #include "thread.h"
@@ -63,7 +64,9 @@
       CHECK_EQ(value, 1u);
 
       bool success = GetVReg(m, 2, kIntVReg, &value);
-      if (m->IsOptimized(sizeof(void*))) CHECK(!success);
+      if (!IsCurrentFrameInInterpreter() && GetCurrentOatQuickMethodHeader()->IsOptimized()) {
+        CHECK(!success);
+      }
 
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(value, 1u);
diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc
index 7e9a583..375a3fc 100644
--- a/test/466-get-live-vreg/get_live_vreg_jni.cc
+++ b/test/466-get-live-vreg/get_live_vreg_jni.cc
@@ -17,6 +17,7 @@
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "jni.h"
+#include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
 #include "thread.h"
@@ -43,7 +44,7 @@
       found_method_ = true;
       uint32_t value = 0;
       if (GetCurrentQuickFrame() != nullptr &&
-          m->IsOptimized(sizeof(void*)) &&
+          GetCurrentOatQuickMethodHeader()->IsOptimized() &&
           !Runtime::Current()->IsDebuggable()) {
         CHECK_EQ(GetVReg(m, 0, kIntVReg, &value), false);
       } else {
diff --git a/test/527-checker-array-access-split/expected.txt b/test/527-checker-array-access-split/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/527-checker-array-access-split/expected.txt
diff --git a/test/527-checker-array-access-split/info.txt b/test/527-checker-array-access-split/info.txt
new file mode 100644
index 0000000..9206804
--- /dev/null
+++ b/test/527-checker-array-access-split/info.txt
@@ -0,0 +1 @@
+Test arm64-specific array access optimization.
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
new file mode 100644
index 0000000..ead9446
--- /dev/null
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test that HArrayGet with a constant index is not split.
+   */
+
+  /// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  public static int constantIndexGet(int array[]) {
+    return array[1];
+  }
+
+  /**
+   * Test that HArraySet with a constant index is not split.
+   */
+
+  /// CHECK-START-ARM64: void Main.constantIndexSet(int[]) instruction_simplifier_arm64 (before)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
+  /// CHECK-START-ARM64: void Main.constantIndexSet(int[]) instruction_simplifier_arm64 (after)
+  /// CHECK:             <<Const2:i\d+>>        IntConstant 2
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
+
+  public static void constantIndexSet(int array[]) {
+    array[1] = 2;
+  }
+
+  /**
+   * Test basic splitting of HArrayGet.
+   */
+
+  /// CHECK-START-ARM64: int Main.get(int[], int) instruction_simplifier_arm64 (before)
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArrayGet [<<Array>>,<<Index>>]
+
+  /// CHECK-START-ARM64: int Main.get(int[], int) instruction_simplifier_arm64 (after)
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArrayGet [<<Address>>,<<Index>>]
+
+  public static int get(int array[], int index) {
+    return array[index];
+  }
+
+  /**
+   * Test basic splitting of HArraySet.
+   */
+
+  /// CHECK-START-ARM64: void Main.set(int[], int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Arg>>]
+
+  /// CHECK-START-ARM64: void Main.set(int[], int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                                    ParameterValue
+  /// CHECK:                                    ParameterValue
+  /// CHECK:             <<Arg:i\d+>>           ParameterValue
+  /// CHECK:             <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address>>,<<Index>>,<<Arg>>]
+
+  public static void set(int array[], int index, int value) {
+    array[index] = value;
+  }
+
+  /**
+   * Check that the intermediate address can be shared after GVN.
+   */
+
+  /// CHECK-START-ARM64: void Main.getSet(int[], int) instruction_simplifier_arm64 (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.getSet(int[], int) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+  public static void getSet(int array[], int index) {
+    array[index] = array[index] + 1;
+  }
+
+  /**
+   * Check that the intermediate address computation is not reordered or merged
+   * across IRs that can trigger GC.
+   */
+
+  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) instruction_simplifier_arm64 (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant
+  /// CHECK:             <<Array:l\d+>>         NullCheck
+  /// CHECK:             <<Index:i\d+>>         BoundsCheck
+  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    NewArray
+  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  public static int[] accrossGC(int array[], int index) {
+    int tmp = array[index] + 1;
+    int[] new_array = new int[1];
+    array[index] = tmp;
+    return new_array;
+  }
+
+  /**
+   * Test that the intermediate address is shared between array accesses after
+   * the bounds checks have been removed by BCE.
+   */
+
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  // By the time we reach the architecture-specific instruction simplifier, BCE
+  // has removed the bounds checks in the loop.
+
+  // Note that we do not care that the `DataOffset` is `12`. But if we do not
+  // specify it and any other `IntConstant` appears before that instruction,
+  // checker will match the previous `IntConstant`, and we will thus fail the
+  // check.
+
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK:             <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK-NOT:                                Arm64IntermediateAddress
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+  public static int canMergeAfterBCE1() {
+    int[] array = {0, 1, 2, 3};
+    for (int i = 0; i < array.length; i++) {
+      array[i] = array[i] + 1;
+    }
+    return array[array.length - 1];
+  }
+
+  /**
+   * This test case is similar to `canMergeAfterBCE1`, but with different
+   * indexes for the accesses.
+   */
+
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() instruction_simplifier_arm64 (before)
+  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Array>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+
+  // Note that we do not care that the `DataOffset` is `12`. But if we do not
+  // specify it and any other `IntConstant` appears before that instruction,
+  // checker will match the previous `IntConstant`, and we will thus fail the
+  // check.
+
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address1:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:         <<Address2:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:             <<Address3:l\d+>>      Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
+  /// CHECK:             <<Array:l\d+>>         NewArray
+  /// CHECK:             <<Index:i\d+>>         Phi
+  /// CHECK:                                    If
+  //  -------------- Loop
+  /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
+  /// CHECK-DAG:         <<Address:l\d+>>       Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
+  /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
+  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+
+  // There should be only one intermediate address computation in the loop.
+
+  /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+  /// CHECK:                                    Arm64IntermediateAddress
+  /// CHECK-NOT:                                Arm64IntermediateAddress
+
+  public static int canMergeAfterBCE2() {
+    int[] array = {0, 1, 2, 3};
+    for (int i = 0; i < array.length - 1; i++) {
+      array[i + 1] = array[i] + array[i + 1];
+    }
+    return array[array.length - 1];
+  }
+
+
+  public static void main(String[] args) {
+    int[] array = {123, 456, 789};
+
+    assertIntEquals(456, constantIndexGet(array));
+
+    constantIndexSet(array);
+    assertIntEquals(2, array[1]);
+
+    assertIntEquals(789, get(array, 2));
+
+    set(array, 1, 456);
+    assertIntEquals(456, array[1]);
+
+    getSet(array, 0);
+    assertIntEquals(124, array[0]);
+
+    accrossGC(array, 0);
+    assertIntEquals(125, array[0]);
+
+    assertIntEquals(4, canMergeAfterBCE1());
+    assertIntEquals(6, canMergeAfterBCE2());
+  }
+}
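
Conceptually, the Arm64IntermediateAddress node these Checker assertions look for factors the constant data offset out of each access, so base + offset is materialised once and shared by neighbouring gets and sets. A minimal sketch of the idea with plain pointer arithmetic; kDataOffset is an illustrative constant, not the real art::mirror array layout.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative offset of the element data from the array object base.
constexpr size_t kDataOffset = 12;

// After the split, base + kDataOffset is computed once and reused by the get and the set.
void IncrementAll(uint8_t* array_base, size_t length) {
  int32_t* data = reinterpret_cast<int32_t*>(array_base + kDataOffset);  // Shared intermediate address.
  for (size_t i = 0; i < length; ++i) {
    data[i] = data[i] + 1;  // Both accesses reuse the same intermediate address.
  }
}

int main() {
  // Fake "array object": header bytes followed by four int32_t elements, zero-initialised.
  alignas(int32_t) uint8_t storage[kDataOffset + 4 * sizeof(int32_t)] = {};
  IncrementAll(storage, 4);
  std::printf("%d\n", *reinterpret_cast<int32_t*>(storage + kDataOffset));  // 1
}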
diff --git a/test/530-checker-lse/expected.txt b/test/530-checker-lse/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/530-checker-lse/expected.txt
diff --git a/test/530-checker-lse/info.txt b/test/530-checker-lse/info.txt
new file mode 100644
index 0000000..5b45e20
--- /dev/null
+++ b/test/530-checker-lse/info.txt
@@ -0,0 +1 @@
+Checker test for testing load-store elimination.
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
new file mode 100644
index 0000000..c766aaa
--- /dev/null
+++ b/test/530-checker-lse/src/Main.java
@@ -0,0 +1,512 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Circle {
+  Circle(double radius) {
+    this.radius = radius;
+  }
+  public double getArea() {
+    return radius * radius * Math.PI;
+  }
+  private double radius;
+};
+
+class TestClass {
+  TestClass() {
+  }
+  TestClass(int i, int j) {
+    this.i = i;
+    this.j = j;
+  }
+  int i;
+  int j;
+  volatile int k;
+  TestClass next;
+  static int si;
+};
+
+class SubTestClass extends TestClass {
+  int k;
+};
+
+class TestClass2 {
+  int i;
+  int j;
+};
+
+public class Main {
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+
+  static double calcCircleArea(double radius) {
+    return new Circle(radius).getArea();
+  }
+
+  /// CHECK-START: int Main.test1(TestClass, TestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test1(TestClass, TestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Different fields shouldn't alias.
+  static int test1(TestClass obj1, TestClass obj2) {
+    obj1.i = 1;
+    obj2.j = 2;
+    return obj1.i + obj2.j;
+  }
+
+  /// CHECK-START: int Main.test2(TestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test2(TestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Redundant store of the same value.
+  static int test2(TestClass obj) {
+    obj.j = 1;
+    obj.j = 1;
+    return obj.j;
+  }
+
+  /// CHECK-START: int Main.test3(TestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test3(TestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // A new allocation shouldn't alias with pre-existing values.
+  static int test3(TestClass obj) {
+    obj.i = 1;
+    obj.next.j = 2;
+    TestClass obj2 = new TestClass();
+    obj2.i = 3;
+    obj2.j = 4;
+    return obj.i + obj.next.j + obj2.i + obj2.j;
+  }
+
+  /// CHECK-START: int Main.test4(TestClass, boolean) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.test4(TestClass, boolean) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK: InstanceFieldSet
+
+  // Set and merge the same value in two branches.
+  static int test4(TestClass obj, boolean b) {
+    if (b) {
+      obj.i = 1;
+    } else {
+      obj.i = 1;
+    }
+    return obj.i;
+  }
+
+  /// CHECK-START: int Main.test5(TestClass, boolean) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.test5(TestClass, boolean) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK: InstanceFieldSet
+
+  // Set and merge different values in two branches.
+  static int test5(TestClass obj, boolean b) {
+    if (b) {
+      obj.i = 1;
+    } else {
+      obj.i = 2;
+    }
+    return obj.i;
+  }
+
+  /// CHECK-START: int Main.test6(TestClass, TestClass, boolean) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test6(TestClass, TestClass, boolean) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Setting the same value doesn't clear the value for aliased locations.
+  static int test6(TestClass obj1, TestClass obj2, boolean b) {
+    obj1.i = 1;
+    obj1.j = 2;
+    if (b) {
+      obj2.j = 2;
+    }
+    return obj1.j + obj2.j;
+  }
+
+  /// CHECK-START: int Main.test7(TestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test7(TestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  // Invocation should kill values in non-singleton heap locations.
+  static int test7(TestClass obj) {
+    obj.i = 1;
+    System.out.print("");
+    return obj.i;
+  }
+
+  /// CHECK-START: int Main.test8() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InvokeVirtual
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test8() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InvokeVirtual
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Invocation should not kill values in singleton heap locations.
+  static int test8() {
+    TestClass obj = new TestClass();
+    obj.i = 1;
+    System.out.print("");
+    return obj.i;
+  }
+
+  /// CHECK-START: int Main.test9(TestClass) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test9(TestClass) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  // Invocation should kill values in non-singleton heap locations.
+  static int test9(TestClass obj) {
+    TestClass obj2 = new TestClass();
+    obj2.i = 1;
+    obj.next = obj2;
+    System.out.print("");
+    return obj2.i;
+  }
+
+  /// CHECK-START: int Main.test10(TestClass) load_store_elimination (before)
+  /// CHECK: StaticFieldGet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: StaticFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test10(TestClass) load_store_elimination (after)
+  /// CHECK: StaticFieldGet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: StaticFieldSet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Static fields shouldn't alias with instance fields.
+  static int test10(TestClass obj) {
+    TestClass.si += obj.i;
+    return obj.i;
+  }
+
+  /// CHECK-START: int Main.test11(TestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test11(TestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Loop without heap writes.
+  // obj.i is actually hoisted to the loop pre-header by licm already.
+  static int test11(TestClass obj) {
+    obj.i = 1;
+    int sum = 0;
+    for (int i = 0; i < 10; i++) {
+      sum += obj.i;
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.test12(TestClass, TestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.test12(TestClass, TestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  // Loop with heap writes.
+  static int test12(TestClass obj1, TestClass obj2) {
+    obj1.i = 1;
+    int sum = 0;
+    for (int i = 0; i < 10; i++) {
+      sum += obj1.i;
+      obj2.i = sum;
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.test13(TestClass, TestClass2) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test13(TestClass, TestClass2) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Different classes shouldn't alias.
+  static int test13(TestClass obj1, TestClass2 obj2) {
+    obj1.i = 1;
+    obj2.i = 2;
+    return obj1.i + obj2.i;
+  }
+
+  /// CHECK-START: int Main.test14(TestClass, SubTestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test14(TestClass, SubTestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  // Subclass may alias with super class.
+  static int test14(TestClass obj1, SubTestClass obj2) {
+    obj1.i = 1;
+    obj2.i = 2;
+    return obj1.i;
+  }
+
+  /// CHECK-START: int Main.test15() load_store_elimination (before)
+  /// CHECK: StaticFieldSet
+  /// CHECK: StaticFieldSet
+  /// CHECK: StaticFieldGet
+
+  /// CHECK-START: int Main.test15() load_store_elimination (after)
+  /// CHECK: <<Const2:i\d+>> IntConstant 2
+  /// CHECK: StaticFieldSet
+  /// CHECK: StaticFieldSet
+  /// CHECK-NOT: StaticFieldGet
+  /// CHECK: Return [<<Const2>>]
+
+  // Static field accessed through the subclass's name resolves to the same field.
+  static int test15() {
+    TestClass.si = 1;
+    SubTestClass.si = 2;
+    return TestClass.si;
+  }
+
+  /// CHECK-START: int Main.test16() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test16() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Test inlined constructor.
+  static int test16() {
+    TestClass obj = new TestClass(1, 2);
+    return obj.i + obj.j;
+  }
+
+  /// CHECK-START: int Main.test17() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test17() load_store_elimination (after)
+  /// CHECK: <<Const0:i\d+>> IntConstant 0
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: Return [<<Const0>>]
+
+  // Test getting default value.
+  static int test17() {
+    TestClass obj = new TestClass();
+    obj.j = 1;
+    return obj.i;
+  }
+
+  /// CHECK-START: int Main.test18(TestClass) load_store_elimination (before)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test18(TestClass) load_store_elimination (after)
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+
+  // Volatile field load/store shouldn't be eliminated.
+  static int test18(TestClass obj) {
+    obj.k = 1;
+    return obj.k;
+  }
+
+  /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (before)
+  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+
+  /// CHECK-START: float Main.test19(float[], float[]) load_store_elimination (after)
+  /// CHECK: <<IntTypeValue:i\d+>> ArrayGet
+  /// CHECK: ArraySet
+  /// CHECK: <<FloatTypeValue:f\d+>> ArrayGet
+
+  // I/F, J/D aliasing should keep the load/store.
+  static float test19(float[] fa1, float[] fa2) {
+    fa1[0] = fa2[0];
+    return fa1[0];
+  }
+
+  /// CHECK-START: TestClass Main.test20() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: TestClass Main.test20() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+
+  // Storing default heap value is redundant if the heap location has the
+  // default heap value.
+  static TestClass test20() {
+    TestClass obj = new TestClass();
+    obj.i = 0;
+    return obj;
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertFloatEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertDoubleEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcCircleArea(Math.PI));
+    assertIntEquals(test1(new TestClass(), new TestClass()), 3);
+    assertIntEquals(test2(new TestClass()), 1);
+    TestClass obj1 = new TestClass();
+    TestClass obj2 = new TestClass();
+    obj1.next = obj2;
+    assertIntEquals(test3(obj1), 10);
+    assertIntEquals(test4(new TestClass(), true), 1);
+    assertIntEquals(test4(new TestClass(), false), 1);
+    assertIntEquals(test5(new TestClass(), true), 1);
+    assertIntEquals(test5(new TestClass(), false), 2);
+    assertIntEquals(test6(new TestClass(), new TestClass(), true), 4);
+    assertIntEquals(test6(new TestClass(), new TestClass(), false), 2);
+    assertIntEquals(test7(new TestClass()), 1);
+    assertIntEquals(test8(), 1);
+    obj1 = new TestClass();
+    obj2 = new TestClass();
+    obj1.next = obj2;
+    assertIntEquals(test9(new TestClass()), 1);
+    assertIntEquals(test10(new TestClass(3, 4)), 3);
+    assertIntEquals(TestClass.si, 3);
+    assertIntEquals(test11(new TestClass()), 10);
+    assertIntEquals(test12(new TestClass(), new TestClass()), 10);
+    assertIntEquals(test13(new TestClass(), new TestClass2()), 3);
+    SubTestClass obj3 = new SubTestClass();
+    assertIntEquals(test14(obj3, obj3), 2);
+    assertIntEquals(test15(), 2);
+    assertIntEquals(test16(), 3);
+    assertIntEquals(test17(), 0);
+    assertIntEquals(test18(new TestClass()), 1);
+    float[] fa1 = { 0.8f };
+    float[] fa2 = { 1.8f };
+    assertFloatEquals(test19(fa1, fa2), 1.8f);
+    assertFloatEquals(test20().i, 0);
+  }
+}
diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java
index 7d8fff4..2c701bb 100644
--- a/test/532-checker-nonnull-arrayset/src/Main.java
+++ b/test/532-checker-nonnull-arrayset/src/Main.java
@@ -29,10 +29,10 @@
   /// CHECK-NOT:      test
   /// CHECK:          ReturnVoid
   public static void test() {
-    Object[] array = new Object[1];
+    Object[] array = new Object[2];
     Object nonNull = array[0];
     nonNull.getClass(); // Ensure nonNull has an implicit null check.
-    array[0] = nonNull;
+    array[1] = nonNull;
   }
 
   public static void main(String[] args) {}
diff --git a/test/536-checker-intrinsic-optimization/expected.txt b/test/536-checker-intrinsic-optimization/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/536-checker-intrinsic-optimization/expected.txt
diff --git a/test/536-checker-intrinsic-optimization/info.txt b/test/536-checker-intrinsic-optimization/info.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/536-checker-intrinsic-optimization/info.txt
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
new file mode 100644
index 0000000..1b784ae
--- /dev/null
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+  public static void main(String[] args) {
+    stringEqualsSame();
+    stringArgumentNotNull("Foo");
+  }
+
+  /// CHECK-START: boolean Main.stringEqualsSame() instruction_simplifier (before)
+  /// CHECK:      InvokeStaticOrDirect
+
+  /// CHECK-START: boolean Main.stringEqualsSame() register (before)
+  /// CHECK:      <<Const1:i\d+>> IntConstant 1
+  /// CHECK:      Return [<<Const1>>]
+
+  /// CHECK-START: boolean Main.stringEqualsSame() register (before)
+  /// CHECK-NOT:  InvokeStaticOrDirect
+  public static boolean stringEqualsSame() {
+    return $inline$callStringEquals("obj", "obj");
+  }
+
+  /// CHECK-START: boolean Main.stringEqualsNull() register (after)
+  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      Return [<<Invoke>>]
+  public static boolean stringEqualsNull() {
+    String o = (String)myObject;
+    return $inline$callStringEquals(o, o);
+  }
+
+  public static boolean $inline$callStringEquals(String a, String b) {
+    return a.equals(b);
+  }
+
+  /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK-NOT:      test
+  public static boolean stringArgumentNotNull(Object obj) {
+    obj.getClass();
+    return "foo".equals(obj);
+  }
+
+  // Test is very brittle as it depends on the order we emit instructions.
+  /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:      InvokeStaticOrDirect
+  /// CHECK:      test
+  /// CHECK:      jz/eq
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:  mov
+  /// CHECK:      cmp
+  public static boolean stringArgumentIsString() {
+    return "foo".equals(myString);
+  }
+
+  static String myString;
+  static Object myObject;
+}
diff --git a/test/536-checker-needs-access-check/expected.txt b/test/536-checker-needs-access-check/expected.txt
new file mode 100644
index 0000000..4acae95
--- /dev/null
+++ b/test/536-checker-needs-access-check/expected.txt
@@ -0,0 +1,4 @@
+Got expected error instanceof
+Got expected error instanceof null
+Got expected error checkcast null
+Got expected error instanceof (keep LoadClass with access check)
diff --git a/test/536-checker-needs-access-check/info.txt b/test/536-checker-needs-access-check/info.txt
new file mode 100644
index 0000000..3413cf3
--- /dev/null
+++ b/test/536-checker-needs-access-check/info.txt
@@ -0,0 +1 @@
+Verifies that we don't remove type checks when we need to check for access rights.
diff --git a/test/536-checker-needs-access-check/src/Main.java b/test/536-checker-needs-access-check/src/Main.java
new file mode 100644
index 0000000..7bd49c1
--- /dev/null
+++ b/test/536-checker-needs-access-check/src/Main.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import other.InaccessibleClass;
+import other.InaccessibleClassProxy;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            testInstanceOf();
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error instanceof");
+        }
+
+        try {
+            testInstanceOfNull();
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error instanceof null");
+        }
+
+        try {
+            testCheckCastNull();
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error checkcast null");
+        }
+
+        try {
+            testDontGvnLoadClassWithAccessChecks(new Object());
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error instanceof (keep LoadClass with access check)");
+        }
+    }
+
+    /// CHECK-START: boolean Main.testInstanceOf() register (after)
+    /// CHECK: InstanceOf
+    public static boolean testInstanceOf() {
+        return ic instanceof InaccessibleClass;
+    }
+
+    /// CHECK-START: boolean Main.testInstanceOfNull() register (after)
+    /// CHECK: InstanceOf
+    public static boolean testInstanceOfNull() {
+        return null instanceof InaccessibleClass;
+    }
+
+    // TODO: write a test for CheckCast with a non-null constant (after RTP can parse arguments).
+
+    /// CHECK-START: other.InaccessibleClass Main.testCheckCastNull() register (after)
+    /// CHECK: CheckCast
+    public static InaccessibleClass testCheckCastNull() {
+        return (InaccessibleClass) null;
+    }
+
+    /// CHECK-START: boolean Main.testDontGvnLoadClassWithAccessChecks(java.lang.Object) inliner (before)
+    /// CHECK: InvokeStaticOrDirect
+
+    /// CHECK-START: boolean Main.testDontGvnLoadClassWithAccessChecks(java.lang.Object) inliner (after)
+    /// CHECK-NOT: InvokeStaticOrDirect
+
+    /// CHECK-START: boolean Main.testDontGvnLoadClassWithAccessChecks(java.lang.Object) GVN (after)
+    /// CHECK: LoadClass needs_access_check:false
+    /// CHECK: LoadClass needs_access_check:true
+    public static boolean testDontGvnLoadClassWithAccessChecks(Object o) {
+        InaccessibleClassProxy.test(o);
+        return ic instanceof InaccessibleClass;
+    }
+
+    public static InaccessibleClass ic;
+}
diff --git a/test/536-checker-needs-access-check/src/other/InaccessibleClass.java b/test/536-checker-needs-access-check/src/other/InaccessibleClass.java
new file mode 100644
index 0000000..de2e1d7
--- /dev/null
+++ b/test/536-checker-needs-access-check/src/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClass {
+}
diff --git a/test/536-checker-needs-access-check/src/other/InaccessibleClassProxy.java b/test/536-checker-needs-access-check/src/other/InaccessibleClassProxy.java
new file mode 100644
index 0000000..4c005e4
--- /dev/null
+++ b/test/536-checker-needs-access-check/src/other/InaccessibleClassProxy.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClassProxy {
+  public static boolean test(Object o) {
+    return o instanceof InaccessibleClass;
+  }
+}
diff --git a/test/536-checker-needs-access-check/src2/other/InaccessibleClass.java b/test/536-checker-needs-access-check/src2/other/InaccessibleClass.java
new file mode 100644
index 0000000..2732263
--- /dev/null
+++ b/test/536-checker-needs-access-check/src2/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+/*package*/ class InaccessibleClass {
+}
diff --git a/test/536-checker-needs-access-check/src2/other/InaccessibleClassProxy.java b/test/536-checker-needs-access-check/src2/other/InaccessibleClassProxy.java
new file mode 100644
index 0000000..4c005e4
--- /dev/null
+++ b/test/536-checker-needs-access-check/src2/other/InaccessibleClassProxy.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClassProxy {
+  public static boolean test(Object o) {
+    return o instanceof InaccessibleClass;
+  }
+}
diff --git a/test/537-checker-arraycopy/expected.txt b/test/537-checker-arraycopy/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-arraycopy/expected.txt
diff --git a/test/537-checker-arraycopy/info.txt b/test/537-checker-arraycopy/info.txt
new file mode 100644
index 0000000..ea88f89
--- /dev/null
+++ b/test/537-checker-arraycopy/info.txt
@@ -0,0 +1 @@
+Test for edge cases of System.arraycopy.
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
new file mode 100644
index 0000000..30ccc56
--- /dev/null
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+  public static void main(String[] args) {
+    arraycopy();
+    try {
+      arraycopy(new Object());
+      throw new Error("Should not be here");
+    } catch (ArrayStoreException ase) {
+      // Ignore.
+    }
+    try {
+      arraycopy(null);
+      throw new Error("Should not be here");
+    } catch (NullPointerException npe) {
+      // Ignore.
+    }
+
+    try {
+      arraycopy(new Object[1]);
+      throw new Error("Should not be here");
+    } catch (ArrayIndexOutOfBoundsException aiooe) {
+      // Ignore.
+    }
+
+    arraycopy(new Object[2]);
+    arraycopy(new Object[2], 0);
+
+    try {
+      arraycopy(new Object[1], 1);
+      throw new Error("Should not be here");
+    } catch (ArrayIndexOutOfBoundsException aiooe) {
+      // Ignore.
+    }
+  }
+
+  /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK-NOT:      test
+  /// CHECK-NOT:      call
+  /// CHECK:          ReturnVoid
+  // Checks that the call is intrinsified and that there is no test instruction
+  // when we know the source and destination are not null.
+  public static void arraycopy() {
+    Object[] obj = new Object[4];
+    System.arraycopy(obj, 1, obj, 0, 1);
+  }
+
+  public static void arraycopy(Object obj) {
+    System.arraycopy(obj, 1, obj, 0, 1);
+  }
+
+  public static void arraycopy(Object[] obj, int pos) {
+    System.arraycopy(obj, pos, obj, 0, obj.length);
+  }
+}
diff --git a/test/537-checker-debuggable/expected.txt b/test/537-checker-debuggable/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-debuggable/expected.txt
diff --git a/test/537-checker-debuggable/info.txt b/test/537-checker-debuggable/info.txt
new file mode 100644
index 0000000..25597d3
--- /dev/null
+++ b/test/537-checker-debuggable/info.txt
@@ -0,0 +1 @@
+Test that CHECK-START-DEBUGGABLE runs only on --debuggable code.
\ No newline at end of file
diff --git a/test/537-checker-debuggable/smali/TestCase.smali b/test/537-checker-debuggable/smali/TestCase.smali
new file mode 100644
index 0000000..8e6c7ef
--- /dev/null
+++ b/test/537-checker-debuggable/smali/TestCase.smali
@@ -0,0 +1,42 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+# The phi in this method has no actual uses but one environment use. It will
+# be eliminated in normal mode but kept live in debuggable mode. Test that
+# Checker runs the correct test for each compilation mode.
+
+## CHECK-START: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK-NOT:         Phi
+
+## CHECK-START-DEBUGGABLE: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK:             Phi
+
+.method public static deadPhi(III)I
+  .registers 8
+
+  move v0, p1
+  if-eqz p0, :after
+  move v0, p2
+  :after
+  # v0 = Phi [p1, p2] with no uses
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+
+  :return
+  return p2
+.end method
diff --git a/test/537-checker-debuggable/src/Main.java b/test/537-checker-debuggable/src/Main.java
new file mode 100644
index 0000000..a572648
--- /dev/null
+++ b/test/537-checker-debuggable/src/Main.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) { }
+}
diff --git a/test/537-checker-inline-and-unverified/expected.txt b/test/537-checker-inline-and-unverified/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/expected.txt
diff --git a/test/537-checker-inline-and-unverified/info.txt b/test/537-checker-inline-and-unverified/info.txt
new file mode 100644
index 0000000..ec12327
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/info.txt
@@ -0,0 +1 @@
+Checks that unverified methods are not inlined.
diff --git a/test/537-checker-inline-and-unverified/src/Main.java b/test/537-checker-inline-and-unverified/src/Main.java
new file mode 100644
index 0000000..bdc14b0
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/src/Main.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import other.InaccessibleClass;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            testNoInline();
+        } catch (IllegalAccessError e) {
+            // expected
+        }
+        testInline();
+    }
+
+    /// CHECK-START: void Main.testNoInline() inliner (before)
+    /// CHECK: InvokeStaticOrDirect method_name:Main.$opt$noinline$testNoInline
+
+    /// CHECK-START: void Main.testNoInline() inliner (after)
+    /// CHECK: InvokeStaticOrDirect method_name:Main.$opt$noinline$testNoInline
+    public static void testNoInline() {
+        $opt$noinline$testNoInline();
+    }
+
+    /// CHECK-START: void Main.testInline() inliner (before)
+    /// CHECK: InvokeStaticOrDirect method_name:Main.$opt$inline$testInline
+
+    /// CHECK-START: void Main.testInline() inliner (after)
+    /// CHECK-NOT: InvokeStaticOrDirect
+    public static void testInline() {
+        $opt$inline$testInline();
+    }
+
+    public static boolean $opt$noinline$testNoInline() {
+        try {
+            return null instanceof InaccessibleClass;
+        } catch (IllegalAccessError e) {
+            // expected
+        }
+        return false;
+    }
+
+    public static boolean $opt$inline$testInline() {
+        return null instanceof Main;
+    }
+}
diff --git a/test/537-checker-inline-and-unverified/src/other/InaccessibleClass.java b/test/537-checker-inline-and-unverified/src/other/InaccessibleClass.java
new file mode 100644
index 0000000..de2e1d7
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/src/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClass {
+}
diff --git a/test/537-checker-inline-and-unverified/src2/other/InaccessibleClass.java b/test/537-checker-inline-and-unverified/src2/other/InaccessibleClass.java
new file mode 100644
index 0000000..ff11d7a
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/src2/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+/* package */ class InaccessibleClass {
+}
diff --git a/test/538-checker-embed-constants/expected.txt b/test/538-checker-embed-constants/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/538-checker-embed-constants/expected.txt
diff --git a/test/538-checker-embed-constants/info.txt b/test/538-checker-embed-constants/info.txt
new file mode 100644
index 0000000..5a722ec
--- /dev/null
+++ b/test/538-checker-embed-constants/info.txt
@@ -0,0 +1 @@
+Test embedding of constants in assembler instructions.
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
new file mode 100644
index 0000000..979c4c8
--- /dev/null
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /// CHECK-START-ARM: int Main.and255(int) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK:                and {{r\d+}}, {{r\d+}}, #255
+
+  public static int and255(int arg) {
+    return arg & 255;
+  }
+
+  /// CHECK-START-ARM: int Main.and511(int) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int and511(int arg) {
+    return arg & 511;
+  }
+
+  /// CHECK-START-ARM: int Main.andNot15(int) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #15
+
+  public static int andNot15(int arg) {
+    return arg & ~15;
+  }
+
+  /// CHECK-START-ARM: int Main.or255(int) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #255
+
+  public static int or255(int arg) {
+    return arg | 255;
+  }
+
+  /// CHECK-START-ARM: int Main.or511(int) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK:                orr{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int or511(int arg) {
+    return arg | 511;
+  }
+
+  /// CHECK-START-ARM: int Main.orNot15(int) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK:                orn {{r\d+}}, {{r\d+}}, #15
+
+  public static int orNot15(int arg) {
+    return arg | ~15;
+  }
+
+  /// CHECK-START-ARM: int Main.xor255(int) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #255
+
+  public static int xor255(int arg) {
+    return arg ^ 255;
+  }
+
+  /// CHECK-START-ARM: int Main.xor511(int) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK:                eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int xor511(int arg) {
+    return arg ^ 511;
+  }
+
+  /// CHECK-START-ARM: int Main.xorNot15(int) disassembly (after)
+  /// CHECK:                mvn {{r\d+}}, #15
+  /// CHECK:                eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int xorNot15(int arg) {
+    return arg ^ ~15;
+  }
+
+  /// CHECK-START-ARM: long Main.and255(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #255
+  /// CHECK-DAG:            movs {{r\d+}}, #0
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long and255(long arg) {
+    return arg & 255L;
+  }
+
+  /// CHECK-START-ARM: long Main.and511(long) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK-DAG:            and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            movs {{r\d+}}, #0
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long and511(long arg) {
+    return arg & 511L;
+  }
+
+  /// CHECK-START-ARM: long Main.andNot15(long) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK:                bic {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long andNot15(long arg) {
+    return arg & ~15L;
+  }
+
+  /// CHECK-START-ARM: long Main.and0xfffffff00000000f(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #15
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+  /// CHECK-DAG:            and {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            bic {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-NOT:            and
+  /// CHECK-NOT:            bic
+
+  public static long and0xfffffff00000000f(long arg) {
+    return arg & 0xfffffff00000000fL;
+  }
+
+  /// CHECK-START-ARM: long Main.or255(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK:                orr {{r\d+}}, {{r\d+}}, #255
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long or255(long arg) {
+    return arg | 255L;
+  }
+
+  /// CHECK-START-ARM: long Main.or511(long) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK:                orr{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long or511(long arg) {
+    return arg | 511L;
+  }
+
+  /// CHECK-START-ARM: long Main.orNot15(long) disassembly (after)
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            mvn {{r\d+}}, #0
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long orNot15(long arg) {
+    return arg | ~15L;
+  }
+
+  /// CHECK-START-ARM: long Main.or0xfffffff00000000f(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #15
+  /// CHECK-NOT:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+  /// CHECK-DAG:            orr {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            orn {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-NOT:            orr
+  /// CHECK-NOT:            orn
+
+  public static long or0xfffffff00000000f(long arg) {
+    return arg | 0xfffffff00000000fL;
+  }
+
+  /// CHECK-START-ARM: long Main.xor255(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #255
+  /// CHECK-NOT:            eor
+  /// CHECK:                eor {{r\d+}}, {{r\d+}}, #255
+  /// CHECK-NOT:            eor
+
+  public static long xor255(long arg) {
+    return arg ^ 255L;
+  }
+
+  /// CHECK-START-ARM: long Main.xor511(long) disassembly (after)
+  /// CHECK:                movw {{r\d+}}, #511
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            eor
+
+  public static long xor511(long arg) {
+    return arg ^ 511L;
+  }
+
+  /// CHECK-START-ARM: long Main.xorNot15(long) disassembly (after)
+  /// CHECK-DAG:            mvn {{r\d+}}, #15
+  /// CHECK-DAG:            mov.w {{r\d+}}, #-1
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            eor
+
+  public static long xorNot15(long arg) {
+    return arg ^ ~15L;
+  }
+
+  // Note: No support for partial long constant embedding.
+  /// CHECK-START-ARM: long Main.xor0xfffffff00000000f(long) disassembly (after)
+  /// CHECK-DAG:            movs {{r\d+}}, #15
+  /// CHECK-DAG:            mvn {{r\d+}}, #15
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            eor{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            eor
+
+  public static long xor0xfffffff00000000f(long arg) {
+    return arg ^ 0xfffffff00000000fL;
+  }
+
+  /// CHECK-START-ARM: long Main.xor0xf00000000000000f(long) disassembly (after)
+  /// CHECK-NOT:            movs {{r\d+}}, #15
+  /// CHECK-NOT:            mov.w {{r\d+}}, #-268435456
+  /// CHECK-NOT:            eor
+  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #15
+  /// CHECK-DAG:            eor {{r\d+}}, {{r\d+}}, #-268435456
+  /// CHECK-NOT:            eor
+
+  public static long xor0xf00000000000000f(long arg) {
+    return arg ^ 0xf00000000000000fL;
+  }
+
+  /**
+   * Test that the `-1` constant is not synthesized in a register and that we
+   * instead simply switch between `add` and `sub` instructions with the
+   * constant embedded.
+   * We need two uses (or more) of the constant because the compiler always
+   * defers immediate-value handling to VIXL when it has only one use.
+   */
+
+  /// CHECK-START-ARM64: long Main.addM1(long) register (after)
+  /// CHECK:     <<Arg:j\d+>>       ParameterValue
+  /// CHECK:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-NOT:                    ParallelMove
+  /// CHECK:                        Add [<<Arg>>,<<ConstM1>>]
+  /// CHECK:                        Sub [<<Arg>>,<<ConstM1>>]
+
+  /// CHECK-START-ARM64: long Main.addM1(long) disassembly (after)
+  /// CHECK:                        sub x{{\d+}}, x{{\d+}}, #0x1
+  /// CHECK:                        add x{{\d+}}, x{{\d+}}, #0x1
+
+  public static long addM1(long arg) {
+    return (arg + (-1)) | (arg - (-1));
+  }
+
+  public static void main(String[] args) {
+    int arg = 0x87654321;
+    assertIntEquals(and255(arg), 0x21);
+    assertIntEquals(and511(arg), 0x121);
+    assertIntEquals(andNot15(arg), 0x87654320);
+    assertIntEquals(or255(arg), 0x876543ff);
+    assertIntEquals(or511(arg), 0x876543ff);
+    assertIntEquals(orNot15(arg), 0xfffffff1);
+    assertIntEquals(xor255(arg), 0x876543de);
+    assertIntEquals(xor511(arg), 0x876542de);
+    assertIntEquals(xorNot15(arg), 0x789abcd1);
+
+    long longArg = 0x1234567887654321L;
+    assertLongEquals(and255(longArg), 0x21L);
+    assertLongEquals(and511(longArg), 0x121L);
+    assertLongEquals(andNot15(longArg), 0x1234567887654320L);
+    assertLongEquals(and0xfffffff00000000f(longArg), 0x1234567000000001L);
+    assertLongEquals(or255(longArg), 0x12345678876543ffL);
+    assertLongEquals(or511(longArg), 0x12345678876543ffL);
+    assertLongEquals(orNot15(longArg), 0xfffffffffffffff1L);
+    assertLongEquals(or0xfffffff00000000f(longArg), 0xfffffff88765432fL);
+    assertLongEquals(xor255(longArg), 0x12345678876543deL);
+    assertLongEquals(xor511(longArg), 0x12345678876542deL);
+    assertLongEquals(xorNot15(longArg), 0xedcba987789abcd1L);
+    assertLongEquals(xor0xfffffff00000000f(longArg), 0xedcba9888765432eL);
+    assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
+
+    assertLongEquals(14, addM1(7));
+  }
+}
diff --git a/test/540-checker-rtp-bug/expected.txt b/test/540-checker-rtp-bug/expected.txt
new file mode 100644
index 0000000..2cf2842
--- /dev/null
+++ b/test/540-checker-rtp-bug/expected.txt
@@ -0,0 +1 @@
+instanceof failed
diff --git a/test/540-checker-rtp-bug/info.txt b/test/540-checker-rtp-bug/info.txt
new file mode 100644
index 0000000..852cd7c
--- /dev/null
+++ b/test/540-checker-rtp-bug/info.txt
@@ -0,0 +1 @@
+Test that we set the proper types for objects (b/25008765).
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
new file mode 100644
index 0000000..e9f16c0
--- /dev/null
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class Final {
+  public String toString() {
+    return "final";
+  }
+}
+
+public class Main {
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
+  /// CHECK:    <<Class:l\d+>>   LoadClass
+  /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
+  /// CHECK:    <<Ret:l\d+>>     BoundType [<<Phi>>] klass:Final
+  /// CHECK:                     Return [<<Ret>>]
+
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier_after_types (after)
+  /// CHECK:    <<Phi:l\d+>>     Phi
+  /// CHECK:    <<Class:l\d+>>   LoadClass
+  /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
+  /// CHECK:    <<Ret:l\d+>>     BoundType [<<Phi>>]
+  /// CHECK:                     Return [<<Ret>>]
+  public static Final testKeepCheckCast(Object o, boolean cond) {
+    Object x = new Final();
+    while (cond) {
+      x = o;
+      cond = false;
+    }
+    return (Final) x;
+  }
+
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
+  /// CHECK:    <<Class:l\d+>>   LoadClass
+  /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
+
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination (after)
+  /// CHECK:    <<Phi:l\d+>>     Phi
+  /// CHECK:    <<Class:l\d+>>   LoadClass
+  /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
+  public static void testKeepInstanceOf(Object o, boolean cond) {
+    Object x = new Final();
+    while (cond) {
+      x = o;
+      cond = false;
+    }
+    if (x instanceof Final) {
+      System.out.println("instanceof succeed");
+    } else {
+      System.out.println("instanceof failed");
+    }
+  }
+
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
+  /// CHECK:    <<NC:l\d+>>      NullCheck [<<Phi>>]
+  /// CHECK:    <<Ret:l\d+>>     InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
+  /// CHECK:                     Return [<<Ret>>]
+
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) inliner (after)
+  /// CHECK:    <<Phi:l\d+>>     Phi
+  /// CHECK:    <<NC:l\d+>>      NullCheck [<<Phi>>]
+  /// CHECK:    <<Ret:l\d+>>     InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
+  /// CHECK:                     Return [<<Ret>>]
+  public static String testNoInline(Object o, boolean cond) {
+    Object x = new Final();
+    while (cond) {
+      x = o;
+      cond = false;
+    }
+    return x.toString();
+  }
+
+  public static void main(String[] args) {
+    try {
+      testKeepCheckCast(new Object(), true);
+      throw new Error("Expected check cast exception");
+    } catch (ClassCastException e) {
+      // expected
+    }
+
+    testKeepInstanceOf(new Object(), true);
+
+    if ("final".equals(testNoInline(new Object(), true))) {
+      throw new Error("Bad inlining");
+    }
+  }
+}
diff --git a/test/541-regression-inlined-deopt/expected.txt b/test/541-regression-inlined-deopt/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/541-regression-inlined-deopt/expected.txt
diff --git a/test/541-regression-inlined-deopt/info.txt b/test/541-regression-inlined-deopt/info.txt
new file mode 100644
index 0000000..209588f
--- /dev/null
+++ b/test/541-regression-inlined-deopt/info.txt
@@ -0,0 +1,4 @@
+Regression test for deopt from optimized code which would use the top-level
+stack map when deopting inlined frames. The test case is written in smali for
+full control over vregs; the previous test (449) happened to pass only because
+the vreg maps at the various inlining depths were similar.
diff --git a/test/541-regression-inlined-deopt/smali/TestCase.smali b/test/541-regression-inlined-deopt/smali/TestCase.smali
new file mode 100644
index 0000000..a109775
--- /dev/null
+++ b/test/541-regression-inlined-deopt/smali/TestCase.smali
@@ -0,0 +1,55 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method private static $inline$depth1([I)V
+    .registers 3
+
+    # Expects array in v2.
+
+    const v0, 0x0
+
+    const v1, 0x3
+    aput v0, p0, v1
+
+    const v1, 0x4
+    aput v0, p0, v1
+
+    return-void
+.end method
+
+.method private static $inline$depth0([I)V
+    .registers 1
+
+    # Expects array in v0.
+
+    invoke-static {p0}, LTestCase;->$inline$depth1([I)V
+    return-void
+.end method
+
+.method public static foo()V
+    .registers 10
+
+    # Create a new array short enough to throw AIOOB in $inline$depth1.
+    # Make sure the reference is not stored in the same vreg as used by
+    # the inlined methods.
+
+    const v5, 0x3
+    new-array v6, v5, [I
+
+    invoke-static {v6}, LTestCase;->$inline$depth0([I)V
+    return-void
+.end method
diff --git a/test/541-regression-inlined-deopt/src/Main.java b/test/541-regression-inlined-deopt/src/Main.java
new file mode 100644
index 0000000..fa79590
--- /dev/null
+++ b/test/541-regression-inlined-deopt/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Throwable {
+    try {
+      Class<?> c = Class.forName("TestCase");
+      Method m = c.getMethod("foo");
+      m.invoke(null, (Object[]) null);
+    } catch (InvocationTargetException ex) {
+      // Code should have thrown AIOOB.
+      if (!(ex.getCause() instanceof ArrayIndexOutOfBoundsException)) {
+        throw ex;
+      }
+    }
+  }
+}
diff --git a/test/542-inline-trycatch/expected.txt b/test/542-inline-trycatch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/542-inline-trycatch/expected.txt
diff --git a/test/542-inline-trycatch/info.txt b/test/542-inline-trycatch/info.txt
new file mode 100644
index 0000000..b3e50d3
--- /dev/null
+++ b/test/542-inline-trycatch/info.txt
@@ -0,0 +1 @@
+Tests inlining in the optimizing compiler under try/catch.
\ No newline at end of file
diff --git a/test/542-inline-trycatch/src/Main.java b/test/542-inline-trycatch/src/Main.java
new file mode 100644
index 0000000..5a6e06f
--- /dev/null
+++ b/test/542-inline-trycatch/src/Main.java
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // The following tests make sure that we inline methods used inside try and catch
+  // blocks, provided they meet other inlining criteria. To do that, we rely on
+  // the compiler recognizing and enforcing the $inline$ and $noinline$ markers.
+
+  // We expect a single block to always be inlined.
+
+  private static int $inline$SingleBlock(String str) throws NumberFormatException {
+    return Integer.parseInt(str);
+  }
+
+  // We expect a "simple" method with multiple blocks to always be inlined.
+
+  private static int $inline$MultipleBlocks(String str, boolean is_hex)
+      throws NumberFormatException {
+    return is_hex ? Integer.parseInt(str, 16) : Integer.parseInt(str);
+  }
+
+  // We expect methods with try/catch to not be inlined. Inlined try/catch
+  // blocks are not supported at the moment.
+
+  private static int $noinline$TryCatch(String str) {
+    try {
+      return Integer.parseInt(str);
+    } catch (NumberFormatException ex) {
+      return -1;
+    }
+  }
+
+  public static void testSingleBlockFromTry() {
+    int val = 0;
+
+    try {
+      val = $inline$SingleBlock("42");
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(42, val);
+
+    try {
+      $inline$SingleBlock("xyz");
+      unreachable();
+    } catch (NumberFormatException ex) {}
+  }
+
+  public static void testSingleBlockFromCatch() {
+    int val = 0;
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $inline$SingleBlock("42");
+    }
+    assertEquals(42, val);
+  }
+
+  public static void testMultipleBlocksFromTry() {
+    int val = 0;
+
+    try {
+      val = $inline$MultipleBlocks("42", false);
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(42, val);
+
+    try {
+      val = $inline$MultipleBlocks("20", true);
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(32, val);
+
+    try {
+      $inline$MultipleBlocks("xyz", false);
+      unreachable();
+    } catch (NumberFormatException ex) {}
+
+    try {
+      $inline$MultipleBlocks("xyz", true);
+      unreachable();
+    } catch (NumberFormatException ex) {}
+  }
+
+  public static void testMultipleBlocksFromCatch() {
+    int val = 0;
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $inline$MultipleBlocks("42", false);
+    }
+    assertEquals(42, val);
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $inline$MultipleBlocks("20", true);
+    }
+    assertEquals(32, val);
+  }
+
+  public static void testTryCatchFromTry() {
+    int val = 0;
+
+    try {
+      val = $noinline$TryCatch("42");
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(42, val);
+
+    try {
+      val = $noinline$TryCatch("xyz");
+    } catch (NumberFormatException ex) {
+      unreachable();
+    }
+    assertEquals(-1, val);
+  }
+
+  public static void testTryCatchFromCatch() {
+    int val = 0;
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $noinline$TryCatch("42");
+    }
+    assertEquals(42, val);
+
+    try {
+      throwException();
+    } catch (Exception ex) {
+      val = $noinline$TryCatch("xyz");
+    }
+    assertEquals(-1, val);
+  }
+
+  public static void main(String[] args) {
+    testSingleBlockFromTry();
+    testSingleBlockFromCatch();
+    testMultipleBlocksFromTry();
+    testMultipleBlocksFromCatch();
+    testTryCatchFromTry();
+    testTryCatchFromCatch();
+  }
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new AssertionError("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  private static void unreachable() {
+    throw new Error("Unreachable");
+  }
+
+  private static void throwException() throws Exception {
+    throw new Exception();
+  }
+}
diff --git a/test/542-unresolved-access-check/expected.txt b/test/542-unresolved-access-check/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/542-unresolved-access-check/expected.txt
diff --git a/test/542-unresolved-access-check/info.txt b/test/542-unresolved-access-check/info.txt
new file mode 100644
index 0000000..30d45b8
--- /dev/null
+++ b/test/542-unresolved-access-check/info.txt
@@ -0,0 +1 @@
+Test unresolved and access-check entry points with the JIT.
diff --git a/test/542-unresolved-access-check/src/Main.java b/test/542-unresolved-access-check/src/Main.java
new file mode 100644
index 0000000..2bdf47f
--- /dev/null
+++ b/test/542-unresolved-access-check/src/Main.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.List;
+import p1.InP1;
+import p1.PlaceHolder;
+
+
+// Custom class loader to prevent loading while verifying.
+class MyClassLoader extends ClassLoader {
+  MyClassLoader() throws Exception {
+    super(MyClassLoader.class.getClassLoader());
+
+    // Some magic to get access to the pathList field of BaseDexClassLoader.
+    ClassLoader loader = getClass().getClassLoader();
+    Class<?> baseDexClassLoader = loader.getClass().getSuperclass();
+    Field f = baseDexClassLoader.getDeclaredField("pathList");
+    f.setAccessible(true);
+    Object pathList = f.get(loader);
+
+    // Some magic to get access to the dexElements field of pathList.
+    f = pathList.getClass().getDeclaredField("dexElements");
+    f.setAccessible(true);
+    dexElements = (Object[]) f.get(pathList);
+    dexFileField = dexElements[0].getClass().getDeclaredField("dexFile");
+    dexFileField.setAccessible(true);
+  }
+
+  Object[] dexElements;
+  Field dexFileField;
+
+  protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException {
+    if (className.equals("p1.OtherInP1") && !p1.PlaceHolder.entered) {
+      // The request comes from the verifier. Return null to get the access check entry
+      // point in the compiled code.
+      return null;
+    }
+    // Mimic what DexPathList.findClass is doing.
+    try {
+      for (Object element : dexElements) {
+        Object dex = dexFileField.get(element);
+        if (dex != null) {
+          // Look up loadClassBinaryName only once we know the DexFile is there.
+          Method method = dex.getClass().getDeclaredMethod(
+              "loadClassBinaryName", String.class, ClassLoader.class, List.class);
+          Class clazz = (Class)method.invoke(dex, className, this, null);
+          if (clazz != null) {
+            return clazz;
+          }
+        }
+      }
+    } catch (Exception e) { /* Ignore */ }
+    return getParent().loadClass(className);
+  }
+}
+
+public class Main {
+    public static void main(String[] args) throws Exception {
+      MyClassLoader o = new MyClassLoader();
+      Class foo = o.loadClass("LoadedByMyClassLoader");
+      Method m = foo.getDeclaredMethod("main");
+      m.invoke(null);
+    }
+}
+
+class LoadedByMyClassLoader {
+    public static void main() throws Exception {
+      for (int i = 0; i < 10000; ++i) {
+        // Warm up the JIT.
+        doTheCall(i);
+      }
+      // Sleep a while to let the JIT compile things.
+      // TODO(ngeoffray): Remove the sleep. b/25414532
+      Thread.sleep(2000);
+      doTheCall(10001);
+    }
+
+    public static void doTheCall(int i) {
+      InP1.$inline$AllocateOtherInP1(i);
+      InP1.$inline$AllocateArrayOtherInP1(i);
+      InP1.$inline$UseStaticFieldOtherInP1(i);
+      InP1.$inline$SetStaticFieldOtherInP1(i);
+      InP1.$inline$UseInstanceFieldOtherInP1(i);
+      InP1.$inline$SetInstanceFieldOtherInP1(i);
+      InP1.$inline$LoadOtherInP1(i);
+      InP1.$inline$StaticCallOtherInP1(i);
+      InP1.$inline$InstanceCallOtherInP1(i);
+    }
+}
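The essence of MyClassLoader above, stripped of the DexPathList reflection, is the following sketch. It is illustrative only: ToyLoader and its static flag are stand-ins for the test's MyClassLoader and p1.PlaceHolder.entered, not part of the change.

class ToyLoader extends ClassLoader {
  // Stand-in for p1.PlaceHolder.entered (illustrative assumption).
  static volatile boolean entered = false;

  ToyLoader(ClassLoader parent) {
    super(parent);
  }

  @Override
  protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
    if (name.equals("p1.OtherInP1") && !entered) {
      // While the flag is clear (i.e. while callers are being verified and
      // JIT-compiled), pretend the class cannot be resolved so the compiled
      // code falls back to the unresolved/access-check entry points.
      return null;
    }
    // The real MyClassLoader first searches its own dex elements via
    // reflection so the class is defined by this loader, and only then falls
    // back to the parent; that plumbing is elided here.
    return getParent().loadClass(name);
  }
}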
diff --git a/test/542-unresolved-access-check/src/p1/InP1.java b/test/542-unresolved-access-check/src/p1/InP1.java
new file mode 100644
index 0000000..3516c72
--- /dev/null
+++ b/test/542-unresolved-access-check/src/p1/InP1.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+public class InP1 {
+    public static Object $inline$AllocateOtherInP1(int i) {
+      // Let this method execute a while to make sure the JIT sees it hot.
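+      // The 10000 threshold matches the warm-up loop in LoadedByMyClassLoader.main();
+      // only the final doTheCall(10001) actually reaches OtherInP1.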
+      if (i <= 10000) {
+        return null;
+      }
+      // Set the flag that we have entered InP1 code to get OtherInP1 loaded.
+      PlaceHolder.entered = true;
+      return new OtherInP1();
+    }
+
+    public static Object $inline$AllocateArrayOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return new OtherInP1[10];
+    }
+
+    public static Object $inline$UseStaticFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return OtherInP1.staticField;
+    }
+
+    public static void $inline$SetStaticFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return;
+      }
+      OtherInP1.staticField = new Object();
+    }
+
+    public static Object $inline$UseInstanceFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return $noinline$AllocateOtherInP1().instanceField;
+    }
+
+    public static void $inline$SetInstanceFieldOtherInP1(int i) {
+      if (i <= 10000) {
+        return;
+      }
+      $noinline$AllocateOtherInP1().instanceField = new Object();
+    }
+
+    public static OtherInP1 $noinline$AllocateOtherInP1() {
+      try {
+        return new OtherInP1();
+      } catch (Exception e) {
+        throw new Error(e);
+      }
+    }
+
+    public static Object $inline$LoadOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return OtherInP1.class;
+    }
+
+    public static Object $inline$StaticCallOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return OtherInP1.doTheStaticCall();
+    }
+
+    public static Object $inline$InstanceCallOtherInP1(int i) {
+      if (i <= 10000) {
+        return null;
+      }
+      return $noinline$AllocateOtherInP1().doTheInstanceCall();
+    }
+}
diff --git a/test/542-unresolved-access-check/src/p1/OtherInP1.java b/test/542-unresolved-access-check/src/p1/OtherInP1.java
new file mode 100644
index 0000000..adc1ce1
--- /dev/null
+++ b/test/542-unresolved-access-check/src/p1/OtherInP1.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+class OtherInP1 {
+  OtherInP1() {
+  }
+  static Object staticField = new Object();
+  Object instanceField = new Object();
+
+  static Object doTheStaticCall() {
+    return null;
+  }
+
+  Object doTheInstanceCall() {
+    return null;
+  }
+}
diff --git a/test/542-unresolved-access-check/src/p1/PlaceHolder.java b/test/542-unresolved-access-check/src/p1/PlaceHolder.java
new file mode 100644
index 0000000..2bf4bdf
--- /dev/null
+++ b/test/542-unresolved-access-check/src/p1/PlaceHolder.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+// Specific class for putting the 'entered' marker. If we were to put the marker
+// in InP1 or in OtherInP1, the code in MyClassLoader using that marker would load
+// InP1 or OtherInP1 in the system class loader, and not in MyClassLoader.
+public class PlaceHolder {
+  public static boolean entered = false;
+}
diff --git a/test/955-lambda-smali/run b/test/955-lambda-smali/run
index 2aeca8c..2fb2f89 100755
--- a/test/955-lambda-smali/run
+++ b/test/955-lambda-smali/run
@@ -15,4 +15,4 @@
 # limitations under the License.
 
 # Ensure that the lambda experimental opcodes are turned on for dalvikvm and dex2oat
-${RUN} "$@" --runtime-option -Xexperimental-lambdas -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental-lambdas
+${RUN} "$@" --experimental lambdas
diff --git a/test/960-default-smali/build b/test/960-default-smali/build
new file mode 100755
index 0000000..3946de3
--- /dev/null
+++ b/test/960-default-smali/build
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# Generate the Main.smali file in ./smali or fail
+${ANDROID_BUILD_TOP}/art/test/utils/python/generate_smali_main.py ./smali
+
+USES_JAVA="false"
+if [[ $ARGS == *"--jvm"* ]]; then
+  USES_JAVA="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 >& /dev/null; then
+    USES_JAVA="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA" == "true" ]]; then
+  # We are compiling java code, create it.
+  mkdir -p src
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Ignore the smali directory.
+  EXTRA_ARGS="--no-smali"
+fi
+
+./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
diff --git a/test/960-default-smali/expected.txt b/test/960-default-smali/expected.txt
new file mode 100644
index 0000000..7671eed
--- /dev/null
+++ b/test/960-default-smali/expected.txt
@@ -0,0 +1,84 @@
+Testing for type A
+A-virtual           A.SayHi()='Hi '
+A-interface   Greeter.SayHi()='Hi '
+A-virtual           A.SayHiTwice()='Hi Hi '
+A-interface   Greeter.SayHiTwice()='Hi Hi '
+End testing for type A
+Testing for type B
+B-virtual           B.SayHi()='Hello '
+B-interface   Greeter.SayHi()='Hello '
+B-interface  Greeter2.SayHi()='Hello '
+B-virtual           B.SayHiTwice()='I say Hello Hello '
+B-interface   Greeter.SayHiTwice()='I say Hello Hello '
+B-interface  Greeter2.SayHiTwice()='I say Hello Hello '
+End testing for type B
+Testing for type C
+C-virtual           A.SayHi()='Hi '
+C-virtual           C.SayHi()='Hi '
+C-interface   Greeter.SayHi()='Hi '
+C-virtual           A.SayHiTwice()='You don't control me'
+C-virtual           C.SayHiTwice()='You don't control me'
+C-interface   Greeter.SayHiTwice()='You don't control me'
+End testing for type C
+Testing for type D
+D-virtual           D.GetName()='Alex '
+D-interface  Greeter3.GetName()='Alex '
+D-virtual           D.SayHi()='Hello Alex '
+D-interface   Greeter.SayHi()='Hello Alex '
+D-interface  Greeter3.SayHi()='Hello Alex '
+D-virtual           D.SayHiTwice()='Hello Alex Hello Alex '
+D-interface   Greeter.SayHiTwice()='Hello Alex Hello Alex '
+D-interface  Greeter3.SayHiTwice()='Hello Alex Hello Alex '
+End testing for type D
+Testing for type E
+E-virtual           A.SayHi()='Hi2 '
+E-virtual           E.SayHi()='Hi2 '
+E-interface   Greeter.SayHi()='Hi2 '
+E-interface  Greeter2.SayHi()='Hi2 '
+E-virtual           A.SayHiTwice()='I say Hi2 Hi2 '
+E-virtual           E.SayHiTwice()='I say Hi2 Hi2 '
+E-interface   Greeter.SayHiTwice()='I say Hi2 Hi2 '
+E-interface  Greeter2.SayHiTwice()='I say Hi2 Hi2 '
+End testing for type E
+Testing for type F
+F-interface Attendant.GetPlace()='android'
+F-virtual           F.GetPlace()='android'
+F-virtual           A.SayHi()='Hi '
+F-interface Attendant.SayHi()='Hi '
+F-virtual           F.SayHi()='Hi '
+F-interface   Greeter.SayHi()='Hi '
+F-virtual           A.SayHiTwice()='We can override both interfaces'
+F-interface Attendant.SayHiTwice()='We can override both interfaces'
+F-virtual           F.SayHiTwice()='We can override both interfaces'
+F-interface   Greeter.SayHiTwice()='We can override both interfaces'
+End testing for type F
+Testing for type G
+G-interface Attendant.GetPlace()='android'
+G-virtual           G.GetPlace()='android'
+G-interface Attendant.SayHi()='welcome to android'
+G-virtual           G.SayHi()='welcome to android'
+G-interface Attendant.SayHiTwice()='welcome to androidwelcome to android'
+G-virtual           G.SayHiTwice()='welcome to androidwelcome to android'
+End testing for type G
+Testing for type H
+H-interface Extension.SayHi()='welcome '
+H-virtual           H.SayHi()='welcome '
+End testing for type H
+Testing for type I
+I-virtual           A.SayHi()='Hi '
+I-interface   Greeter.SayHi()='Hi '
+I-interface  Greeter2.SayHi()='Hi '
+I-virtual           I.SayHi()='Hi '
+I-virtual           A.SayHiTwice()='I say Hi Hi '
+I-interface   Greeter.SayHiTwice()='I say Hi Hi '
+I-interface  Greeter2.SayHiTwice()='I say Hi Hi '
+I-virtual           I.SayHiTwice()='I say Hi Hi '
+End testing for type I
+Testing for type J
+J-virtual           A.SayHi()='Hi '
+J-interface   Greeter.SayHi()='Hi '
+J-virtual           J.SayHi()='Hi '
+J-virtual           A.SayHiTwice()='Hi Hi '
+J-interface   Greeter.SayHiTwice()='Hi Hi '
+J-virtual           J.SayHiTwice()='Hi Hi '
+End testing for type J
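Each "Testing for type X" block above is produced by the generated Main calling SayHi/SayHiTwice (and GetName/GetPlace where applicable) through the concrete class and through every interface it implements. A rough Java approximation for type B follows; it is an assumption about the shape of the generated code, and the column padding of the real output is omitted. Per the expected lines, B.SayHi() comes from B itself, while SayHiTwice() resolves to Greeter2's default implementation.

public static void testB() {
  System.out.println("Testing for type B");
  B b = new B();
  Greeter g = b;    // B implements Greeter2, which extends Greeter.
  Greeter2 g2 = b;
  System.out.println("B-virtual B.SayHi()='" + b.SayHi() + "'");
  System.out.println("B-interface Greeter.SayHi()='" + g.SayHi() + "'");
  System.out.println("B-interface Greeter2.SayHi()='" + g2.SayHi() + "'");
  System.out.println("B-virtual B.SayHiTwice()='" + b.SayHiTwice() + "'");
  System.out.println("B-interface Greeter.SayHiTwice()='" + g.SayHiTwice() + "'");
  System.out.println("B-interface Greeter2.SayHiTwice()='" + g2.SayHiTwice() + "'");
  System.out.println("End testing for type B");
}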
diff --git a/test/960-default-smali/info.txt b/test/960-default-smali/info.txt
new file mode 100644
index 0000000..9583abb
--- /dev/null
+++ b/test/960-default-smali/info.txt
@@ -0,0 +1,20 @@
+Smali-based tests for experimental interface default methods.
+
+Obviously needs to run under ART or a runtime and compiler that support the Java 8 language.
+
+When run, a Main.smali file will be generated by the
+test/utils/python/generate_smali_main.py script. If we run with --jvm, we use
+the tools/extract-embedded-java script to turn the smali into equivalent Java
+using the embedded Java code.
+
+When updating be sure to write the equivalent Java code in comments of the smali
+files.
+
+Care should be taken when updating the generate_smali_main.py script. It must
+always produce equivalent output when run multiple times.
+
+To update the test files do the following steps:
+    <Add new classes/interfaces>
+    <Add these classes/interfaces to ./smali/classes.xml>
+    JAVA_HOME="/path/to/java-8-jdk" ../run-test --use-java-home --update --jvm --host 960-default-smali
+    git add ./smali/classes.xml ./expected.txt
diff --git a/test/960-default-smali/run b/test/960-default-smali/run
new file mode 100755
index 0000000..22f6800
--- /dev/null
+++ b/test/960-default-smali/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} --experimental default-methods "$@"
diff --git a/test/960-default-smali/smali/A.smali b/test/960-default-smali/smali/A.smali
new file mode 100644
index 0000000..e755612
--- /dev/null
+++ b/test/960-default-smali/smali/A.smali
@@ -0,0 +1,38 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LA;
+.super Ljava/lang/Object;
+.implements LGreeter;
+
+# class A implements Greeter {
+#     public String SayHi() {
+#         return "Hi ";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .registers 1
+
+    const-string v0, "Hi "
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/Attendant.smali b/test/960-default-smali/smali/Attendant.smali
new file mode 100644
index 0000000..ab63aee
--- /dev/null
+++ b/test/960-default-smali/smali/Attendant.smali
@@ -0,0 +1,53 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public abstract interface LAttendant;
+.super Ljava/lang/Object;
+
+# public interface Attendant {
+#     public default String SayHi() {
+#         return "welcome to " + GetPlace();
+#     }
+#     public default String SayHiTwice() {
+#         return SayHi() + SayHi();
+#     }
+#
+#     public String GetPlace();
+# }
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    const-string v0, "welcome to "
+    invoke-interface {p0}, LAttendant;->GetPlace()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+.method public SayHiTwice()Ljava/lang/String;
+    .locals 2
+    invoke-interface {p0}, LAttendant;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-interface {p0}, LAttendant;->SayHi()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
+
+.method public abstract GetPlace()Ljava/lang/String;
+.end method
diff --git a/test/960-default-smali/smali/B.smali b/test/960-default-smali/smali/B.smali
new file mode 100644
index 0000000..d847dd1
--- /dev/null
+++ b/test/960-default-smali/smali/B.smali
@@ -0,0 +1,38 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LB;
+.super Ljava/lang/Object;
+.implements LGreeter2;
+
+# class B implements Greeter2 {
+#     public String SayHi() {
+#         return "Hello ";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .registers 1
+
+    const-string v0, "Hello "
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/C.smali b/test/960-default-smali/smali/C.smali
new file mode 100644
index 0000000..08a8508
--- /dev/null
+++ b/test/960-default-smali/smali/C.smali
@@ -0,0 +1,37 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LC;
+.super LA;
+
+# class C extends A {
+#     public String SayHiTwice() {
+#         return "You don't control me";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LA;-><init>()V
+    return-void
+.end method
+
+.method public SayHiTwice()Ljava/lang/String;
+    .registers 1
+
+    const-string v0, "You don't control me"
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/D.smali b/test/960-default-smali/smali/D.smali
new file mode 100644
index 0000000..32f3b7e
--- /dev/null
+++ b/test/960-default-smali/smali/D.smali
@@ -0,0 +1,38 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LD;
+.super Ljava/lang/Object;
+.implements LGreeter3;
+
+# class D implements Greeter3 {
+#     public String GetName() {
+#         return "Alex ";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public GetName()Ljava/lang/String;
+    .registers 1
+
+    const-string v0, "Alex "
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/E.smali b/test/960-default-smali/smali/E.smali
new file mode 100644
index 0000000..bae6250
--- /dev/null
+++ b/test/960-default-smali/smali/E.smali
@@ -0,0 +1,38 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LE;
+.super LA;
+.implements LGreeter2;
+
+# class E extends A implements Greeter2 {
+#     public String SayHi() {
+#         return "Hi2 ";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LA;-><init>()V
+    return-void
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .registers 1
+
+    const-string v0, "Hi2 "
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/Extension.smali b/test/960-default-smali/smali/Extension.smali
new file mode 100644
index 0000000..60ffa26
--- /dev/null
+++ b/test/960-default-smali/smali/Extension.smali
@@ -0,0 +1,30 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public abstract interface LExtension;
+.super Ljava/lang/Object;
+
+# public interface Extension {
+#     public default String SayHi() {
+#         return "welcome ";
+#     }
+# }
+
+.method public SayHi()Ljava/lang/String;
+    .locals 1
+    const-string v0, "welcome "
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/F.smali b/test/960-default-smali/smali/F.smali
new file mode 100644
index 0000000..3eaa089
--- /dev/null
+++ b/test/960-default-smali/smali/F.smali
@@ -0,0 +1,47 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LF;
+.super LA;
+.implements LAttendant;
+
+# class F extends A implements Attendant {
+#     public String GetPlace() {
+#         return "android";
+#     }
+#     public String SayHiTwice() {
+#         return "We can override both interfaces";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public SayHiTwice()Ljava/lang/String;
+    .registers 1
+
+    const-string v0, "We can override both interfaces"
+    return-object v0
+.end method
+
+.method public GetPlace()Ljava/lang/String;
+    .registers 1
+    const-string v0, "android"
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/G.smali b/test/960-default-smali/smali/G.smali
new file mode 100644
index 0000000..446f2a4
--- /dev/null
+++ b/test/960-default-smali/smali/G.smali
@@ -0,0 +1,37 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LG;
+.super Ljava/lang/Object;
+.implements LAttendant;
+
+# class G implements Attendant {
+#     public String GetPlace() {
+#         return "android";
+#     }
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public GetPlace()Ljava/lang/String;
+    .registers 1
+    const-string v0, "android"
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/Greeter.smali b/test/960-default-smali/smali/Greeter.smali
new file mode 100644
index 0000000..28530ff
--- /dev/null
+++ b/test/960-default-smali/smali/Greeter.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public abstract interface LGreeter;
+.super Ljava/lang/Object;
+
+# public interface Greeter {
+#     public String SayHi();
+#
+#     public default String SayHiTwice() {
+#         return SayHi() + SayHi();
+#     }
+# }
+
+.method public abstract SayHi()Ljava/lang/String;
+.end method
+
+.method public SayHiTwice()Ljava/lang/String;
+    .locals 2
+    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/Greeter2.smali b/test/960-default-smali/smali/Greeter2.smali
new file mode 100644
index 0000000..ace1798
--- /dev/null
+++ b/test/960-default-smali/smali/Greeter2.smali
@@ -0,0 +1,39 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public abstract interface LGreeter2;
+.super Ljava/lang/Object;
+.implements LGreeter;
+
+# public interface Greeter2 extends Greeter {
+#     public default String SayHiTwice() {
+#         return "I say " + SayHi() + SayHi();
+#     }
+# }
+
+.method public SayHiTwice()Ljava/lang/String;
+    .locals 3
+    const-string v0, "I say "
+    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    invoke-interface {p0}, LGreeter;->SayHi()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/Greeter3.smali b/test/960-default-smali/smali/Greeter3.smali
new file mode 100644
index 0000000..31fc2e7
--- /dev/null
+++ b/test/960-default-smali/smali/Greeter3.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public abstract interface LGreeter3;
+.super Ljava/lang/Object;
+.implements LGreeter;
+
+# public interface Greeter3 extends Greeter {
+#     public String GetName();
+#
+#     public default String SayHi() {
+#         return "Hello " + GetName();
+#     }
+# }
+
+.method public abstract GetName()Ljava/lang/String;
+.end method
+
+.method public SayHi()Ljava/lang/String;
+    .locals 2
+    const-string v0, "Hello "
+    invoke-interface {p0}, LGreeter3;->GetName()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    return-object v0
+.end method
diff --git a/test/960-default-smali/smali/H.smali b/test/960-default-smali/smali/H.smali
new file mode 100644
index 0000000..82065ea
--- /dev/null
+++ b/test/960-default-smali/smali/H.smali
@@ -0,0 +1,28 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LH;
+.super Ljava/lang/Object;
+.implements LExtension;
+
+# class H implements Extension {
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/960-default-smali/smali/I.smali b/test/960-default-smali/smali/I.smali
new file mode 100644
index 0000000..72fb58a
--- /dev/null
+++ b/test/960-default-smali/smali/I.smali
@@ -0,0 +1,28 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LI;
+.super LA;
+.implements LGreeter2;
+
+# class I extends A implements Greeter2 {
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/960-default-smali/smali/J.smali b/test/960-default-smali/smali/J.smali
new file mode 100644
index 0000000..93f3d62
--- /dev/null
+++ b/test/960-default-smali/smali/J.smali
@@ -0,0 +1,29 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LJ;
+.super LA;
+
+# class J extends A {
+# }
+
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LA;-><init>()V
+    return-void
+.end method
+
diff --git a/test/960-default-smali/smali/classes.xml b/test/960-default-smali/smali/classes.xml
new file mode 100644
index 0000000..0aa41f7
--- /dev/null
+++ b/test/960-default-smali/smali/classes.xml
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright 2015 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+
+<data>
+  <classes>
+    <class name="A" super="java/lang/Object">
+      <implements>
+        <item>Greeter</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="B" super="java/lang/Object">
+      <implements>
+        <item>Greeter2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="C" super="A">
+      <implements> </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="D" super="java/lang/Object">
+      <implements>
+        <item>Greeter3</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="E" super="A">
+      <implements>
+        <item>Greeter2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="F" super="A">
+      <implements>
+        <item>Attendant</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="G" super="java/lang/Object">
+      <implements>
+        <item>Attendant</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="H" super="java/lang/Object">
+      <implements>
+        <item>Extension</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="I" super="A">
+      <implements>
+        <item>Greeter2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="J" super="A">
+      <implements> </implements>
+      <methods> </methods>
+    </class>
+  </classes>
+
+  <interfaces>
+    <interface name="Extension" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <method type="default">SayHi</method>
+      </methods>
+    </interface>
+
+    <interface name="Greeter" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <method type="abstract">SayHi</method>
+        <method type="default">SayHiTwice</method>
+      </methods>
+    </interface>
+
+    <interface name="Greeter2" super="java/lang/Object">
+      <implements>
+        <item>Greeter</item>
+      </implements>
+      <methods> </methods>
+    </interface>
+
+    <interface name="Greeter3" super="java/lang/Object">
+      <implements>
+        <item>Greeter</item>
+      </implements>
+      <methods>
+        <method type="abstract">GetName</method>
+      </methods>
+    </interface>
+
+    <interface name="Attendant" super="java/lang/Object">
+      <implements> </implements>
+      <methods>
+        <method type="default">SayHi</method>
+        <method type="default">SayHiTwice</method>
+        <method type="abstract">GetPlace</method>
+      </methods>
+    </interface>
+  </interfaces>
+</data>
diff --git a/test/961-default-iface-resolution-generated/build b/test/961-default-iface-resolution-generated/build
new file mode 100755
index 0000000..03cc624
--- /dev/null
+++ b/test/961-default-iface-resolution-generated/build
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
+
+mkdir -p ./smali
+
+# Generate the smali files and expected.txt or fail
+./util-src/generate_smali.py ./smali ./expected.txt
+
+USES_JAVA="false"
+if [[ $ARGS == *"--jvm"* ]]; then
+  USES_JAVA="true"
+elif [[ $USE_JACK == "true" ]]; then
+  if "$JACK" -D jack.java.source.version=1.8 >& /dev/null; then
+    USES_JAVA="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA" == "true" ]]; then
+  # We are compiling java code, create it.
+  mkdir -p src
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Ignore the smali directory.
+  EXTRA_ARGS="--no-smali"
+fi
+
+./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/961-default-iface-resolution-generated/expected.txt b/test/961-default-iface-resolution-generated/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/961-default-iface-resolution-generated/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py do not directly modify!
diff --git a/test/961-default-iface-resolution-generated/info.txt b/test/961-default-iface-resolution-generated/info.txt
new file mode 100644
index 0000000..2cd2cc7
--- /dev/null
+++ b/test/961-default-iface-resolution-generated/info.txt
@@ -0,0 +1,17 @@
+Smali-based tests for experimental interface default methods.
+
+This tests that interface method resolution order is correct.
+
+Obviously needs to run under ART or a runtime and compiler that support the Java 8 language.
+
+When run, smali test files are generated by the util-src/generate_smali.py
+script. If we run with --jvm, we use the
+$(ANDROID_BUILD_TOP)/art/tools/extract-embedded-java script to turn the smali
+into equivalent Java using the embedded Java code.
+
+Care should be taken when updating the generate_smali.py script. It should
+always produce equivalent output when run multiple times, and the expected
+output should be valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
diff --git a/test/961-default-iface-resolution-generated/run b/test/961-default-iface-resolution-generated/run
new file mode 100755
index 0000000..22f6800
--- /dev/null
+++ b/test/961-default-iface-resolution-generated/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} --experimental default-methods "$@"
diff --git a/test/961-default-iface-resolution-generated/util-src/generate_smali.py b/test/961-default-iface-resolution-generated/util-src/generate_smali.py
new file mode 100755
index 0000000..921a096
--- /dev/null
+++ b/test/961-default-iface-resolution-generated/util-src/generate_smali.py
@@ -0,0 +1,466 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 961.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the type tree can have. Includes the class object in the tree.
+# Increasing this increases the number of generated files significantly. This
+# value was chosen as it is fairly quick to run and very comprehensive, checking
+# every possible interface tree up to 5 layers deep.
+MAX_IFACE_DEPTH = 5
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_groups}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the MainClass smali code.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("smali"),
+                                           test_groups = test_groups,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     try {{
+#       {farg} v = new {farg}();
+#       System.out.printf("%s calls default method on %s\\n",
+#                         v.CalledClassName(),
+#                         v.CalledInterfaceName());
+#       return;
+#     }} catch (Error e) {{
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    :call_{fname}_try_start
+      new-instance v6, L{farg};
+      invoke-direct {{v6}}, L{farg};-><init>()V
+
+      const/4 v0, 2
+      new-array v1,v0, [Ljava/lang/Object;
+      const/4 v0, 0
+      invoke-virtual {{v6}}, L{farg};->CalledClassName()Ljava/lang/String;
+      move-result-object v4
+      aput-object v4,v1,v0
+
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      const-string v3, "%s calls default method on %s\\n"
+
+      invoke-virtual {{v6}}, L{farg};->CalledInterfaceName()Ljava/lang/String;
+      move-result-object v4
+      const/4 v0, 1
+      aput-object v4, v1, v0
+
+      invoke-virtual {{v2,v3,v1}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/Error; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :error_{fname}_start
+      move-exception v3
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      invoke-virtual {{v3,v2}}, Ljava/lang/Error;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def get_expected(self):
+    """
+    Get the expected output calling this function.
+    """
+    return "{tree} calls default method on {iface_tree}".format(
+        tree = self.farg.get_tree(), iface_tree = self.farg.get_called().get_tree())
+
+  def get_name(self):
+    """
+    Get the name of this function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def __str__(self):
+    """
+    Print the smali code of this function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(), farg=self.farg.get_name())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test default method resolution order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+.implements L{iface_name};
+
+# public class {class_name} implements {iface_name} {{
+#   public String CalledClassName() {{
+#     return "{tree}";
+#   }}
+# }}
+
+.method public constructor <init>()V
+  .registers 1
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+.method public CalledClassName()Ljava/lang/String;
+  .locals 1
+  const-string v0, "{tree}"
+  return-object v0
+.end method
+"""
+
+  def __init__(self, iface):
+    """
+    Initialize this test class which implements the given interface
+    """
+    self.iface = iface
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iface_tree}]".format(class_name = self.class_name,
+                                                iface_tree = self.iface.get_tree())
+
+  def __iter__(self):
+    """
+    Step through all interfaces implemented transitively by this class
+    """
+    yield self.iface
+    yield from self.iface
+
+  def get_called(self):
+    """
+    Get the interface whose default method would be called when calling the
+    CalledInterfaceName function.
+    """
+    all_ifaces = set(iface for iface in self if iface.default)
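+    # Among the default-providing interfaces, pick one that is not a
+    # super-type of any other default provider (i.e. the most derived one);
+    # its default method is the one resolution should select.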
+    for i in all_ifaces:
+      if all(map(lambda j: i not in j.get_super_types(), all_ifaces)):
+        return i
+    raise Exception("UNREACHABLE! Unable to find default method!")
+
+  def __str__(self):
+    """
+    Print the smali code of this class.
+    """
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           iface_name = self.iface.get_name(),
+                                           tree = self.get_tree(),
+                                           class_name = self.class_name)
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+#   public String CalledClassName();
+.method public abstract CalledClassName()Ljava/lang/String;
+.end method
+
+{funcs}
+
+# }}
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default String CalledInterfaceName() {{
+#     return "{tree}";
+#   }}
+.method public CalledInterfaceName()Ljava/lang/String;
+  .locals 1
+  const-string v0, "{tree}"
+  return-object v0
+.end method
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  def __init__(self, ifaces, default):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = sorted(ifaces)
+    self.default = default
+    end = "_DEFAULT" if default else ""
+    self.class_name = "INTERFACE_"+gensym()+end
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def __str__(self):
+    """
+    Print the smali code of this interface.
+    """
+    s_ifaces = " "
+    j_ifaces = " "
+    for i in self.ifaces:
+      s_ifaces += self.IMPLEMENTS_TEMPLATE.format(iface_name = i.get_name())
+      j_ifaces += " {},".format(i.get_name())
+    j_ifaces = j_ifaces[0:-1]
+    if self.default:
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(ifaces = j_ifaces,
+                                                tree = self.get_tree(),
+                                                class_name = self.class_name)
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def print_tree(ifaces):
+  """
+  Prints a list of iface trees
+  """
+  return " ".join(i.get_tree() for i in  ifaces)
+
+# The deduplicated output of subtree_sizes for each size up to
+# MAX_IFACE_DEPTH.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
+
+def create_interface_trees():
+  """
+  Return all legal interface trees
+  """
+  def dump_supers(s):
+    """
+    Does depth first traversal of all the interfaces in the list.
+    """
+    for i in s:
+      yield i
+      yield from i
+
+  def create_interface_trees_inner(num, allow_default):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        if sub == 1:
+          ifaces.append([TestInterface([], allow_default)])
+          if allow_default:
+            ifaces[-1].append(TestInterface([], False))
+        else:
+          ifaces.append(list(create_interface_trees_inner(sub, allow_default)))
+      for supers in itertools.product(*ifaces):
+        all_supers = sorted(set(dump_supers(supers)) - set(supers))
+        for i in range(len(all_supers) + 1):
+          for combo in itertools.combinations(all_supers, i):
+            yield TestInterface(list(combo) + list(supers), allow_default)
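+      # When defaults are allowed, also emit trees where only one direct
+      # subtree is permitted to contain default methods, so resolution must
+      # find it among otherwise default-free siblings.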
+      if allow_default:
+        for i in range(len(split)):
+          ifaces = []
+          for sub, cs in zip(split, itertools.count()):
+            if sub == 1:
+              ifaces.append([TestInterface([], i == cs)])
+            else:
+              ifaces.append(list(create_interface_trees_inner(sub, i == cs)))
+          for supers in itertools.product(*ifaces):
+            all_supers = sorted(set(dump_supers(supers)) - set(supers))
+            for i in range(len(all_supers) + 1):
+              for combo in itertools.combinations(all_supers, i):
+                yield TestInterface(list(combo) + list(supers), False)
+
+  for num in range(1, MAX_IFACE_DEPTH):
+    yield from create_interface_trees_inner(num, True)
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for tree in create_interface_trees():
+    classes.add(tree)
+    for i in tree:
+      classes.add(i)
+    test_class = TestClass(tree)
+    mc.add_test(test_class)
+    classes.add(test_class)
+  return mc, classes
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/962-iface-static/build b/test/962-iface-static/build
new file mode 100755
index 0000000..24e2feb
--- /dev/null
+++ b/test/962-iface-static/build
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
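+# Decide whether to build from the embedded Java sources: always when running
+# on a real JVM (--jvm), and for Jack builds only if the installed Jack
+# accepts Java 1.8 sources.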
+USES_JAVA="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
+    USES_JAVA="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA" == "true" ]]; then
+  # We are compiling java code, create it.
+  mkdir -p src
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Ignore the smali directory.
+  EXTRA_ARGS="--no-smali"
+fi
+
+./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
diff --git a/test/962-iface-static/expected.txt b/test/962-iface-static/expected.txt
new file mode 100644
index 0000000..6d98ea1
--- /dev/null
+++ b/test/962-iface-static/expected.txt
@@ -0,0 +1,3 @@
+init
+constructor
+Hello
diff --git a/test/962-iface-static/info.txt b/test/962-iface-static/info.txt
new file mode 100644
index 0000000..d4732e5
--- /dev/null
+++ b/test/962-iface-static/info.txt
@@ -0,0 +1,4 @@
+Smali-based tests for experimental interface static methods.
+
+To run with --jvm you must export JAVA_HOME to a Java 8 Language installation
+and pass the --use-java-home flag to run-test.
diff --git a/test/962-iface-static/run b/test/962-iface-static/run
new file mode 100755
index 0000000..d37737f
--- /dev/null
+++ b/test/962-iface-static/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} --experimental default-methods "$@"
diff --git a/test/962-iface-static/smali/Displayer.smali b/test/962-iface-static/smali/Displayer.smali
new file mode 100644
index 0000000..ed4c013
--- /dev/null
+++ b/test/962-iface-static/smali/Displayer.smali
@@ -0,0 +1,45 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class Displayer {
+#   static {
+#       System.out.println("init");
+#   }
+#
+#   public Displayer() {
+#       System.out.println("constructor");
+#   }
+# }
+
+.class public LDisplayer;
+.super Ljava/lang/Object;
+
+.method static constructor <clinit>()V
+    .locals 3
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "init"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public constructor <init>()V
+    .locals 2
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "constructor"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
diff --git a/test/962-iface-static/smali/Main.smali b/test/962-iface-static/smali/Main.smali
new file mode 100644
index 0000000..72fa5e0
--- /dev/null
+++ b/test/962-iface-static/smali/Main.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main {
+#   public static void main(String[] args) {
+#       System.out.println(iface.SayHi());
+#   }
+# }
+.class public LMain;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
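+    # Static interface method invocation; only valid when run with
+    # --experimental default-methods (see this test's run script).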
+    invoke-static {}, Liface;->SayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
diff --git a/test/962-iface-static/smali/iface.smali b/test/962-iface-static/smali/iface.smali
new file mode 100644
index 0000000..5b9c03e
--- /dev/null
+++ b/test/962-iface-static/smali/iface.smali
@@ -0,0 +1,43 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface iface {
+#   public static final Displayer f = new Displayer();
+#
+#   public static String SayHi() {
+#       return "Hello";
+#   }
+# }
+
+.class public abstract interface Liface;
+.super Ljava/lang/Object;
+
+.field public final static f:LDisplayer;
+
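+# The <clinit> creates the Displayer field so the expected "init" and
+# "constructor" lines show that calling the static method initializes the
+# interface.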
+.method static constructor <clinit>()V
+    .locals 3
+    new-instance v1, LDisplayer;
+    invoke-direct {v1}, LDisplayer;-><init>()V
+    sput-object v1, Liface;->f:LDisplayer;
+    return-void
+.end method
+
+.method public static SayHi()Ljava/lang/String;
+    .locals 1
+    const-string v0, "Hello"
+    return-object v0
+.end method
+
diff --git a/test/963-default-range-smali/build b/test/963-default-range-smali/build
new file mode 100755
index 0000000..24e2feb
--- /dev/null
+++ b/test/963-default-range-smali/build
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+USES_JAVA="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
+    USES_JAVA="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA" == "true" ]]; then
+  # We are compiling java code, create it.
+  mkdir -p src
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Ignore the smali directory.
+  EXTRA_ARGS="--no-smali"
+fi
+
+./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
diff --git a/test/963-default-range-smali/expected.txt b/test/963-default-range-smali/expected.txt
new file mode 100644
index 0000000..af17d2f
--- /dev/null
+++ b/test/963-default-range-smali/expected.txt
@@ -0,0 +1,2 @@
+Hello
+Hello
diff --git a/test/963-default-range-smali/info.txt b/test/963-default-range-smali/info.txt
new file mode 100644
index 0000000..d4732e5
--- /dev/null
+++ b/test/963-default-range-smali/info.txt
@@ -0,0 +1,4 @@
+Smali-based tests for experimental interface default methods.
+
+To run with --jvm you must export JAVA_HOME to a Java 8 Language installation
+and pass the --use-java-home flag to run-test.
diff --git a/test/963-default-range-smali/run b/test/963-default-range-smali/run
new file mode 100755
index 0000000..d37737f
--- /dev/null
+++ b/test/963-default-range-smali/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} --experimental default-methods "$@"
diff --git a/test/963-default-range-smali/smali/A.smali b/test/963-default-range-smali/smali/A.smali
new file mode 100644
index 0000000..b3d91dd
--- /dev/null
+++ b/test/963-default-range-smali/smali/A.smali
@@ -0,0 +1,29 @@
+# /*
+#  * Copyright 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public LA;
+.super Ljava/lang/Object;
+.implements Liface;
+
+# class A implements iface {
+# }
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
diff --git a/test/963-default-range-smali/smali/Main.smali b/test/963-default-range-smali/smali/Main.smali
new file mode 100644
index 0000000..400fba7
--- /dev/null
+++ b/test/963-default-range-smali/smali/Main.smali
@@ -0,0 +1,77 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main {
+#   public static void main(String[] args) {
+#       A a = new A();
+#       System.out.println(a.SayHi("a string 0",
+#                                  "a string 1",
+#                                  "a string 2",
+#                                  "a string 3",
+#                                  "a string 4",
+#                                  "a string 5",
+#                                  "a string 6",
+#                                  "a string 7",
+#                                  "a string 8",
+#                                  "a string 9"));
+#       iface b = (iface)a;
+#       System.out.println(b.SayHi("a string 0",
+#                                  "a string 1",
+#                                  "a string 2",
+#                                  "a string 3",
+#                                  "a string 4",
+#                                  "a string 5",
+#                                  "a string 6",
+#                                  "a string 7",
+#                                  "a string 8",
+#                                  "a string 9"));
+#   }
+# }
+.class public LMain;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 15
+    sget-object v12, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    new-instance v1, LA;
+    invoke-direct {v1}, LA;-><init>()V
+    const-string v2, "a string 0"
+    const-string v3, "a string 1"
+    const-string v4, "a string 2"
+    const-string v5, "a string 3"
+    const-string v6, "a string 4"
+    const-string v7, "a string 5"
+    const-string v8, "a string 6"
+    const-string v9, "a string 7"
+    const-string v10, "a string 8"
+    const-string v11, "a string 9"
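+    # The /range invoke variants are the point of this test: v1 holds the
+    # receiver and v2-v11 hold the ten String arguments as one contiguous
+    # register range.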
+    invoke-virtual/range {v1 .. v11}, LA;->SayHi(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v12,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface/range {v1 .. v11}, Liface;->SayHi(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v12,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
diff --git a/test/963-default-range-smali/smali/iface.smali b/test/963-default-range-smali/smali/iface.smali
new file mode 100644
index 0000000..c2c3ce6
--- /dev/null
+++ b/test/963-default-range-smali/smali/iface.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface iface {
+#   public default String SayHi(String n1,
+#                               String n2,
+#                               String n3,
+#                               String n4,
+#                               String n5,
+#                               String n6,
+#                               String n7,
+#                               String n8,
+#                               String n9,
+#                               String n0) {
+#       return "Hello";
+#   }
+# }
+
+.class public abstract interface Liface;
+.super Ljava/lang/Object;
+
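+# A default method is declared as a plain non-abstract public method on the
+# interface; it is only accepted when built and run with
+# --experimental default-methods.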
+.method public SayHi(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+    .locals 1
+    const-string v0, "Hello"
+    return-object v0
+.end method
+
diff --git a/test/964-default-iface-init-generated/build b/test/964-default-iface-init-generated/build
new file mode 100755
index 0000000..d916f1b
--- /dev/null
+++ b/test/964-default-iface-init-generated/build
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
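+# The success path restores the limit explicitly at the end of this script.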
+
+# Generate the smali files and expected.txt or fail
+./util-src/generate_smali.py ./smali ./expected.txt
+
+USES_JAVA="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
+    USES_JAVA="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA" == "true" ]]; then
+  # We are compiling java code, create it.
+  mkdir -p src
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Ignore the smali directory.
+  EXTRA_ARGS="--no-smali"
+fi
+
+./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/964-default-iface-init-generated/expected.txt b/test/964-default-iface-init-generated/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/964-default-iface-init-generated/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py do not directly modify!
diff --git a/test/964-default-iface-init-generated/info.txt b/test/964-default-iface-init-generated/info.txt
new file mode 100644
index 0000000..5805a86
--- /dev/null
+++ b/test/964-default-iface-init-generated/info.txt
@@ -0,0 +1,17 @@
+Smali-based tests for interface initialization.
+
+This tests that interface initialization order is correct.
+
+Obviously needs to run under ART or a Java 8 Language runtime and compiler.
+
+When run, smali test files are generated by the util-src/generate_smali.py
+script. If we run with --jvm we will use the
+$(ANDROID_BUILD_TOP)/art/tools/extract-embedded-java script to turn the smali
+into equivalent Java using the embedded Java code.
+
+Care should be taken when updating the generate_smali.py script. It should always
+return equivalent output when run multiple times and the expected output should
+be valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
diff --git a/test/964-default-iface-init-generated/run b/test/964-default-iface-init-generated/run
new file mode 100755
index 0000000..22f6800
--- /dev/null
+++ b/test/964-default-iface-init-generated/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} --experimental default-methods "$@"
diff --git a/test/964-default-iface-init-generated/smali/Displayer.smali b/test/964-default-iface-init-generated/smali/Displayer.smali
new file mode 100644
index 0000000..91280a8
--- /dev/null
+++ b/test/964-default-iface-init-generated/smali/Displayer.smali
@@ -0,0 +1,45 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# // This class exists because java does not allow static {} blocks in interfaces.
+# public class Displayer {
+#   public Displayer(String type) {
+#       System.out.println("initialization of " + type);
+#   }
+#   public void touch() {
+#       return;
+#   }
+# }
+
+.class public LDisplayer;
+.super Ljava/lang/Object;
+
+.method public constructor <init>(Ljava/lang/String;)V
+    .locals 2
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    const-string v0, "initialization of "
+    invoke-virtual {v0, p1}, Ljava/lang/String;->concat(Ljava/lang/String;)Ljava/lang/String;
+    move-result-object v0
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public touch()V
+    .locals 0
+    return-void
+.end method
+
diff --git a/test/964-default-iface-init-generated/util-src/generate_smali.py b/test/964-default-iface-init-generated/util-src/generate_smali.py
new file mode 100755
index 0000000..3c138ab
--- /dev/null
+++ b/test/964-default-iface-init-generated/util-src/generate_smali.py
@@ -0,0 +1,531 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 964.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the tree can have.
+MAX_IFACE_DEPTH = 3
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_groups}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the smali code for this test.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           test_groups = test_groups,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     try {{
+#       System.out.println("About to initialize {tree}");
+#       {farg} v = new {farg}();
+#       System.out.println("Initialized {tree}");
+#       v.touchAll();
+#       System.out.println("All of {tree} hierarchy initialized");
+#       return;
+#     }} catch (Error e) {{
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    :call_{fname}_try_start
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      const-string v3, "About to initialize {tree}"
+      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+      new-instance v6, L{farg};
+      invoke-direct {{v6}}, L{farg};-><init>()V
+
+      const-string v3, "Initialized {tree}"
+      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+      invoke-virtual {{v6}}, L{farg};->touchAll()V
+
+      const-string v3, "All of {tree} hierarchy initialized"
+      invoke-virtual {{v2, v3}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/Error; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :error_{fname}_start
+      move-exception v3
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      invoke-virtual {{v3,v2}}, Ljava/lang/Error;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  OUTPUT_FORMAT = """
+About to initialize {tree}
+{initialize_output}
+Initialized {tree}
+{touch_output}
+All of {tree} hierarchy initialized
+""".strip()
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def __str__(self):
+    """
+    Print the smali code for this test function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(),
+                                              farg=self.farg.get_name(),
+                                              tree = self.farg.get_tree())
+
+  def get_name(self):
+    """
+    Gets the name of this test function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def get_expected(self):
+    """
+    Get the expected output of this function.
+    """
+    return self.OUTPUT_FORMAT.format(
+        tree = self.farg.get_tree(),
+        initialize_output = self.farg.get_initialize_output().strip(),
+        touch_output = self.farg.get_touch_output().strip())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test interface initialization order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public class {class_name} implements {ifaces} {{
+#
+#   public {class_name}() {{
+#   }}
+.method public constructor <init>()V
+  .locals 2
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+#   public void marker() {{
+#     return;
+#   }}
+.method public marker()V
+  .locals 0
+  return-void
+.end method
+
+#   public void touchAll() {{
+.method public touchAll()V
+  .locals 2
+  sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  {touch_calls}
+  return-void
+.end method
+#   }}
+# }}
+"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  TOUCH_CALL_TEMPLATE = """
+#     System.out.println("{class_name} touching {iface_name}");
+#     {iface_name}.field.touch();
+      const-string v1, "{class_name} touching {iface_name}"
+      invoke-virtual {{v0, v1}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      sget-object v1, L{iface_name};->field:LDisplayer;
+      invoke-virtual {{v1}}, LDisplayer;->touch()V
+"""
+
+  TOUCH_OUTPUT_TEMPLATE = """
+{class_name} touching {iface_name}
+{touch_output}
+""".strip()
+
+  def __init__(self, ifaces):
+    """
+    Initialize this test class which implements the given interfaces
+    """
+    self.ifaces = ifaces
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Gets the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{fname} {iftree}]".format(fname = self.get_name(), iftree = print_tree(self.ifaces))
+
+  def get_initialize_output(self):
+    return "\n".join(map(lambda i: i.get_initialize_output().strip(), dump_tree(self.ifaces)))
+
+  def get_touch_output(self):
+    return "\n".join(map(lambda a: self.TOUCH_OUTPUT_TEMPLATE.format(
+                                      class_name = self.class_name,
+                                      iface_name = a.get_name(),
+                                      touch_output = a.get_touch_output()).strip(),
+                         self.get_all_interfaces()))
+
+  def get_all_interfaces(self):
+    """
+    Returns a set of all interfaces this class transitively implements
+    """
+    return sorted(set(dump_tree(self.ifaces)))
+
+  def __str__(self):
+    """
+    Print the smali code for this class.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    touches  = '\n'.join(map(lambda a: self.TOUCH_CALL_TEMPLATE.format(class_name = self.class_name,
+                                                                       iface_name = a.get_name()),
+                             self.get_all_interfaces()))
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           implements_spec = s_ifaces,
+                                           ifaces = j_ifaces,
+                                           class_name = self.class_name,
+                                           touch_calls = touches)
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test interface initialization order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+#   public static final Displayer field = new Displayer("{tree}");
+.field public final static field:LDisplayer;
+
+.method static constructor <clinit>()V
+    .locals 3
+    const-string v2, "{tree}"
+    new-instance v1, LDisplayer;
+    invoke-direct {{v1, v2}}, LDisplayer;-><init>(Ljava/lang/String;)V
+    sput-object v1, L{class_name};->field:LDisplayer;
+    return-void
+.end method
+
+#   public void marker();
+.method public abstract marker()V
+.end method
+
+{funcs}
+
+# }}
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default void {class_name}_DEFAULT_FUNC() {{
+#     return;
+#   }}
+.method public {class_name}_DEFAULT_FUNC()V
+  .locals 0
+  return-void
+.end method
+"""
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  OUTPUT_TEMPLATE = "initialization of {tree}"
+
+  def __init__(self, ifaces, default):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = ifaces
+    self.default = default
+    end = "_DEFAULT" if default else ""
+    self.class_name = "INTERFACE_"+gensym()+end
+    self.cloned = False
+    self.initialized = False
+
+  def clone(self):
+    """
+    Clones this interface, returning a new one with the same structure but
+    different name.
+    """
+    return TestInterface(tuple(map(lambda a: a.clone(), self.ifaces)), self.default)
+
+  def get_name(self):
+    """
+    Gets the name of this interface
+    """
+    return self.class_name
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def get_initialize_output(self):
+    """
+    Returns the expected output when a class that implements this interface is initialized.
+    """
+    if self.default and not self.initialized:
+      self.initialized = True
+      return self.OUTPUT_TEMPLATE.format(tree = self.get_tree())
+    else:
+      return ""
+
+  def get_touch_output(self):
+    """
+    Returns the expected output when this interface is touched.
+    """
+    if not self.default and not self.initialized:
+      self.initialized = True
+      return self.OUTPUT_TEMPLATE.format(tree = self.get_tree())
+    else:
+      return ""
+
+  def __str__(self):
+    """
+    Print the smali code for this interface.
+    """
+    s_ifaces = '\n'.join(map(lambda a: self.IMPLEMENTS_TEMPLATE.format(iface_name = a.get_name()),
+                             self.ifaces))
+    j_ifaces = ', '.join(map(lambda a: a.get_name(), self.ifaces))
+    if self.default:
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(class_name = self.class_name)
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def dump_tree(ifaces):
+  """
+  Yields all the interfaces transitively implemented by the given set, in
+  post-order (super-interfaces before the interfaces that implement them).
+  """
+  for i in ifaces:
+    yield from dump_tree(i.ifaces)
+    yield i
+
+def print_tree(ifaces):
+  """
+  Prints the tree for the given ifaces.
+  """
+  return " ".join(i.get_tree() for i in  ifaces)
+
+def clone_all(l):
+  return tuple(a.clone() for a in l)
+
+# Cached output of subtree_sizes for speed of access.
+SUBTREES = [set(tuple(l) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for split in SUBTREES[num]:
+      ifaces = []
+      for sub in split:
+        ifaces.append(list(create_interface_trees(sub)))
+      for supers in itertools.product(*ifaces):
+        yield TestClass(clone_all(supers))
+        for i in range(len(set(dump_tree(supers)) - set(supers))):
+          ns = clone_all(supers)
+          selected = sorted(set(dump_tree(ns)) - set(ns))[i]
+          yield TestClass(tuple([selected] + list(ns)))
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    yield TestInterface(tuple(), False)
+    yield TestInterface(tuple(), True)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    for supers in itertools.product(*ifaces):
+      yield TestInterface(clone_all(supers), False)
+      yield TestInterface(clone_all(supers), True)
+      # TODO Should add on some from higher up the tree.
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in dump_tree(clazz.ifaces):
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index db16b97..9ff620b 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -41,8 +41,7 @@
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
   TEST_ART_RUN_TEST_DEPENDENCIES += \
-    $(JACK_JAR) \
-    $(JACK_LAUNCHER_JAR) \
+    $(JACK) \
     $(JILL_JAR)
 endif
 
@@ -60,16 +59,17 @@
   else
     run_test_options += --build-with-javac-dx
   endif
+  ifeq ($(ART_TEST_QUIET),true)
+    run_test_options += --quiet
+  endif
 $$(dmart_target): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
-$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES)
+$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
 	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
 	  JACK=$(abspath $(JACK)) \
-	  JACK_VM_COMMAND="$(JACK_VM) $(DEFAULT_JACK_VM_ARGS) $(JAVA_TMPDIR_ARG) -jar $(abspath $(JACK_LAUNCHER_JAR)) " \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
-	  JACK_JAR=$(abspath $(JACK_JAR)) \
 	  JILL_JAR=$(abspath $(JILL_JAR)) \
 	  $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
 	$(hide) touch $$@
@@ -214,13 +214,29 @@
   055-enum-performance \
   133-static-invoke-super
 
- # disable timing sensitive tests on "dist" builds.
+# disable timing sensitive tests on "dist" builds.
 ifdef dist_goal
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
         $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
 endif
 
+# Tests that require python3.
+TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS := \
+  960-default-smali \
+  961-default-iface-resolution-generated \
+  964-default-iface-init-generated \
+
+# Check if we have python3 to run our tests.
+ifeq ($(wildcard /usr/bin/python3),)
+  $(warning "No python3 found. Disabling tests: $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS)")
+
+  # Currently disable tests requiring python3 when it is not installed.
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+endif
+
 TEST_ART_TIMING_SENSITIVE_RUN_TESTS :=
 
 # Note 116-nodex2oat is not broken per-se it just doesn't (and isn't meant to) work with --prebuild.
@@ -286,10 +302,14 @@
 TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS :=
 
 # Tests that are broken with GC stress.
-# 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
-# hope the second process got into the expected state. The slowness of gcstress makes this bad.
+# * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
+#   hope the second process got into the expected state. The slowness of gcstress makes this bad.
+# * 961-default-iface-resolution-generated is a very long test that often will take more than the
+#   timeout to run when gcstress is enabled. This is because gcstress slows down allocations
+#   significantly which this test does a lot.
 TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  961-default-iface-resolution-generated
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -313,13 +333,15 @@
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),130-hprof,$(ALL_ADDRESS_SIZES))
 
 # 131 is an old test. The functionality has been implemented at an earlier stage and is checked
-# in tests 138.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+# in tests 138. Blacklisted for debug builds since these builds have duplicate classes checks which
+# punt to interpreter.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),debug,$(PREBUILD_TYPES), \
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES))
 
-# 138-duplicate-classes-check. Turned off temporarily, b/21333911.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+# 138-duplicate-classes-check. Turned on for debug builds since debug builds have duplicate classes
+# checks enabled, b/21333911.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),138-duplicate-classes-check,$(ALL_ADDRESS_SIZES))
 
@@ -422,53 +444,9 @@
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
     441-checker-inliner \
-    442-checker-constant-folding \
-    444-checker-nce \
-    445-checker-licm \
-    446-checker-inliner2 \
-    447-checker-inliner3 \
-    449-checker-bce \
-    450-checker-types \
-    455-checker-gvn \
-    458-checker-instruction-simplification \
-    462-checker-inlining-across-dex-files \
-    463-checker-boolean-simplifier \
-    464-checker-inline-sharpen-calls \
-    465-checker-clinit-gvn \
-    468-checker-bool-simplifier-regression \
-    473-checker-inliner-constants \
-    474-checker-boolean-input \
-    476-checker-ctor-memory-barrier \
-    477-checker-bound-type \
-    478-checker-clinit-check-pruning \
-    478-checker-inliner-nested-loop \
-    480-checker-dead-blocks \
-    482-checker-loop-back-edge-use \
-    484-checker-register-hints \
-    485-checker-dce-loop-update \
-    485-checker-dce-switch \
-    486-checker-must-do-null-check \
-    487-checker-inline-calls \
-    488-checker-inline-recursive-calls \
-    490-checker-inline \
-    492-checker-inline-invoke-interface \
-    493-checker-inline-invoke-interface \
-    494-checker-instanceof-tests \
-    495-checker-checkcast-tests \
-    496-checker-inlining-and-class-loader \
-    508-checker-disassembly \
     510-checker-try-catch \
-    517-checker-builder-fallthrough \
     521-checker-array-set-null \
-    522-checker-regression-monitor-exit \
-    523-checker-can-throw-regression \
-    525-checker-arrays-and-fields \
-    526-checker-caller-callee-regs \
-    529-checker-unresolved \
-    530-checker-loops \
-    530-checker-regression-reftype-final \
-    532-checker-nonnull-arrayset \
-    534-checker-bce-deoptimization \
+    536-checker-intrinsic-optimization \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -519,8 +497,10 @@
 
 # Tests that should fail in the read barrier configuration.
 # 137: Read barrier forces interpreter. Cannot run this with the interpreter.
+# 141: Class unloading test is flaky with CC since CC seems to occasionally keep class loaders live.
 TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  141-class-unload
 
 ifeq ($(ART_USE_READ_BARRIER),true)
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -669,13 +649,13 @@
     uc_host_or_target := HOST
     test_groups := ART_RUN_TEST_HOST_RULES
     run_test_options += --host
-    prereq_rule := $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES)
+    prereq_rule := $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(HOST_JACK_CLASSPATH_DEPENDENCIES)
     jack_classpath := $(HOST_JACK_CLASSPATH)
   else
     ifeq ($(1),target)
       uc_host_or_target := TARGET
       test_groups := ART_RUN_TEST_TARGET_RULES
-      prereq_rule := test-art-target-sync
+      prereq_rule := test-art-target-sync $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
       jack_classpath := $(TARGET_JACK_CLASSPATH)
     else
       $$(error found $(1) expected $(TARGET_TYPES))
@@ -881,6 +861,9 @@
   ifneq ($(ART_TEST_ANDROID_ROOT),)
     run_test_options := --android-root $(ART_TEST_ANDROID_ROOT) $$(run_test_options)
   endif
+  ifeq ($(ART_TEST_QUIET),true)
+    run_test_options += --quiet
+  endif
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_JACK_CLASSPATH := $$(jack_classpath)
 .PHONY: $$(run_test_rule_name)
@@ -891,9 +874,7 @@
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
 	    JACK=$(abspath $(JACK)) \
-	    JACK_VM_COMMAND="$(JACK_VM) $(DEFAULT_JACK_VM_ARGS) $(JAVA_TMPDIR_ARG) -jar $(abspath $(JACK_LAUNCHER_JAR)) " \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
-	    JACK_JAR=$(abspath $(JACK_JAR)) \
 	    JILL_JAR=$(abspath $(JILL_JAR)) \
 	    art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(12) \
 	      && $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
diff --git a/test/Lookup/A.java b/test/Lookup/A.java
new file mode 100644
index 0000000..666ba18
--- /dev/null
+++ b/test/Lookup/A.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class A {}
diff --git a/test/Lookup/AB.java b/test/Lookup/AB.java
new file mode 100644
index 0000000..b231708
--- /dev/null
+++ b/test/Lookup/AB.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class AB {}
diff --git a/test/Lookup/C.java b/test/Lookup/C.java
new file mode 100644
index 0000000..5b90069
--- /dev/null
+++ b/test/Lookup/C.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class C {}
diff --git a/test/etc/default-build b/test/etc/default-build
index c281bca..7242428 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -17,8 +17,45 @@
 # Stop if something fails.
 set -e
 
+# Set default values for directories.
+if [ -d smali ]; then
+  HAS_SMALI=true
+else
+  HAS_SMALI=false
+fi
+
+if [ -d src ]; then
+  HAS_SRC=true
+else
+  HAS_SRC=false
+fi
+
+if [ -d src2 ]; then
+  HAS_SRC2=true
+else
+  HAS_SRC2=false
+fi
+
+if [ -d src-multidex ]; then
+  HAS_SRC_MULTIDEX=true
+else
+  HAS_SRC_MULTIDEX=false
+fi
+
+if [ -d src-ex ]; then
+  HAS_SRC_EX=true
+else
+  HAS_SRC_EX=false
+fi
+
 DX_FLAGS=""
 SKIP_DX_MERGER="false"
+EXPERIMENTAL=""
+
+# Setup experimental flag mappings in a bash associative array.
+declare -A JACK_EXPERIMENTAL_ARGS
+JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8"
+JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8"
 
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
@@ -26,6 +63,27 @@
     option="$1"
     DX_FLAGS="${DX_FLAGS} $option"
     shift
+  elif [ "x$1" = "x--jvm" ]; then
+    shift
+  elif [ "x$1" = "x--no-src" ]; then
+    HAS_SRC=false
+    shift
+  elif [ "x$1" = "x--no-src2" ]; then
+    HAS_SRC2=false
+    shift
+  elif [ "x$1" = "x--no-src-multidex" ]; then
+    HAS_SRC_MULTIDEX=false
+    shift
+  elif [ "x$1" = "x--no-src-ex" ]; then
+    HAS_SRC_EX=false
+    shift
+  elif [ "x$1" = "x--no-smali" ]; then
+    HAS_SMALI=false
+    shift
+  elif [ "x$1" = "x--experimental" ]; then
+    shift
+    EXPERIMENTAL="${EXPERIMENTAL} $1"
+    shift
   elif expr "x$1" : "x--" >/dev/null 2>&1; then
     echo "unknown $0 option: $1" 1>&2
     exit 1
@@ -34,17 +92,22 @@
   fi
 done
 
+# Add args from the experimental mappings.
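+# Experiments without a mapping expand to the empty string and are ignored.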
+for experiment in ${EXPERIMENTAL}; do
+  JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+done
+
 if [ -e classes.dex ]; then
   zip $TEST_NAME.jar classes.dex
   exit 0
 fi
 
-if ! [ -d src ] && ! [ -d src2 ]; then
+if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then
   # No src directory? Then forget about trying to run dx.
   SKIP_DX_MERGER="true"
 fi
 
-if [ -d src-multidex ]; then
+if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
   # Jack does not support this configuration unless we specify how to partition the DEX file
   # with a .jpp file.
   USE_JACK="false"
@@ -52,27 +115,29 @@
 
 if [ ${USE_JACK} = "true" ]; then
   # Jack toolchain
-  if [ -d src ]; then
-    ${JACK} --output-jack src.jack src
+  if [ "${HAS_SRC}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src.jack src
     imported_jack_files="--import src.jack"
   fi
 
-  if [ -d src2 ]; then
-    ${JACK} --output-jack src2.jack src2
+  if [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src2.jack src2
     imported_jack_files="--import src2.jack ${imported_jack_files}"
   fi
 
   # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider
   # class definitions from src2 first.
-  ${JACK} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
+  fi
 else
   # Legacy toolchain with javac+dx
-  if [ -d src ]; then
+  if [ "${HAS_SRC}" = "true" ]; then
     mkdir classes
-    ${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+    ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
   fi
 
-  if [ -d src-multidex ]; then
+  if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
     mkdir classes2
     ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
     if [ ${NEED_DEX} = "true" ]; then
@@ -81,20 +146,22 @@
     fi
   fi
 
-  if [ -d src2 ]; then
+  if [ "${HAS_SRC2}" = "true" ]; then
     mkdir -p classes
-    ${JAVAC} -d classes `find src2 -name '*.java'`
+    ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'`
   fi
 
-  if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
-    ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
-      --dump-width=1000 ${DX_FLAGS} classes
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
+        --dump-width=1000 ${DX_FLAGS} classes
+    fi
   fi
 fi
 
-if [ -d smali ]; then
+if [ "${HAS_SMALI}" = "true" ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx256m --experimental --api-level 23 --output smali_classes.dex `find smali -name '*.smali'`
+  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
 
   # Don't bother with dexmerger if we provide our own main function in a smali file.
   if [ ${SKIP_DX_MERGER} = "false" ]; then
@@ -104,18 +171,18 @@
   fi
 fi
 
-if [ -d src-ex ]; then
+if [ ${HAS_SRC_EX} = "true" ]; then
   if [ ${USE_JACK} = "true" ]; then
       # Rename previous "classes.dex" so it is not overwritten.
       mv classes.dex classes-1.dex
       #TODO find another way to append src.jack to the jack classpath
-      ${JACK}:src.jack --output-dex . src-ex
+      ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex
       zip $TEST_NAME-ex.jar classes.dex
       # Restore previous "classes.dex" so it can be zipped.
       mv classes-1.dex classes.dex
   else
     mkdir classes-ex
-    ${JAVAC} -d classes-ex -cp classes `find src-ex -name '*.java'`
+    ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'`
     if [ ${NEED_DEX} = "true" ]; then
       ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \
         --dump-width=1000 ${DX_FLAGS} classes-ex
@@ -131,7 +198,7 @@
 fi
 
 # Create a single jar with two dex files for multidex.
-if [ -d src-multidex ]; then
+if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
   zip $TEST_NAME.jar classes.dex classes2.dex
 elif [ ${NEED_DEX} = "true" ]; then
   zip $TEST_NAME.jar classes.dex
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index fbefa07..18867fd 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -18,6 +18,7 @@
 DEBUGGER="n"
 DEV_MODE="n"
 DEX2OAT=""
+EXPERIMENTAL=""
 FALSE_BIN="/system/bin/false"
 FLAGS=""
 GDB=""
@@ -196,6 +197,13 @@
         FLAGS="${FLAGS} -Xcompiler-option --compile-pic"
         COMPILE_FLAGS="${COMPILE_FLAGS} --compile-pic"
         shift
+    elif [ "x$1" = "x--experimental" ]; then
+        if [ "$#" -lt 2 ]; then
+            echo "missing --experimental option" 1>&2
+            exit 1
+        fi
+        EXPERIMENTAL="$EXPERIMENTAL $2"
+        shift 2
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         exit 1
@@ -204,6 +212,13 @@
     fi
 done
 
+if [ "$USE_JVM" = "n" ]; then
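+    # Each experimental feature needs the runtime flag -Xexperimental:<feature>
+    # and the same flag forwarded to dex2oat (via --runtime-arg), so the runtime
+    # and the compiler agree on which experimental features are enabled.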
+    for feature in ${EXPERIMENTAL}; do
+        FLAGS="${FLAGS} -Xexperimental:${feature} -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:${feature}"
+        COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xexperimental:${feature}"
+    done
+fi
+
 if [ "x$1" = "x" ] ; then
   MAIN="Main"
 else
diff --git a/test/run-all-tests b/test/run-all-tests
index 13490c4..6d5c28c 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -41,12 +41,48 @@
     if [ "x$1" = "x--host" ]; then
         run_args="${run_args} --host"
         shift
+    elif [ "x$1" = "x--use-java-home" ]; then
+        run_args="${run_args} --use-java-home"
+        shift
+    elif [ "x$1" = "x--no-image" ]; then
+        run_args="${run_args} --no-image"
+        shift
+    elif [ "x$1" = "x--quick" ]; then
+        run_args="${run_args} --quick"
+        shift
+    elif [ "x$1" = "x--optimizing" ]; then
+        run_args="${run_args} --optimizing"
+        shift
+    elif [ "x$1" = "x--image" ]; then
+        run_args="${run_args} --image"
+        shift
+    elif [ "x$1" = "x--never-clean" ]; then
+        run_args="${run_args} --never-clean"
+        shift
     elif [ "x$1" = "x--jvm" ]; then
         run_args="${run_args} --jvm"
         shift
     elif [ "x$1" = "x--debug" ]; then
         run_args="${run_args} --debug"
         shift
+    elif [ "x$1" = "x--build-only" ]; then
+        run_args="${run_args} --build-only"
+        shift
+    elif [ "x$1" = "x--build-with-jack" ]; then
+        run_args="${run_args} --build-with-jack"
+        shift
+    elif [ "x$1" = "x--build-with-javac-dx" ]; then
+        run_args="${run_args} --build-with-javac-dx"
+        shift
+    elif [ "x$1" = "x--dex2oat-swap" ]; then
+        run_args="${run_args} --dex2oat-swap"
+        shift
+    elif [ "x$1" = "x--dalvik" ]; then
+        run_args="${run_args} --dalvik"
+        shift
+    elif [ "x$1" = "x--debuggable" ]; then
+        run_args="${run_args} --debuggable"
+        shift
     elif [ "x$1" = "x--zygote" ]; then
         run_args="${run_args} --zygote"
         shift
@@ -56,15 +92,15 @@
     elif [ "x$1" = "x--jit" ]; then
         run_args="${run_args} --jit"
         shift
+    elif [ "x$1" = "x--verify-soft-fail" ]; then
+        run_args="${run_args} --verify-soft-fail"
+        shift
     elif [ "x$1" = "x--no-verify" ]; then
         run_args="${run_args} --no-verify"
         shift
     elif [ "x$1" = "x--no-optimize" ]; then
         run_args="${run_args} --no-optimize"
         shift
-    elif [ "x$1" = "x--valgrind" ]; then
-        run_args="${run_args} --valgrind"
-        shift
     elif [ "x$1" = "x--dev" ]; then
         run_args="${run_args} --dev"
         shift
@@ -113,6 +149,15 @@
     elif [ "x$1" = "x--always-clean" ]; then
         run_args="${run_args} --always-clean"
         shift
+    elif [ "x$1" = "x--pic-test" ]; then
+        run_args="${run_args} --pic-test"
+        shift
+    elif [ "x$1" = "x--pic-image" ]; then
+        run_args="${run_args} --pic-image"
+        shift
+    elif [ "x$1" = "x--strace" ]; then
+        run_args="${run_args} --strace"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
@@ -131,9 +176,13 @@
         echo "  Options are all passed to run-test; refer to that for " \
              "further documentation:"
         echo "    --debug --dev --host --interpreter --jit --jvm --no-optimize"
-        echo "    --no-verify -O --update --valgrind --zygote --64 --relocate"
-        echo "    --prebuild --always-clean --gcstress --gcverify --trace"
-        echo "    --no-patchoat --no-dex2oat"
+        echo "    --no-verify --verify-soft-fail -O --update --zygote --64"
+        echo "    --relocate --prebuild --always-clean --gcstress --gcverify"
+        echo "    --trace --no-patchoat --no-dex2oat --use-java-home --pic-image"
+        echo "    --pic-test --strace --debuggable --dalvik --dex2oat-swap"
+        echo "    --build-only --build-with-jack --build-with-javac-dx"
+        echo "    --never-clean --image --no-image --quick --optimizing"
+        echo "    --no-relocate --no-prebuild"
         echo "  Specific Runtime Options:"
         echo "    --seq                Run tests one-by-one, avoiding failures caused by busy CPU"
     ) 1>&2
diff --git a/test/run-test b/test/run-test
index a5b6e92..9b0261e 100755
--- a/test/run-test
+++ b/test/run-test
@@ -40,13 +40,13 @@
   tmp_dir="${TMPDIR}/$USER/${test_dir}"
 fi
 checker="${progdir}/../tools/checker/checker.py"
-
 export JAVA="java"
 export JAVAC="javac -g"
 export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
 export USE_JACK="false"
+export SMALI_ARGS="--experimental --api-level 23"
 
 # If dx was not set by the environment variable, assume it is in the path.
 if [ -z "$DX" ]; then
@@ -83,22 +83,9 @@
     export ANDROID_BUILD_TOP=$oldwd
 fi
 
-# If JACK_VM_COMMAND is not set, assume it launches the prebuilt jack-launcher.
-if [ -z "$JACK_VM_COMMAND" ]; then
-  if [ ! -z "$TMPDIR" ]; then
-    jack_temp_dir="-Djava.io.tmpdir=$TMPDIR"
-  fi
-  export JACK_VM_COMMAND="java -Dfile.encoding=UTF-8 -Xms2560m -XX:+TieredCompilation $jack_temp_dir -jar $ANDROID_BUILD_TOP/prebuilts/sdk/tools/jack-launcher.jar"
-fi
-
 # If JACK_CLASSPATH is not set, assume it only contains core-libart.
 if [ -z "$JACK_CLASSPATH" ]; then
-  export JACK_CLASSPATH="$ANDROID_BUILD_TOP/out/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack"
-fi
-
-# If JACK_JAR is not set, assume it is located in the prebuilts directory.
-if [ -z "$JACK_JAR" ]; then
-  export JACK_JAR="$ANDROID_BUILD_TOP/prebuilts/sdk/tools/jack.jar"
+  export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack"
 fi
 
 # If JILL_JAR is not set, assume it is located in the prebuilts directory.
@@ -123,6 +110,7 @@
 run_args="--quiet"
 build_args=""
 
+quiet="no"
 debuggable="no"
 prebuild_mode="yes"
 target_mode="yes"
@@ -155,6 +143,18 @@
         DEX_LOCATION=$tmp_dir
         run_args="${run_args} --host"
         shift
+    elif [ "x$1" = "x--quiet" ]; then
+        quiet="yes"
+        shift
+    elif [ "x$1" = "x--use-java-home" ]; then
+        if [ -n "${JAVA_HOME}" ]; then
+          export JAVA="${JAVA_HOME}/bin/java"
+          export JAVAC="${JAVA_HOME}/bin/javac -g"
+        else
+          echo "Passed --use-java-home without JAVA_HOME variable set!"
+          usage="yes"
+        fi
+        shift
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
         runtime="jvm"
@@ -162,6 +162,7 @@
         NEED_DEX="false"
         USE_JACK="false"
         run_args="${run_args} --jvm"
+        build_args="${build_args} --jvm"
         shift
     elif [ "x$1" = "x-O" ]; then
         lib="libart.so"
@@ -354,6 +355,29 @@
     fi
 done
 
+# Allocate file descriptor real_stderr and redirect it to the shell's error
+# output (fd 2).
+if [ ${BASH_VERSINFO[0]} -gt 4 ] || { [ ${BASH_VERSINFO[0]} -eq 4 ] && [ ${BASH_VERSINFO[1]} -ge 1 ]; }; then
+  exec {real_stderr}>&2
+else
+  # In bash before version 4.1 we need to do a manual search for free file
+  # descriptors.
+  FD=3
+  while [ -e /dev/fd/$FD ]; do FD=$((FD + 1)); done
+  real_stderr=$FD
+  eval "exec ${real_stderr}>&2"
+fi
+if [ "$quiet" = "yes" ]; then
+  # Force the default standard output and error to go to /dev/null so we will
+  # not print them.
+  exec 1>/dev/null
+  exec 2>/dev/null
+fi
+
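+# Print a message on the real standard error, so it stays visible even when
+# --quiet has redirected fd 1 and fd 2 to /dev/null.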
+function err_echo() {
+  echo "$@" 1>&${real_stderr}
+}
+
 # tmp_dir may be relative, resolve.
 #
 # Cannot use realpath, as it does not exist on Mac.
@@ -386,7 +410,7 @@
         run_args="${run_args} --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin"
     fi
 elif [ "$trace_stream" = "true" ]; then
-    echo "Cannot use --stream without --trace."
+    err_echo "Cannot use --stream without --trace."
     exit 1
 fi
 
@@ -413,7 +437,7 @@
 if [ "$target_mode" = "no" ]; then
     if [ "$runtime" = "jvm" ]; then
         if [ "$prebuild_mode" = "yes" ]; then
-            echo "--prebuild with --jvm is unsupported";
+            err_echo "--prebuild with --jvm is unsupported"
             exit 1;
         fi
     fi
@@ -443,7 +467,7 @@
     if [ "$target_mode" = "no" ]; then
         # ANDROID_HOST_OUT is not set in a build environment.
         if [ -z "$ANDROID_HOST_OUT" ]; then
-            export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86
+            export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/linux-x86
         fi
         guess_host_arch_name
         run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art"
@@ -465,7 +489,7 @@
 
 if [ "$have_image" = "no" ]; then
     if [ "$runtime" != "art" ]; then
-        echo "--no-image is only supported on the art runtime"
+        err_echo "--no-image is only supported on the art runtime"
         exit 1
     fi
     if [ "$target_mode" = "no" ]; then
@@ -488,7 +512,12 @@
 fi
 
 if [ "$dev_mode" = "yes" -a "$update_mode" = "yes" ]; then
-    echo "--dev and --update are mutually exclusive" 1>&2
+    err_echo "--dev and --update are mutually exclusive"
+    usage="yes"
+fi
+
+if [ "$dev_mode" = "yes" -a "$quiet" = "yes" ]; then
+    err_echo "--dev and --quiet are mutually exclusive"
     usage="yes"
 fi
 
@@ -502,7 +531,7 @@
     if [ '!' -d "$test_dir" ]; then
         td2=`echo ${test_dir}-*`
         if [ '!' -d "$td2" ]; then
-            echo "${test_dir}: no such test directory" 1>&2
+            err_echo "${test_dir}: no such test directory"
             usage="yes"
         fi
         test_dir="$td2"
@@ -531,6 +560,7 @@
         echo "    --debug               Wait for a debugger to attach."
         echo "    --debuggable          Whether to compile Java code for a debugger."
         echo "    --gdb                 Run under gdb; incompatible with some tests."
+        echo "    --gdb-arg             Pass an option to gdb."
         echo "    --build-only          Build test files only (off by default)."
         echo "    --build-with-javac-dx Build test files with javac and dx (on by default)."
         echo "    --build-with-jack     Build test files with jack and jill (off by default)."
@@ -556,14 +586,20 @@
         echo "                          the image and oat files be relocated to a random"
         echo "                          address before running. (default)"
         echo "    --no-relocate         Force the use of no relocating in the test"
+        echo "    --image               Run the test using a precompiled boot image. (default)"
+        echo "    --no-image            Run the test without a precompiled boot image."
         echo "    --host                Use the host-mode virtual machine."
         echo "    --invoke-with         Pass --invoke-with option to runtime."
         echo "    --dalvik              Use Dalvik (off by default)."
         echo "    --jvm                 Use a host-local RI virtual machine."
+        echo "    --use-java-home       Use the JAVA_HOME environment variable"
+        echo "                          to find the java compiler and runtime"
+        echo "                          (if applicable) to run the test with."
         echo "    --output-path [path]  Location where to store the build" \
              "files."
         echo "    --64                  Run the test in 64-bit mode"
         echo "    --trace               Run with method tracing"
+        echo "    --strace              Run with syscall tracing from strace."
         echo "    --stream              Run method tracing in streaming mode (requires --trace)"
         echo "    --gcstress            Run with gc stress testing"
         echo "    --gcverify            Run with gc verification"
@@ -573,7 +609,11 @@
         echo "    --dex2oat-swap        Use a dex2oat swap file."
         echo "    --instruction-set-features [string]"
         echo "                          Set instruction-set-features for compilation."
-    ) 1>&2
+        echo "    --pic-image           Use an image compiled with position independent code for the"
+        echo "                          boot class path."
+        echo "    --pic-test            Compile the test code position independent."
+        echo "    --quiet               Don't print anything except failure messages"
+    ) 1>&2  # Direct to stderr so usage is not printed if --quiet is set.
     exit 1
 fi
 
@@ -584,12 +624,12 @@
 td_expected="${test_dir}/${expected}"
 
 if [ ! -r $td_info ]; then
-    echo "${test_dir}: missing file $td_info" 1>&2
+    err_echo "${test_dir}: missing file $td_info"
     exit 1
 fi
 
 if [ ! -r $td_expected ]; then
-    echo "${test_dir}: missing file $td_expected" 1>&2
+    err_echo "${test_dir}: missing file $td_expected"
     exit 1
 fi
 
@@ -637,18 +677,24 @@
   # on a particular DEX output, keep building them with dx for now (b/19467889).
   USE_JACK="false"
 
-  if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" -a "$debuggable" = "no" ]; then
+  if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" ]; then
     # In no-prebuild mode, the compiler is only invoked if both dex2oat and
     # patchoat are available. Disable Checker otherwise (b/22552692).
     if [ "$prebuild_mode" = "yes" ] || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
       run_checker="yes"
+
       if [ "$target_mode" = "no" ]; then
         cfg_output_dir="$tmp_dir"
-        checker_arch_option="--arch=${host_arch_name^^}"
+        checker_args="--arch=${host_arch_name^^}"
       else
         cfg_output_dir="$DEX_LOCATION"
-        checker_arch_option="--arch=${target_arch_name^^}"
+        checker_args="--arch=${target_arch_name^^}"
       fi
+
+      if [ "$debuggable" = "yes" ]; then
+        checker_args="$checker_args --debuggable"
+      fi
+
       run_args="${run_args} -Xcompiler-option --dump-cfg=$cfg_output_dir/$cfg_output \
                             -Xcompiler-option -j1"
     fi
@@ -678,7 +724,7 @@
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
   if ! ulimit -S "$build_file_size_limit"; then
-    echo "ulimit file size setting failed"
+    err_echo "ulimit file size setting failed"
   fi
 fi
 
@@ -691,7 +737,7 @@
     echo "build exit status: $build_exit" 1>&2
     if [ "$build_exit" = '0' ]; then
         if ! ulimit -S "$run_file_size_limit"; then
-          echo "ulimit file size setting failed"
+          err_echo "ulimit file size setting failed"
         fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" 2>&1
@@ -702,12 +748,12 @@
                 if [ "$target_mode" = "yes" ]; then
                   adb pull $cfg_output_dir/$cfg_output &> /dev/null
                 fi
-                "$checker" $checker_arch_option "$cfg_output" "$tmp_dir" 2>&1
+                "$checker" $checker_args "$cfg_output" "$tmp_dir" 2>&1
                 checker_exit="$?"
                 if [ "$checker_exit" = "0" ]; then
                     good="yes"
                 fi
-                echo "checker exit status: $checker_exit" 1>&2
+                err_echo "checker exit status: $checker_exit"
             else
                 good="yes"
             fi
@@ -719,7 +765,7 @@
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
         if ! ulimit -S "$run_file_size_limit"; then
-          echo "ulimit file size setting failed"
+          err_echo "ulimit file size setting failed"
         fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
@@ -727,13 +773,13 @@
           if [ "$target_mode" = "yes" ]; then
             adb pull $cfg_output_dir/$cfg_output &> /dev/null
           fi
-          "$checker" -q $checker_arch_option "$cfg_output" "$tmp_dir" >> "$output" 2>&1
+          "$checker" -q $checker_args "$cfg_output" "$tmp_dir" >> "$output" 2>&1
         fi
         sed -e 's/[[:cntrl:]]$//g' < "$output" >"${td_expected}"
         good="yes"
     else
-        cat "$build_output" 1>&2
-        echo "build exit status: $build_exit" 1>&2
+        cat "$build_output" 1>&${real_stderr} 1>&2
+        err_echo "build exit status: $build_exit"
     fi
 elif [ "$build_only" = "yes" ]; then
     good="yes"
@@ -745,7 +791,7 @@
         diff --strip-trailing-cr -q "$expected" "$output" >/dev/null
         if [ "$?" '!=' "0" ]; then
             good="no"
-            echo "BUILD FAILED For ${TEST_NAME}"
+            err_echo "BUILD FAILED For ${TEST_NAME}"
         fi
     fi
     # Clean up extraneous files that are not used by tests.
@@ -756,22 +802,22 @@
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
         if ! ulimit -S "$run_file_size_limit"; then
-          echo "ulimit file size setting failed"
+          err_echo "ulimit file size setting failed"
         fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
         run_exit="$?"
         if [ "$run_exit" != "0" ]; then
-            echo "run exit status: $run_exit" 1>&2
+            err_echo "run exit status: $run_exit"
             good_run="no"
         elif [ "$run_checker" = "yes" ]; then
             if [ "$target_mode" = "yes" ]; then
               adb pull $cfg_output_dir/$cfg_output &> /dev/null
             fi
-            "$checker" -q $checker_arch_option "$cfg_output" "$tmp_dir" >> "$output" 2>&1
+            "$checker" -q $checker_args "$cfg_output" "$tmp_dir" >> "$output" 2>&1
             checker_exit="$?"
             if [ "$checker_exit" != "0" ]; then
-                echo "checker exit status: $checker_exit" 1>&2
+                err_echo "checker exit status: $checker_exit"
                 good_run="no"
             else
                 good_run="yes"
@@ -818,7 +864,7 @@
         echo ' '
     fi
 
-) 1>&2
+) 2>&${real_stderr} 1>&2
 
 # Clean up test files.
 if [ "$always_clean" = "yes" -o "$good" = "yes" ] && [ "$never_clean" = "no" ]; then
@@ -846,6 +892,6 @@
         fi
     fi
 
-) 1>&2
+) 2>&${real_stderr} 1>&2
 
 exit 1
diff --git a/test/utils/python/generate_smali_main.py b/test/utils/python/generate_smali_main.py
new file mode 100755
index 0000000..d796d31
--- /dev/null
+++ b/test/utils/python/generate_smali_main.py
@@ -0,0 +1,376 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali Main file from a classes.xml file.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright
+import testgen.mixins as mixins
+
+from collections import namedtuple
+import itertools
+import functools
+import xml.etree.ElementTree as ET
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A mainclass and main method for this test.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_groups}
+
+{test_funcs}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass
+    """
+    self.tests = set()
+    self.global_funcs = set()
+
+  def add_instance(self, it):
+    """
+    Add an instance test for the given class
+    """
+    self.tests.add(it)
+
+  def add_func(self, f):
+    """
+    Add a function to the class
+    """
+    self.global_funcs.add(f)
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print this class
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    for t in sorted(all_tests):
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    funcs = ""
+    for f in self.global_funcs:
+      funcs += str(f)
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           test_groups=test_groups,
+                                           main_func=main_func, test_funcs=funcs)
+
+
+class InstanceTest(mixins.Named, mixins.NameComparableMixin):
+  """
+  A method that runs tests for a particular concrete type. It calls the test
+  cases for running it in all possible ways.
+  """
+
+  INSTANCE_TEST_TEMPLATE = """
+#   public static void {test_name}() {{
+#     System.out.println("Testing for type {ty}");
+#     String s = "{ty}";
+#     {ty} v = new {ty}();
+.method public static {test_name}()V
+    .locals 3
+    sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Testing for type {ty}"
+    invoke-virtual {{v2,v0}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    const-string v0, "{ty}"
+    new-instance v1, L{ty};
+    invoke-direct {{v1}}, L{ty};-><init>()V
+
+    {invokes}
+
+    const-string v0, "End testing for type {ty}"
+    invoke-virtual {{v2,v0}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+#     System.out.println("End testing for type {ty}");
+#   }}
+"""
+
+  TEST_INVOKE_TEMPLATE = """
+#     {fname}(s, v);
+    invoke-static {{v0, v1}}, {fname}(Ljava/lang/String;L{farg};)V
+"""
+
+  def __init__(self, main, ty):
+    """
+    Initialize this test group for the given type
+    """
+    self.ty = ty
+    self.main = main
+    self.funcs = set()
+    self.main.add_instance(self)
+
+  def get_name(self):
+    """
+    Get the name of this test group
+    """
+    return "TEST_NAME_"+self.ty
+
+  def add_func(self, f):
+    """
+    Add a test function to this test group
+    """
+    self.main.add_func(f)
+    self.funcs.add(f)
+
+  def __str__(self):
+    """
+    Returns the smali code for this function
+    """
+    func_invokes = ""
+    for f in sorted(self.funcs, key=lambda a: (a.func, a.farg)):
+      func_invokes += self.TEST_INVOKE_TEMPLATE.format(fname=f.get_name(),
+                                                       farg=f.farg)
+
+    return self.INSTANCE_TEST_TEMPLATE.format(test_name=self.get_name(), ty=self.ty,
+                                              invokes=func_invokes)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A single test case that attempts to invoke a function on a receiver of a given type.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}(String s, {farg} v) {{
+#     try {{
+#       System.out.printf("%s-{invoke_type:<9} {farg:>9}.{callfunc}()='%s'\\n", s, v.{callfunc}());
+#       return;
+#     }} catch (Error e) {{
+#       System.out.printf("%s-{invoke_type} on {farg}: {callfunc}() threw exception!\\n", s);
+#       e.printStackTrace(System.out);
+#     }}
+#   }}
+.method public static {fname}(Ljava/lang/String;L{farg};)V
+    .locals 7
+    :call_{fname}_try_start
+      const/4 v0, 2
+      new-array v1,v0, [Ljava/lang/Object;
+      const/4 v0, 0
+      aput-object p0,v1,v0
+
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      const-string v3, "%s-{invoke_type:<9} {farg:>9}.{callfunc}()='%s'\\n"
+
+      invoke-{invoke_type} {{p1}}, L{farg};->{callfunc}()Ljava/lang/String;
+      move-result-object v4
+      const/4 v0, 1
+      aput-object v4, v1, v0
+
+      invoke-virtual {{v2,v3,v1}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/Error; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :error_{fname}_start
+      move-exception v3
+      const/4 v0, 1
+      new-array v1,v0, [Ljava/lang/Object;
+      const/4 v0, 0
+      aput-object p0, v1, v0
+      sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+      const-string v4, "%s-{invoke_type} on {farg}: {callfunc}() threw exception!\\n"
+      invoke-virtual {{v2,v4,v1}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
+      invoke-virtual {{v3,v2}}, Ljava/lang/Error;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  def __init__(self, func, farg, invoke):
+    """
+    Initialize this test function for the given invoke type and argument
+    """
+    self.func = func
+    self.farg = farg
+    self.invoke = invoke
+
+  def get_name(self):
+    """
+    Get the name of this test
+    """
+    return "Test_Func_{}_{}_{}".format(self.func, self.farg, self.invoke)
+
+  def __str__(self):
+    """
+    Get the smali code for this test function
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name(),
+                                              farg=self.farg,
+                                              invoke_type=self.invoke,
+                                              callfunc=self.func)
+
+def flatten_classes(classes, c):
+  """
+  Iterate over all the classes 'c' can be used as
+  """
+  while c:
+    yield c
+    c = classes.get(c.super_class)
+
+def flatten_class_methods(classes, c):
+  """
+  Iterate over all the methods 'c' can call
+  """
+  for c1 in flatten_classes(classes, c):
+    yield from c1.methods
+
+def flatten_interfaces(dat, c):
+  """
+  Iterate over all the interfaces 'c' transitively implements
+  """
+  def get_ifaces(cl):
+    for i2 in cl.implements:
+      yield dat.interfaces[i2]
+      yield from get_ifaces(dat.interfaces[i2])
+
+  for cl in flatten_classes(dat.classes, c):
+    yield from get_ifaces(cl)
+
+def flatten_interface_methods(dat, i):
+  """
+  Iterate over all the interface methods 'i' can call
+  """
+  yield from i.methods
+  for i2 in flatten_interfaces(dat, i):
+    yield from i2.methods
+
+def make_main_class(dat):
+  """
+  Creates a Main.smali file that runs all the tests
+  """
+  m = MainClass()
+  for c in dat.classes.values():
+    i = InstanceTest(m, c.name)
+    for clazz in flatten_classes(dat.classes, c):
+      for meth in flatten_class_methods(dat.classes, clazz):
+        i.add_func(Func(meth, clazz.name, 'virtual'))
+      for iface in flatten_interfaces(dat, clazz):
+        for meth in flatten_interface_methods(dat, iface):
+          i.add_func(Func(meth, clazz.name, 'virtual'))
+          i.add_func(Func(meth, iface.name, 'interface'))
+  return m
+
+class TestData(namedtuple("TestData", ['classes', 'interfaces'])):
+  """
+  A class representing the classes.xml document.
+  """
+  pass
+
+class Clazz(namedtuple("Clazz", ["name", "methods", "super_class", "implements"])):
+  """
+  A class representing a class element in the classes.xml document.
+  """
+  pass
+
+class IFace(namedtuple("IFace", ["name", "methods", "super_class", "implements"])):
+  """
+  A class representing an interface element in the classes.xml document.
+  """
+  pass
+
+def parse_xml(xml):
+  """
+  Parse the xml description of this test.
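+
+  Expected layout, inferred from the parsing below (the tag names of the
+  individual entries are not checked, only their attributes and text):
+    <root>
+      <interfaces> <iface name="..." super="..."> <implements/> <methods/> </iface> </interfaces>
+      <classes>    <class name="..." super="..."> <implements/> <methods/> </class> </classes>
+    </root>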
+  """
+  classes = dict()
+  ifaces  = dict()
+  root = ET.fromstring(xml)
+  for iface in root.find("interfaces"):
+    name = iface.attrib['name']
+    implements = [a.text for a in iface.find("implements")]
+    methods = [a.text for a in iface.find("methods")]
+    ifaces[name] = IFace(name = name,
+                         super_class = iface.attrib['super'],
+                         methods = methods,
+                         implements = implements)
+  for clazz in root.find('classes'):
+    name = clazz.attrib['name']
+    implements = [a.text for a in clazz.find("implements")]
+    methods = [a.text for a in clazz.find("methods")]
+    classes[name] = Clazz(name = name,
+                          super_class = clazz.attrib['super'],
+                          methods = methods,
+                          implements = implements)
+  return TestData(classes, ifaces)
+
+def main(argv):
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  class_data = parse_xml((smali_dir / "classes.xml").open().read())
+  make_main_class(class_data).dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/utils/python/testgen/mixins.py b/test/utils/python/testgen/mixins.py
new file mode 100644
index 0000000..085e51d
--- /dev/null
+++ b/test/utils/python/testgen/mixins.py
@@ -0,0 +1,135 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Common mixins and abstract base classes (ABCs) useful for writing test generators in python
+"""
+
+import abc
+import collections.abc
+import functools
+
+class Named(metaclass=abc.ABCMeta):
+  """
+  An abc that defines a get_name method.
+  """
+
+  @abc.abstractmethod
+  def get_name(self):
+    """
+    Returns a unique name to use as the identity for implementing comparisons.
+    """
+    pass
+
+class FileLike(metaclass=abc.ABCMeta):
+  """
+  An abc that defines get_file_name and get_file_extension methods.
+  """
+
+  @abc.abstractmethod
+  def get_file_name(self):
+    """Returns the filename this object represents"""
+    pass
+
+  @abc.abstractmethod
+  def get_file_extension(self):
+    """Returns the file extension of the file this object represents"""
+    pass
+
+@functools.lru_cache(maxsize=None)
+def get_file_extension_mixin(ext):
+  """
+  Gets a mixin that defines get_file_name(self) in terms of get_name(self) with the
+  given file extension.
+  """
+
+  class FExt(object):
+    """
+    A mixin defining get_file_name(self) in terms of get_name(self)
+    """
+
+    def get_file_name(self):
+      return self.get_name() + ext
+
+    def get_file_extension(self):
+      return ext
+
+  # Register the ABCs
+  Named.register(FExt)
+  FileLike.register(FExt)
+
+  return FExt
+
+class SmaliFileMixin(get_file_extension_mixin(".smali")):
+  """
+  A mixin defining that the file this class represents is named get_name() + ".smali".
+  """
+  pass
+
+class NameComparableMixin(object):
+  """
+  A mixin that defines the object comparison and related functionality in terms
+  of a get_name(self) function.
+  """
+
+  def __lt__(self, other):
+    return self.get_name() < other.get_name()
+
+  def __gt__(self, other):
+    return self.get_name() > other.get_name()
+
+  def __eq__(self, other):
+    return self.get_name() == other.get_name()
+
+  def __le__(self, other):
+    return self.get_name() <= other.get_name()
+
+  def __ge__(self, other):
+    return self.get_name() >= other.get_name()
+
+  def __ne__(self, other):
+    return self.get_name() != other.get_name()
+
+  def __hash__(self):
+    return hash(self.get_name())
+
+Named.register(NameComparableMixin)
+collections.abc.Hashable.register(NameComparableMixin)
+
+class DumpMixin(metaclass=abc.ABCMeta):
+  """
+  A mixin to add support for dumping the string representation of an object to a
+  file. Requires the get_file_name(self) method be defined.
+  """
+
+  @abc.abstractmethod
+  def __str__(self):
+    """
+    Returns the data to be printed to a file by dump.
+    """
+    pass
+
+  def dump(self, directory):
+    """
+    Dump this object to a file in the given directory
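+    'directory' is expected to be a pathlib.Path, since it is joined with the
+    file name using the '/' operator below.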
+    """
+    out_file = directory / self.get_file_name()
+    if out_file.exists():
+      out_file.unlink()
+    with out_file.open('w') as out:
+      print(str(self), file=out)
+
+FileLike.register(DumpMixin)
diff --git a/test/utils/python/testgen/utils.py b/test/utils/python/testgen/utils.py
new file mode 100644
index 0000000..769ad16
--- /dev/null
+++ b/test/utils/python/testgen/utils.py
@@ -0,0 +1,80 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Common functions useful for writing test generators in python
+"""
+
+import itertools
+import os
+import string
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# An iterator that yields tuples of lowercase letters, joined into names by
+# gensym(). It first yields all length-1 names, then all length-2 names, and
+# so on, in alphabetical order.
+NAME_GEN = itertools.chain.from_iterable(
+    map(lambda n: itertools.product(string.ascii_lowercase, repeat=n),
+        itertools.count(1)))
+
+def gensym():
+  """
+  Returns a new, globally unique, identifier name that is a valid Java symbol
+  on each call.
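+  Successive calls return 'a', 'b', ..., 'z', 'aa', 'ab', and so on.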
+  """
+  return ''.join(next(NAME_GEN))
+
+def filter_blanks(s):
+  """
+  Takes a string and returns the same string with empty lines removed
+  """
+  return "\n".join(a for a in s.split("\n") if a.strip() != "")
+
+def get_copyright(filetype = "java"):
+  """
+  Returns the standard copyright header for the given filetype
+  """
+  if filetype == "smali":
+    return "\n".join(map(lambda a: "# " + a, get_copyright("java").split("\n")))
+  else:
+    fname = filetype + ".txt"
+    with (Path(BUILD_TOP)/"development"/"docs"/"copyright-templates"/fname).open() as template:
+      return "".join(template.readlines())
+
+def subtree_sizes(n):
+  """
+  A generator that yields tuples, each containing a possible arrangement of
+  subtree sizes for a tree with a total of 'n' leaf nodes.
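+  For example, subtree_sizes(3) yields (1, 1, 1), (2, 1) and (1, 2).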
+  """
+  if n == 0:
+    return
+  elif n == 1:
+    yield (0,)
+  elif n == 2:
+    yield (1, 1)
+  else:
+    for prevt in subtree_sizes(n - 1):
+      prev = list(prevt)
+      yield tuple([1] + prev)
+      for i in range(len(prev)):
+        prev[i] += 1
+        yield tuple(prev)
+        prev[i] -= 1
+
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index 71366c1..6869b04 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -74,7 +74,7 @@
 AHAT_TEST_DUMP_DEPENDENCIES := \
 	$(ART_HOST_EXECUTABLES) \
 	$(HOST_OUT_EXECUTABLES)/art \
-	$(HOST_CORE_IMG_OUT_BASE)$(CORE_IMG_SUFFIX)
+	$(HOST_CORE_IMG_OUT_BASE)-optimizing-pic$(CORE_IMG_SUFFIX)
 
 $(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_ART := $(HOST_OUT_EXECUTABLES)/art
 $(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_DUMP_JAR := $(AHAT_TEST_DUMP_JAR)
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index a8e3884..d6f55aa 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -10,8 +10,6 @@
 
 TODO:
  * Add more tips to the help page.
-   - Note that only 'app' heap matters, not 'zygote' or 'image'.
-   - Say what a dex cache is.
    - Recommend how to start looking at a heap dump.
    - Say how to enable allocation sites.
    - Where to submit feedback, questions, and bug reports.
@@ -24,16 +22,12 @@
  * Show site context and heap and class filter in "Objects" view?
  * Have a menu at the top of an object view with links to the sections?
  * Include ahat version and hprof file in the menu at the top of the page?
+ * Show root types.
  * Heaped Table
    - Make sortable by clicking on headers.
    - Use consistent order for heap columns.
       Sometimes I see "app" first, sometimes last (from one heap dump to
       another) How about, always sort by name?
- * For long strings, limit the string length shown in the summary view to
-   something reasonable.  Say 50 chars, then add a "..." at the end.
- * For string summaries, if the string is an offset into a bigger byte array,
-   make sure to show just the part that's in the bigger byte array, not the
-   entire byte array.
  * For HeapTable with single heap shown, the heap name isn't centered?
  * Consistently document functions.
  * Should help be part of an AhatHandler, that automatically gets the menu and
@@ -72,6 +66,9 @@
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
    showing all the instances.
+ * That InstanceUtils.asString properly takes into account "offset" and
+   "count" fields, if they are present.
+ * InstanceUtils.getDexCacheLocation
 
 Reported Issues:
  * Request to be able to sort tables by size.
@@ -87,7 +84,6 @@
    index.
  * What's the difference between getId and getUniqueId?
  * I see objects with duplicate references.
- * Don't store stack trace by heap (CL 157252)
  * A way to get overall retained size by heap.
  * A method Instance.isReachable()
 
@@ -98,6 +94,9 @@
  * Computing, for each instance, the other instances it dominates.
 
 Release History:
+ 0.2 Oct 20, 2015
+   Take into account 'count' and 'offset' when displaying strings.
+
  0.1ss Aug 04, 2015
    Enable stack allocations code (using custom modified perflib).
    Sort objects in 'objects/' with default sort.
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 3035ef7..43658f3 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -18,14 +18,12 @@
 
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
-import com.android.tools.perflib.heap.HprofParser;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.RootObj;
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
-import com.android.tools.perflib.heap.io.HprofBuffer;
-import com.android.tools.perflib.heap.io.MemoryMappedFileBuffer;
+import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import java.io.File;
@@ -56,8 +54,7 @@
    * Create an AhatSnapshot from an hprof file.
    */
   public static AhatSnapshot fromHprof(File hprof) throws IOException {
-    HprofBuffer buffer = new MemoryMappedFileBuffer(hprof);
-    Snapshot snapshot = (new HprofParser(buffer)).parse();
+    Snapshot snapshot = Snapshot.createSnapshot(new MemoryMappedFileBuffer(hprof));
     snapshot.computeDominators();
     return new AhatSnapshot(snapshot);
   }
@@ -185,20 +182,17 @@
 
   // Return the stack where the given instance was allocated.
   private static StackTrace getStack(Instance inst) {
-    // TODO: return inst.getStack() once perflib is fixed.
-    return null;
+    return inst.getStack();
   }
 
   // Return the list of stack frames for a stack trace.
   private static StackFrame[] getStackFrames(StackTrace stack) {
-    // TODO: Use stack.getFrames() once perflib is fixed.
-    return null;
+    return stack.getFrames();
   }
 
   // Return the serial number of the given stack trace.
   private static int getStackTraceSerialNumber(StackTrace stack) {
-    // TODO: Use stack.getSerialNumber() once perflib is fixed.
-    return 0;
+    return stack.getSerialNumber();
   }
 
   // Get the site associated with the given stack id and depth.
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index a6ac3b8..7fa53c7 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -37,22 +37,6 @@
   }
 
   /**
-   * Read the char[] value from an hprof Instance.
-   * Returns null if the object can't be interpreted as a char[].
-   */
-  private static char[] asCharArray(Instance inst) {
-    if (! (inst instanceof ArrayInstance)) {
-      return null;
-    }
-
-    ArrayInstance array = (ArrayInstance) inst;
-    if (array.getArrayType() != Type.CHAR) {
-      return null;
-    }
-    return array.asCharArray(0, array.getValues().length);
-  }
-
-  /**
    * Read the byte[] value from an hprof Instance.
    * Returns null if the instance is not a byte array.
    */
@@ -76,14 +60,52 @@
   }
 
 
-  // Read the string value from an hprof Instance.
-  // Returns null if the object can't be interpreted as a string.
+  /**
+   * Read the string value from an hprof Instance.
+   * Returns null if the object can't be interpreted as a string.
+   */
   public static String asString(Instance inst) {
+    return asString(inst, -1);
+  }
+
+  /**
+   * Read the string value from an hprof Instance.
+   * Returns null if the object can't be interpreted as a string.
+   * The returned string is truncated to maxChars characters.
+   * If maxChars is negative, the returned string is not truncated.
+   */
+  public static String asString(Instance inst, int maxChars) {
     if (!isInstanceOfClass(inst, "java.lang.String")) {
       return null;
     }
-    char[] value = getCharArrayField(inst, "value");
-    return (value == null) ? null : new String(value);
+
+    Object value = getField(inst, "value");
+    if (!(value instanceof ArrayInstance)) {
+      return null;
+    }
+
+    ArrayInstance chars = (ArrayInstance) value;
+    if (chars.getArrayType() != Type.CHAR) {
+      return null;
+    }
+
+    // TODO: When perflib provides a better way to get the length of the
+    // array, we should use that here.
+    int numChars = chars.getValues().length;
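+    // Some versions of java.lang.String keep "offset" and "count" fields in
+    // addition to the char[] value; when those fields are absent we fall back
+    // to using the entire array.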
+    int count = getIntField(inst, "count", numChars);
+    if (count == 0) {
+      return "";
+    }
+    if (0 <= maxChars && maxChars < count) {
+      count = maxChars;
+    }
+
+    int offset = getIntField(inst, "offset", 0);
+    int end = offset + count - 1;
+    if (offset >= 0 && offset < numChars && end >= 0 && end < numChars) {
+      return new String(chars.asCharArray(offset, count));
+    }
+    return null;
   }
 
   /**
@@ -175,6 +197,15 @@
   }
 
   /**
+   * Read an int field of an instance, returning a default value if the field
+   * was not an int or could not be read.
+   */
+  private static int getIntField(Instance inst, String fieldName, int def) {
+    Integer value = getIntField(inst, fieldName);
+    return value == null ? def : value;
+  }
+
+  /**
    * Read the given field from the given instance.
    * The field is assumed to be a byte[] field.
    * Returns null if the field value is null, not a byte[] or could not be read.
@@ -187,14 +218,6 @@
     return asByteArray((Instance)value);
   }
 
-  private static char[] getCharArrayField(Instance inst, String fieldName) {
-    Object value = getField(inst, fieldName);
-    if (!(value instanceof Instance)) {
-      return null;
-    }
-    return asCharArray((Instance)value);
-  }
-
   // Return the bitmap instance associated with this object, or null if there
   // is none. This works for android.graphics.Bitmap instances and their
   // underlying Byte[] instances.
@@ -221,16 +244,36 @@
     return null;
   }
 
+  private static boolean isJavaLangRefReference(Instance inst) {
+    ClassObj cls = (inst == null) ? null : inst.getClassObj();
+    while (cls != null) {
+      if ("java.lang.ref.Reference".equals(cls.getClassName())) {
+        return true;
+      }
+      cls = cls.getSuperClassObj();
+    }
+    return false;
+  }
+
+  public static Instance getReferent(Instance inst) {
+    if (isJavaLangRefReference(inst)) {
+      return getRefField(inst, "referent");
+    }
+    return null;
+  }
+
   /**
    * Assuming inst represents a DexCache object, return the dex location for
    * that dex cache. Returns null if the given instance doesn't represent a
    * DexCache object or the location could not be found.
+   * If maxChars is non-negative, the returned location is truncated to
+   * maxChars in length.
    */
-  public static String getDexCacheLocation(Instance inst) {
+  public static String getDexCacheLocation(Instance inst, int maxChars) {
     if (isInstanceOfClass(inst, "java.lang.DexCache")) {
       Instance location = getRefField(inst, "location");
       if (location != null) {
-        return asString(location);
+        return asString(location, maxChars);
       }
     }
     return null;
diff --git a/tools/ahat/src/Value.java b/tools/ahat/src/Value.java
index 9b483fa..7c969b3 100644
--- a/tools/ahat/src/Value.java
+++ b/tools/ahat/src/Value.java
@@ -25,6 +25,10 @@
  */
 class Value {
 
+  // For string literals, we limit the number of characters we show to
+  // kMaxChars in case the string is really long.
+  private static int kMaxChars = 200;
+
   /**
    * Create a DocString representing a summary of the given instance.
    */
@@ -41,21 +45,36 @@
     }
 
     link.append(inst.toString());
+    URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
+    DocString formatted = DocString.link(objTarget, link);
 
     // Annotate Strings with their values.
-    String stringValue = InstanceUtils.asString(inst);
+    String stringValue = InstanceUtils.asString(inst, kMaxChars);
     if (stringValue != null) {
-      link.appendFormat("\"%s\"", stringValue);
+      formatted.appendFormat(" \"%s", stringValue);
+      formatted.append(kMaxChars == stringValue.length() ? "..." : "\"");
+    }
+
+    // Annotate Reference with its referent
+    Instance referent = InstanceUtils.getReferent(inst);
+    if (referent != null) {
+      formatted.append(" for ");
+
+      // It should not be possible for a referent to refer back to the
+      // reference object, even indirectly, so there shouldn't be any issues
+      // with infinite recursion here.
+      formatted.append(renderInstance(referent));
     }
 
     // Annotate DexCache with its location.
-    String dexCacheLocation = InstanceUtils.getDexCacheLocation(inst);
+    String dexCacheLocation = InstanceUtils.getDexCacheLocation(inst, kMaxChars);
     if (dexCacheLocation != null) {
-      link.append(" for " + dexCacheLocation);
+      formatted.appendFormat(" for %s", dexCacheLocation);
+      if (kMaxChars == dexCacheLocation.length()) {
+        formatted.append("...");
+      }
     }
 
-    URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
-    DocString formatted = DocString.link(objTarget, link);
 
     // Annotate bitmaps with a thumbnail.
     Instance bitmap = InstanceUtils.getAssociatedBitmapInstance(inst);
diff --git a/tools/ahat/src/help.html b/tools/ahat/src/help.html
index b48d791..b7ae2ce 100644
--- a/tools/ahat/src/help.html
+++ b/tools/ahat/src/help.html
@@ -54,3 +54,38 @@
     </ul>
   </li>
 </ul>
+
+<h2>Tips:</h2>
+<h3>Heaps</h3>
+<p>
+Android heap dumps contain information for multiple heaps. The <b>app</b> heap
+is the memory used by your application. The <b>zygote</b> and <b>image</b>
+heaps are used by the system. You should ignore everything in the zygote and
+image heap and look only at the app heap. This is because changes in your
+application will not effect the zygote or image heaps, and because the zygote
+and image heaps are shared, they don't contribute significantly to your
+applications PSS.
+</p>
+
+<h3>Bitmaps</h3>
+<p>
+Bitmaps store their data using byte[] arrays. Whenever you see a large
+byte[], check if it is a bitmap by looking to see if there is a single
+android.graphics.Bitmap object referring to it. The byte[] will be marked as a
+root, but it is really being retained by the android.graphics.Bitmap object.
+</p>
+
+<h3>DexCaches</h3>
+<p>
+For each DexFile you load, there will be a corresponding DexCache whose size
+is proportional to the number of strings, fields, methods, and classes in your
+dex file. The DexCache entries may or may not be visible depending on the
+version of the Android platform the heap dump is from.
+</p>
+
+<h3>FinalizerReferences</h3>
+<p>
+A FinalizerReference is allocated for every object on the heap that has a
+non-trivial finalizer. These are stored in a linked list reachable from the
+FinalizerReference class object.
+</p>
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index 7efb1a7..421de17 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 0.2
+Implementation-Version: 0.3
 Main-Class: com.android.ahat.Main
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index cea1dc1..7b8774a 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -16,6 +16,9 @@
 
 import dalvik.system.VMDebug;
 import java.io.IOException;
+import java.lang.ref.PhantomReference;
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
 
 /**
  * Program used to create a heap dump for test purposes.
@@ -33,6 +36,9 @@
     public String basicString = "hello, world";
     public String nullString = null;
     public Object anObject = new Object();
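+    // References to anObject, used to exercise InstanceUtils.getReferent in
+    // the InstanceUtilsTest added with this change.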
+    public ReferenceQueue<Object> referenceQueue = new ReferenceQueue<Object>();
+    public PhantomReference aPhantomReference = new PhantomReference(anObject, referenceQueue);
+    public WeakReference aWeakReference = new WeakReference(anObject, referenceQueue);
   }
 
   public static void main(String[] args) throws IOException {
diff --git a/tools/ahat/test/InstanceUtilsTest.java b/tools/ahat/test/InstanceUtilsTest.java
index 7613df4..32f48ce 100644
--- a/tools/ahat/test/InstanceUtilsTest.java
+++ b/tools/ahat/test/InstanceUtilsTest.java
@@ -25,24 +25,67 @@
 
 public class InstanceUtilsTest {
   @Test
-  public void basicString() throws IOException {
+  public void asStringBasic() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
     assertEquals("hello, world", InstanceUtils.asString(str));
   }
 
   @Test
-  public void nullString() throws IOException {
+  public void asStringTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello", InstanceUtils.asString(str, 5));
+  }
+
+  @Test
+  public void asStringExactMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello, world", InstanceUtils.asString(str, 12));
+  }
+
+  @Test
+  public void asStringNotTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello, world", InstanceUtils.asString(str, 50));
+  }
+
+  @Test
+  public void asStringNegativeMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("basicString");
+    assertEquals("hello, world", InstanceUtils.asString(str, -3));
+  }
+
+  @Test
+  public void asStringNull() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance obj = (Instance)dump.getDumpedThing("nullString");
     assertNull(InstanceUtils.asString(obj));
   }
 
   @Test
-  public void notString() throws IOException {
+  public void asStringNotString() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance obj = (Instance)dump.getDumpedThing("anObject");
     assertNotNull(obj);
     assertNull(InstanceUtils.asString(obj));
   }
+
+  @Test
+  public void basicReference() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    Instance pref = (Instance)dump.getDumpedThing("aPhantomReference");
+    Instance wref = (Instance)dump.getDumpedThing("aWeakReference");
+    Instance referent = (Instance)dump.getDumpedThing("anObject");
+    assertNotNull(pref);
+    assertNotNull(wref);
+    assertNotNull(referent);
+    assertEquals(referent, InstanceUtils.getReferent(pref));
+    assertEquals(referent, InstanceUtils.getReferent(wref));
+    assertNull(InstanceUtils.getReferent(referent));
+  }
 }
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index de9b35d..631e0a0 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -19,7 +19,9 @@
   exit 1
 fi
 
-common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests"
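+# Respect an externally provided OUT_DIR; otherwise default to "out".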
+out_dir=${OUT_DIR-out}
+java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
+common_targets="vogar vogar.jar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
@@ -44,9 +46,9 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg $showcommands build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets ${out_dir}/host/linux-x86/lib/libjavacoretests.so ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh ${out_dir}/host/linux-x86/bin/adb"
 fi
 
 echo "Executing $make_command"
diff --git a/tools/checker/checker.py b/tools/checker/checker.py
index bc5e17d..2e9faba 100755
--- a/tools/checker/checker.py
+++ b/tools/checker/checker.py
@@ -36,7 +36,9 @@
   parser.add_argument("--dump-pass", dest="dump_pass", metavar="PASS",
                       help="print a compiler pass dump")
   parser.add_argument("--arch", dest="arch", choices=archs_list,
-                      help="Run the tests for the specified target architecture.")
+                      help="Run tests for the specified target architecture.")
+  parser.add_argument("--debuggable", action="store_true",
+                      help="Run tests for debuggable code.")
   parser.add_argument("-q", "--quiet", action="store_true",
                       help="print only errors")
   return parser.parse_args()
@@ -83,13 +85,13 @@
     Logger.fail("Source path \"" + path + "\" not found")
 
 
-def RunTests(checkPrefix, checkPath, outputFilename, targetArch):
+def RunTests(checkPrefix, checkPath, outputFilename, targetArch, debuggableMode):
   c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
   for checkFilename in FindCheckerFiles(checkPath):
     checkerFile = ParseCheckerStream(os.path.basename(checkFilename),
                                      checkPrefix,
                                      open(checkFilename, "r"))
-    MatchFiles(checkerFile, c1File, targetArch)
+    MatchFiles(checkerFile, c1File, targetArch, debuggableMode)
 
 
 if __name__ == "__main__":
@@ -103,4 +105,4 @@
   elif args.dump_pass:
     DumpPass(args.tested_file, args.dump_pass)
   else:
-    RunTests(args.check_prefix, args.source_path, args.tested_file, args.arch)
+    RunTests(args.check_prefix, args.source_path, args.tested_file, args.arch, args.debuggable)
diff --git a/tools/checker/common/archs.py b/tools/checker/common/archs.py
index 84bded9..178e0b5 100644
--- a/tools/checker/common/archs.py
+++ b/tools/checker/common/archs.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-archs_list = ['ARM', 'ARM64', 'MIPS64', 'X86', 'X86_64']
+archs_list = ['ARM', 'ARM64', 'MIPS', 'MIPS64', 'X86', 'X86_64']
diff --git a/tools/checker/file_format/checker/parser.py b/tools/checker/file_format/checker/parser.py
index 446302f..f199a50 100644
--- a/tools/checker/file_format/checker/parser.py
+++ b/tools/checker/file_format/checker/parser.py
@@ -22,7 +22,7 @@
 def __isCheckerLine(line):
   return line.startswith("///") or line.startswith("##")
 
-def __extractLine(prefix, line, arch = None):
+def __extractLine(prefix, line, arch = None, debuggable = False):
   """ Attempts to parse a check line. The regex searches for a comment symbol
       followed by the CHECK keyword, given attribute and a colon at the very
       beginning of the line. Whitespaces are ignored.
@@ -30,10 +30,11 @@
   rIgnoreWhitespace = r"\s*"
   rCommentSymbols = [r"///", r"##"]
   arch_specifier = r"-%s" % arch if arch is not None else r""
+  dbg_specifier = r"-DEBUGGABLE" if debuggable else r""
   regexPrefix = rIgnoreWhitespace + \
                 r"(" + r"|".join(rCommentSymbols) + r")" + \
                 rIgnoreWhitespace + \
-                prefix + arch_specifier + r":"
+                prefix + arch_specifier + dbg_specifier + r":"
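+  # For example, with prefix "CHECK-START", arch "ARM" and debuggable=True this
+  # matches lines beginning with "/// CHECK-START-ARM-DEBUGGABLE:".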
 
   # The 'match' function succeeds only if the pattern is matched at the
   # beginning of the line.
@@ -56,10 +57,11 @@
 
   # Lines beginning with 'CHECK-START' start a new test case.
   # We currently only consider the architecture suffix in "CHECK-START" lines.
-  for arch in [None] + archs_list:
-    startLine = __extractLine(prefix + "-START", line, arch)
-    if startLine is not None:
-      return None, startLine, arch
+  for debuggable in [True, False]:
+    for arch in [None] + archs_list:
+      startLine = __extractLine(prefix + "-START", line, arch, debuggable)
+      if startLine is not None:
+        return None, startLine, (arch, debuggable)
 
   # Lines starting only with 'CHECK' are matched in order.
   plainLine = __extractLine(prefix, line)
@@ -167,9 +169,11 @@
   fnProcessLine = lambda line, lineNo: __processLine(line, lineNo, prefix, fileName)
   fnLineOutsideChunk = lambda line, lineNo: \
       Logger.fail("Checker line not inside a group", fileName, lineNo)
-  for caseName, caseLines, startLineNo, testArch in \
+  for caseName, caseLines, startLineNo, testData in \
       SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
-    testCase = TestCase(checkerFile, caseName, startLineNo, testArch)
+    testArch = testData[0]
+    forDebuggable = testData[1]
+    testCase = TestCase(checkerFile, caseName, startLineNo, testArch, forDebuggable)
     for caseLine in caseLines:
       ParseCheckerAssertion(testCase, caseLine[0], caseLine[1], caseLine[2])
   return checkerFile
diff --git a/tools/checker/file_format/checker/struct.py b/tools/checker/file_format/checker/struct.py
index 7ee09cd..a31aa54 100644
--- a/tools/checker/file_format/checker/struct.py
+++ b/tools/checker/file_format/checker/struct.py
@@ -36,7 +36,7 @@
 
 class TestCase(PrintableMixin):
 
-  def __init__(self, parent, name, startLineNo, testArch = None):
+  def __init__(self, parent, name, startLineNo, testArch = None, forDebuggable = False):
     assert isinstance(parent, CheckerFile)
 
     self.parent = parent
@@ -44,6 +44,7 @@
     self.assertions = []
     self.startLineNo = startLineNo
     self.testArch = testArch
+    self.forDebuggable = forDebuggable
 
     if not self.name:
       Logger.fail("Test case does not have a name", self.fileName, self.startLineNo)
diff --git a/tools/checker/file_format/checker/test.py b/tools/checker/file_format/checker/test.py
index 495dabc..579c190 100644
--- a/tools/checker/file_format/checker/test.py
+++ b/tools/checker/file_format/checker/test.py
@@ -290,7 +290,7 @@
           /// CHECK-NEXT: bar
         """)
 
-class CheckerParser_ArchTests(unittest.TestCase):
+class CheckerParser_SuffixTests(unittest.TestCase):
 
   noarch_block = """
                   /// CHECK-START: Group
@@ -308,11 +308,12 @@
                   /// CHECK-DAG:   yoyo
                 """
 
+  def parse(self, checkerText):
+    return ParseCheckerStream("<test_file>", "CHECK", io.StringIO(ToUnicode(checkerText)))
+
   def test_NonArchTests(self):
     for arch in [None] + archs_list:
-      checkerFile = ParseCheckerStream("<test-file>",
-                                       "CHECK",
-                                       io.StringIO(ToUnicode(self.noarch_block)))
+      checkerFile = self.parse(self.noarch_block)
       self.assertEqual(len(checkerFile.testCases), 1)
       self.assertEqual(len(checkerFile.testCases[0].assertions), 4)
 
@@ -320,9 +321,7 @@
     for targetArch in archs_list:
       for testArch in [a for a in archs_list if a != targetArch]:
         checkerText = self.arch_block.format(test_arch = testArch)
-        checkerFile = ParseCheckerStream("<test-file>",
-                                         "CHECK",
-                                         io.StringIO(ToUnicode(checkerText)))
+        checkerFile = self.parse(checkerText)
         self.assertEqual(len(checkerFile.testCases), 1)
         self.assertEqual(len(checkerFile.testCasesForArch(testArch)), 1)
         self.assertEqual(len(checkerFile.testCasesForArch(targetArch)), 0)
@@ -330,13 +329,42 @@
   def test_Arch(self):
     for arch in archs_list:
       checkerText = self.arch_block.format(test_arch = arch)
-      checkerFile = ParseCheckerStream("<test-file>",
-                                       "CHECK",
-                                       io.StringIO(ToUnicode(checkerText)))
+      checkerFile = self.parse(checkerText)
       self.assertEqual(len(checkerFile.testCases), 1)
       self.assertEqual(len(checkerFile.testCasesForArch(arch)), 1)
       self.assertEqual(len(checkerFile.testCases[0].assertions), 4)
 
+  def test_NoDebugAndArch(self):
+    testCase = self.parse("""
+        /// CHECK-START: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertFalse(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, None)
+
+  def test_SetDebugNoArch(self):
+    testCase = self.parse("""
+        /// CHECK-START-DEBUGGABLE: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertTrue(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, None)
+
+  def test_NoDebugSetArch(self):
+    testCase = self.parse("""
+        /// CHECK-START-ARM: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertFalse(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, "ARM")
+
+  def test_SetDebugAndArch(self):
+    testCase = self.parse("""
+        /// CHECK-START-ARM-DEBUGGABLE: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertTrue(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, "ARM")
 
 class CheckerParser_EvalTests(unittest.TestCase):
   def parseTestCase(self, string):
diff --git a/tools/checker/match/file.py b/tools/checker/match/file.py
index 6601a1e..3ded074 100644
--- a/tools/checker/match/file.py
+++ b/tools/checker/match/file.py
@@ -159,10 +159,13 @@
     matchFrom = match.scope.end + 1
     variables = match.variables
 
-def MatchFiles(checkerFile, c1File, targetArch):
+def MatchFiles(checkerFile, c1File, targetArch, debuggableMode):
   for testCase in checkerFile.testCases:
     if testCase.testArch not in [None, targetArch]:
       continue
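+    # Only match check groups whose DEBUGGABLE suffix agrees with the mode the
+    # output was compiled in.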
+    if testCase.forDebuggable != debuggableMode:
+      continue
+
     # TODO: Currently does not handle multiple occurrences of the same group
     # name, e.g. when a pass is run multiple times. It will always try to
     # match a check group against the first output group of the same name.
diff --git a/tools/checker/run_unit_tests.py b/tools/checker/run_unit_tests.py
index 2e8f208..a0d274d 100755
--- a/tools/checker/run_unit_tests.py
+++ b/tools/checker/run_unit_tests.py
@@ -19,7 +19,7 @@
 from file_format.checker.test      import CheckerParser_PrefixTest, \
                                           CheckerParser_TestExpressionTest, \
                                           CheckerParser_FileLayoutTest, \
-                                          CheckerParser_ArchTests, \
+                                          CheckerParser_SuffixTests, \
                                           CheckerParser_EvalTests
 from match.test                    import MatchLines_Test, \
                                           MatchFiles_Test
diff --git a/tools/cpplint.py b/tools/cpplint.py
index 4f063d9..308dd8c 100755
--- a/tools/cpplint.py
+++ b/tools/cpplint.py
@@ -90,6 +90,7 @@
 _USAGE = """
 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                    [--counting=total|toplevel|detailed]
+                   [--quiet]
         <file> [file] ...
 
   The style guidelines this tries to follow are those in
@@ -115,6 +116,9 @@
     verbose=#
       Specify a number 0-5 to restrict errors to certain verbosity levels.
 
+    quiet
+      Don't print anything if no errors are found.
+
     filter=-x,+y,...
       Specify a comma-separated list of category-filters to apply: only
       error messages whose category names pass the filters will be printed.
@@ -558,6 +562,9 @@
     self.filters = _DEFAULT_FILTERS[:]
     self.counting = 'total'  # In what way are we counting errors?
     self.errors_by_category = {}  # string to int dict storing error counts
+    # BEGIN android-added
+    self.quiet = False      # global setting.
+    # END android-added
 
     # output format:
     # "emacs" - format that emacs can parse (default)
@@ -568,6 +575,14 @@
     """Sets the output format for errors."""
     self.output_format = output_format
 
+  # BEGIN android-added
+  def SetQuiet(self, level):
+    """Sets the module's quiet setting, and returns the previous setting."""
+    last_quiet = self.quiet
+    self.quiet = level
+    return last_quiet
+  # END android-added
+
   def SetVerboseLevel(self, level):
     """Sets the module's verbosity, and returns the previous setting."""
     last_verbose_level = self.verbose_level
@@ -638,6 +653,17 @@
   _cpplint_state.SetOutputFormat(output_format)
 
 
+# BEGIN android-added
+def _Quiet():
+  """Returns the module's quiet setting."""
+  return _cpplint_state.quiet
+
+
+def _SetQuiet(level):
+  """Sets the module's quiet status, and returns the previous setting."""
+  return _cpplint_state.SetQuiet(level)
+# END android-added
+
 def _VerboseLevel():
   """Returns the module's verbosity setting."""
   return _cpplint_state.verbose_level
@@ -3888,6 +3914,9 @@
   """
 
   _SetVerboseLevel(vlevel)
+# BEGIN android-added
+  old_errors = _cpplint_state.error_count
+# END android-added
 
   try:
     # Support the UNIX convention of using "-" for stdin.  Note that
@@ -3938,8 +3967,11 @@
             'One or more unexpected \\r (^M) found;'
             'better to use only a \\n')
 
-  sys.stderr.write('Done processing %s\n' % filename)
-
+# BEGIN android-changed
+  # sys.stderr.write('Done processing %s\n' % filename)
+  if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count:
+    sys.stderr.write('Done processing %s\n' % filename)
+# END android-changed
 
 def PrintUsage(message):
   """Prints a brief usage string and exits, optionally with an error message.
@@ -3977,6 +4009,9 @@
   try:
     (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                  'stdout', # TODO(enh): added --stdout
+                                                 # BEGIN android-added
+                                                 'quiet',
+                                                 # END android-added
                                                  'counting=',
                                                  'filter=',
                                                  'root='])
@@ -3987,6 +4022,9 @@
   output_format = _OutputFormat()
   output_stream = sys.stderr # TODO(enh): added --stdout
   filters = ''
+  # BEGIN android-added
+  quiet = _Quiet()
+  # END android-added
   counting_style = ''
 
   for (opt, val) in opts:
@@ -3994,6 +4032,10 @@
       PrintUsage(None)
     elif opt == '--stdout': # TODO(enh): added --stdout
       output_stream = sys.stdout # TODO(enh): added --stdout
+    # BEGIN android-added
+    elif opt == '--quiet':
+      quiet = True
+    # END android-added
     elif opt == '--output':
       if not val in ('emacs', 'vs7', 'eclipse'):
         PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
@@ -4019,6 +4061,9 @@
   _SetVerboseLevel(verbosity)
   _SetFilters(filters)
   _SetCountingStyle(counting_style)
+  # BEGIN android-added
+  _SetQuiet(quiet)
+  # END android-added
   sys.stderr = output_stream # TODO(enh): added --stdout
 
   return filenames
@@ -4037,7 +4082,11 @@
   _cpplint_state.ResetErrorCounts()
   for filename in filenames:
     ProcessFile(filename, _cpplint_state.verbose_level)
-  _cpplint_state.PrintErrorCounts()
+  # BEGIN android-changed
+  # _cpplint_state.PrintErrorCounts()
+  if not _cpplint_state.quiet or _cpplint_state.error_count > 0:
+    _cpplint_state.PrintErrorCounts()
+  # END android-changed
 
   sys.exit(_cpplint_state.error_count > 0)
 
diff --git a/tools/dmtracedump/Android.mk b/tools/dmtracedump/Android.mk
new file mode 100644
index 0000000..da0d632
--- /dev/null
+++ b/tools/dmtracedump/Android.mk
@@ -0,0 +1,32 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Java method trace dump tool
+
+LOCAL_PATH:= $(call my-dir)
+
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := cc
+LOCAL_SRC_FILES := tracedump.cc
+LOCAL_CFLAGS += -O0 -g -Wall
+LOCAL_MODULE_HOST_OS := darwin linux windows
+LOCAL_MODULE := dmtracedump
+include $(BUILD_HOST_EXECUTABLE)
+
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := cc
+LOCAL_SRC_FILES := createtesttrace.cc
+LOCAL_CFLAGS += -O0 -g -Wall
+LOCAL_MODULE := create_test_dmtrace
+include $(BUILD_HOST_EXECUTABLE)
diff --git a/tools/dmtracedump/createtesttrace.cc b/tools/dmtracedump/createtesttrace.cc
new file mode 100644
index 0000000..444cce4
--- /dev/null
+++ b/tools/dmtracedump/createtesttrace.cc
@@ -0,0 +1,449 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Create a test file in the format required by dmtrace.
+ */
+#include "profile.h"  // from VM header
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+/*
+ * Values from the header of the data file.
+ */
+typedef struct DataHeader {
+  uint32_t magic;
+  int16_t version;
+  int16_t offsetToData;
+  int64_t startWhen;
+} DataHeader;
+
+#define VERSION 2
+int32_t versionNumber = VERSION;
+int32_t verbose = 0;
+
+DataHeader header = {0x574f4c53, VERSION, sizeof(DataHeader), 0LL};
+
+const char* versionHeader = "*version\n";
+const char* clockDef = "clock=thread-cpu\n";
+
+const char* keyThreads =
+    "*threads\n"
+    "1      main\n"
+    "2      foo\n"
+    "3      bar\n"
+    "4      blah\n";
+
+const char* keyEnd = "*end\n";
+
+typedef struct dataRecord {
+  uint32_t time;
+  int32_t threadId;
+  uint32_t action; /* 0=entry, 1=exit, 2=exception exit */
+  char* fullName;
+  char* className;
+  char* methodName;
+  char* signature;
+  uint32_t methodId;
+} dataRecord;
+
+dataRecord* records;
+
+#define BUF_SIZE 1024
+char buf[BUF_SIZE];
+
+typedef struct stack {
+  dataRecord** frames;
+  int32_t indentLevel;
+} stack;
+
+/* Mac OS doesn't have strndup(), so implement it here.
+ */
+char* strndup(const char* src, size_t len) {
+  char* dest = new char[len + 1];
+  strncpy(dest, src, len);
+  dest[len] = 0;
+  return dest;
+}
+
+/*
+ * Parse the input file.  It looks something like this:
+ * # This is a comment line
+ * 4  1 A
+ * 6  1  B
+ * 8  1  B
+ * 10 1 A
+ *
+ * where the first column is the time, the second column is the thread id,
+ * and the third column is the method (actually just the class name).  The
+ * number of spaces between the 2nd and 3rd columns is the indentation and
+ * determines the call stack.  Each called method must be indented by one
+ * more space.  In the example above, A is called at time 4, A calls B at
+ * time 6, B returns at time 8, and A returns at time 10.  Thread 1 is the
+ * only thread that is running.
+ *
+ * An alternative file format leaves out the first two columns:
+ * A
+ *  B
+ *  B
+ * A
+ *
+ * In this file format, the thread id is always 1, and the time starts at
+ * 2 and increments by 2 for each line.
+ */
+void parseInputFile(const char* inputFileName) {
+  FILE* inputFp = fopen(inputFileName, "r");
+  if (inputFp == nullptr) {
+    perror(inputFileName);
+    exit(1);
+  }
+
+  /* Count the number of lines in the buffer */
+  int32_t numRecords = 0;
+  int32_t maxThreadId = 1;
+  int32_t maxFrames = 0;
+  char* indentEnd;
+  while (fgets(buf, BUF_SIZE, inputFp)) {
+    char* cp = buf;
+    if (*cp == '#') continue;
+    numRecords += 1;
+    if (isdigit(*cp)) {
+      while (isspace(*cp)) cp += 1;
+      int32_t threadId = strtoul(cp, &cp, 0);
+      if (maxThreadId < threadId) maxThreadId = threadId;
+    }
+    indentEnd = cp;
+    while (isspace(*indentEnd)) indentEnd += 1;
+    if (indentEnd - cp + 1 > maxFrames) maxFrames = indentEnd - cp + 1;
+  }
+  int32_t numThreads = maxThreadId + 1;
+
+  /* Add space for a sentinel record at the end */
+  numRecords += 1;
+  records = new dataRecord[numRecords];
+  stack* callStack = new stack[numThreads];
+  for (int32_t ii = 0; ii < numThreads; ++ii) {
+    callStack[ii].frames = nullptr;
+    callStack[ii].indentLevel = 0;
+  }
+
+  rewind(inputFp);
+
+  uint32_t time = 0;
+  int32_t linenum = 0;
+  int32_t nextRecord = 0;
+  int32_t indentLevel = 0;
+  while (fgets(buf, BUF_SIZE, inputFp)) {
+    uint32_t threadId;
+    int32_t len;
+    int32_t indent;
+    int32_t action;
+    char* save_cp;
+
+    linenum += 1;
+    char* cp = buf;
+
+    /* Skip lines that start with '#' */
+    if (*cp == '#') continue;
+
+    /* Get time and thread id */
+    if (!isdigit(*cp)) {
+      /* If the line does not begin with a digit, then fill in
+       * default values for the time and threadId.
+       */
+      time += 2;
+      threadId = 1;
+    } else {
+      time = strtoul(cp, &cp, 0);
+      while (isspace(*cp)) cp += 1;
+      threadId = strtoul(cp, &cp, 0);
+      cp += 1;
+    }
+
+    // Allocate space for the thread stack, if necessary
+    if (callStack[threadId].frames == nullptr) {
+      dataRecord** stk = new dataRecord*[maxFrames];
+      callStack[threadId].frames = stk;
+    }
+    indentLevel = callStack[threadId].indentLevel;
+
+    save_cp = cp;
+    while (isspace(*cp)) {
+      cp += 1;
+    }
+    indent = cp - save_cp + 1;
+    records[nextRecord].time = time;
+    records[nextRecord].threadId = threadId;
+
+    save_cp = cp;
+    while (*cp != '\n') cp += 1;
+
+    /* Remove trailing spaces */
+    cp -= 1;
+    while (isspace(*cp)) cp -= 1;
+    cp += 1;
+    len = cp - save_cp;
+    records[nextRecord].fullName = strndup(save_cp, len);
+
+    /* Parse the name to support "class.method signature" */
+    records[nextRecord].className = nullptr;
+    records[nextRecord].methodName = nullptr;
+    records[nextRecord].signature = nullptr;
+    cp = strchr(save_cp, '.');
+    if (cp) {
+      len = cp - save_cp;
+      if (len > 0) records[nextRecord].className = strndup(save_cp, len);
+      save_cp = cp + 1;
+      cp = strchr(save_cp, ' ');
+      if (cp == nullptr) cp = strchr(save_cp, '\n');
+      if (cp && cp > save_cp) {
+        len = cp - save_cp;
+        records[nextRecord].methodName = strndup(save_cp, len);
+        save_cp = cp + 1;
+        cp = strchr(save_cp, ' ');
+        if (cp == nullptr) cp = strchr(save_cp, '\n');
+        if (cp && cp > save_cp) {
+          len = cp - save_cp;
+          records[nextRecord].signature = strndup(save_cp, len);
+        }
+      }
+    }
+
+    if (verbose) {
+      printf("Indent: %d; IndentLevel: %d; Line: %s", indent, indentLevel, buf);
+    }
+
+    action = 0;
+    if (indent == indentLevel + 1) {  // Entering a method
+      if (verbose) printf("  Entering %s\n", records[nextRecord].fullName);
+      callStack[threadId].frames[indentLevel] = &records[nextRecord];
+    } else if (indent == indentLevel) {  // Exiting a method
+      // Exiting method must be currently on top of stack (unless stack is
+      // empty)
+      if (callStack[threadId].frames[indentLevel - 1] == nullptr) {
+        if (verbose)
+          printf("  Exiting %s (past bottom of stack)\n",
+                 records[nextRecord].fullName);
+        callStack[threadId].frames[indentLevel - 1] = &records[nextRecord];
+        action = 1;
+      } else {
+        if (indentLevel < 1) {
+          fprintf(stderr, "Error: line %d: %s", linenum, buf);
+          fprintf(stderr, "  expected positive (>0) indentation, found %d\n",
+                  indent);
+          exit(1);
+        }
+        char* name = callStack[threadId].frames[indentLevel - 1]->fullName;
+        if (strcmp(name, records[nextRecord].fullName) == 0) {
+          if (verbose) printf("  Exiting %s\n", name);
+          action = 1;
+        } else {  // exiting method doesn't match stack's top method
+          fprintf(stderr, "Error: line %d: %s", linenum, buf);
+          fprintf(stderr, "  expected exit from %s\n",
+                  callStack[threadId].frames[indentLevel - 1]->fullName);
+          exit(1);
+        }
+      }
+    } else {
+      if (nextRecord != 0) {
+        fprintf(stderr, "Error: line %d: %s", linenum, buf);
+        fprintf(stderr, "  expected indentation %d [+1], found %d\n",
+                indentLevel, indent);
+        exit(1);
+      }
+
+      if (verbose) {
+        printf("  Nonzero indent at first record\n");
+        printf("  Entering %s\n", records[nextRecord].fullName);
+      }
+
+      // This is the first line of data, so we allow a larger
+      // initial indent.  This allows us to test popping off more
+      // frames than we entered.
+      indentLevel = indent - 1;
+      callStack[threadId].frames[indentLevel] = &records[nextRecord];
+    }
+
+    if (action == 0)
+      indentLevel += 1;
+    else
+      indentLevel -= 1;
+    records[nextRecord].action = action;
+    callStack[threadId].indentLevel = indentLevel;
+
+    nextRecord += 1;
+  }
+
+  /* Mark the last record with a sentinel */
+  memset(&records[nextRecord], 0, sizeof(dataRecord));
+}
+
+/*
+ * Write values to the binary data file.
+ */
+void write2LE(FILE* fp, uint16_t val) {
+  putc(val & 0xff, fp);
+  putc(val >> 8, fp);
+}
+
+void write4LE(FILE* fp, uint32_t val) {
+  putc(val & 0xff, fp);
+  putc((val >> 8) & 0xff, fp);
+  putc((val >> 16) & 0xff, fp);
+  putc((val >> 24) & 0xff, fp);
+}
+
+void write8LE(FILE* fp, uint64_t val) {
+  putc(val & 0xff, fp);
+  putc((val >> 8) & 0xff, fp);
+  putc((val >> 16) & 0xff, fp);
+  putc((val >> 24) & 0xff, fp);
+  putc((val >> 32) & 0xff, fp);
+  putc((val >> 40) & 0xff, fp);
+  putc((val >> 48) & 0xff, fp);
+  putc((val >> 56) & 0xff, fp);
+}
+
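+/* Each data record is the thread id (one byte for version 1, two bytes
+ * otherwise) followed by the combined method/action word and the elapsed
+ * time, both written as 32-bit little-endian values. */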
+void writeDataRecord(FILE* dataFp, int32_t threadId, uint32_t methodVal, uint32_t elapsedTime) {
+  if (versionNumber == 1)
+    putc(threadId, dataFp);
+  else
+    write2LE(dataFp, threadId);
+  write4LE(dataFp, methodVal);
+  write4LE(dataFp, elapsedTime);
+}
+
+void writeDataHeader(FILE* dataFp) {
+  struct timeval tv;
+  struct timezone tz;
+
+  gettimeofday(&tv, &tz);
+  uint64_t startTime = tv.tv_sec;
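+  // Pack the seconds into the high 32 bits and the microseconds into the low 32 bits.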
+  startTime = (startTime << 32) | tv.tv_usec;
+  header.version = versionNumber;
+  write4LE(dataFp, header.magic);
+  write2LE(dataFp, header.version);
+  write2LE(dataFp, header.offsetToData);
+  write8LE(dataFp, startTime);
+}
+
+void writeKeyMethods(FILE* keyFp) {
+  const char* methodStr = "*methods\n";
+  fwrite(methodStr, strlen(methodStr), 1, keyFp);
+
+  /* Assign method ids in multiples of 4 */
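+  /* Multiples of 4 keep the two low-order action bits (see profile.h) clear. */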
+  uint32_t methodId = 0;
+  for (dataRecord* pRecord = records; pRecord->fullName; ++pRecord) {
+    if (pRecord->methodId) continue;
+    uint32_t id = ++methodId << 2;
+    pRecord->methodId = id;
+
+    /* Assign this id to all the other records that have the
+     * same name.
+     */
+    for (dataRecord* pNext = pRecord + 1; pNext->fullName; ++pNext) {
+      if (pNext->methodId) continue;
+      if (strcmp(pRecord->fullName, pNext->fullName) == 0) pNext->methodId = id;
+    }
+    if (pRecord->className == nullptr || pRecord->methodName == nullptr) {
+      fprintf(keyFp, "%#x        %s      m       ()\n", pRecord->methodId,
+              pRecord->fullName);
+    } else if (pRecord->signature == nullptr) {
+      fprintf(keyFp, "%#x        %s      %s      ()\n", pRecord->methodId,
+              pRecord->className, pRecord->methodName);
+    } else {
+      fprintf(keyFp, "%#x        %s      %s      %s\n", pRecord->methodId,
+              pRecord->className, pRecord->methodName, pRecord->signature);
+    }
+  }
+}
+
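+/* The key section consists of "*version" (with the clock source), "*threads",
+ * "*methods" and a terminating "*end" marker. */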
+void writeKeys(FILE* keyFp) {
+  fprintf(keyFp, "%s%d\n%s", versionHeader, versionNumber, clockDef);
+  fwrite(keyThreads, strlen(keyThreads), 1, keyFp);
+  writeKeyMethods(keyFp);
+  fwrite(keyEnd, strlen(keyEnd), 1, keyFp);
+}
+
+void writeDataRecords(FILE* dataFp) {
+  for (dataRecord* pRecord = records; pRecord->fullName; ++pRecord) {
+    uint32_t val = METHOD_COMBINE(pRecord->methodId, pRecord->action);
+    writeDataRecord(dataFp, pRecord->threadId, val, pRecord->time);
+  }
+}
+
+void writeTrace(const char* traceFileName) {
+  FILE* fp = fopen(traceFileName, "w");
+  if (fp == nullptr) {
+    perror(traceFileName);
+    exit(1);
+  }
+  writeKeys(fp);
+  writeDataHeader(fp);
+  writeDataRecords(fp);
+  fclose(fp);
+}
+
+int32_t parseOptions(int32_t argc, char** argv) {
+  int32_t err = 0;
+  while (1) {
+    int32_t opt = getopt(argc, argv, "v:d");
+    if (opt == -1) break;
+    switch (opt) {
+      case 'v':
+        versionNumber = strtoul(optarg, nullptr, 0);
+        if (versionNumber != 1 && versionNumber != 2) {
+          fprintf(stderr, "Error: version number (%d) must be 1 or 2\n", versionNumber);
+          err = 1;
+        }
+        break;
+      case 'd':
+        verbose = 1;
+        break;
+      default:
+        err = 1;
+        break;
+    }
+  }
+  return err;
+}
+
+int32_t main(int32_t argc, char** argv) {
+  char* inputFile;
+  char* traceFileName = nullptr;
+
+  if (parseOptions(argc, argv) || argc - optind != 2) {
+    fprintf(stderr, "Usage: %s [-v version] [-d] input_file trace_prefix\n", argv[0]);
+    exit(1);
+  }
+
+  inputFile = argv[optind++];
+  parseInputFile(inputFile);
+  traceFileName = argv[optind++];
+
+  writeTrace(traceFileName);
+
+  return 0;
+}
diff --git a/tools/dmtracedump/dmtracedump.pl b/tools/dmtracedump/dmtracedump.pl
new file mode 100755
index 0000000..6e487c6
--- /dev/null
+++ b/tools/dmtracedump/dmtracedump.pl
@@ -0,0 +1,18 @@
+#!/usr/bin/perl
+
+opendir(DIR, ".") || die "can't opendir $some_dir: $!";
+@traces = grep { /.*\.dmtrace\.data/ } readdir(DIR);
+
+foreach (@traces)
+{
+    $input = $_;
+    $input =~ s/\.data$//;
+
+    $output = "$input.html";
+
+    print("dmtracedump -h -p $input > $output\n");
+    system("dmtracedump -h -p '$input' > '$output'");
+
+}
+
+closedir DIR;
diff --git a/tools/dmtracedump/dumpdir.sh b/tools/dmtracedump/dumpdir.sh
new file mode 100644
index 0000000..81992a2
--- /dev/null
+++ b/tools/dmtracedump/dumpdir.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+FILES=`ls $1/*.data | sed "s/^\\(.*\\).data$/\\1/"`
+
+mkdir -p $2
+
+for F in $FILES
+do
+    G=$2/`echo $F | sed "s/.*\\///g"`.html
+    dmtracedump -h -p $F > $G
+done
diff --git a/tools/dmtracedump/profile.h b/tools/dmtracedump/profile.h
new file mode 100644
index 0000000..8182352
--- /dev/null
+++ b/tools/dmtracedump/profile.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Android's method call profiling goodies.
+ */
+#ifndef ART_TOOLS_DMTRACEDUMP_PROFILE_H_
+#define ART_TOOLS_DMTRACEDUMP_PROFILE_H_
+
+/*
+ * Enumeration for the two "action" bits.
+ */
+enum {
+  METHOD_TRACE_ENTER = 0x00,   // method entry
+  METHOD_TRACE_EXIT = 0x01,    // method exit
+  METHOD_TRACE_UNROLL = 0x02,  // method exited by exception unrolling
+  // 0x03 currently unused
+};
+
+#define TOKEN_CHAR '*'
+
+/*
+ * Common definitions, shared with the dump tool.
+ */
+#define METHOD_ACTION_MASK 0x03 /* two bits */
+#define METHOD_ID(_method) ((_method) & (~METHOD_ACTION_MASK))
+#define METHOD_ACTION(_method) (((unsigned int)(_method)) & METHOD_ACTION_MASK)
+#define METHOD_COMBINE(_method, _action) ((_method) | (_action))
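+
+/* For example, METHOD_COMBINE(0x8, METHOD_TRACE_EXIT) yields 0x9, from which
+ * METHOD_ID(0x9) recovers 0x8 and METHOD_ACTION(0x9) recovers METHOD_TRACE_EXIT. */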
+
+#endif  // ART_TOOLS_DMTRACEDUMP_PROFILE_H_
diff --git a/tools/dmtracedump/tracedump.cc b/tools/dmtracedump/tracedump.cc
new file mode 100644
index 0000000..f70e2c2
--- /dev/null
+++ b/tools/dmtracedump/tracedump.cc
@@ -0,0 +1,2616 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Process dmtrace output.
+ *
+ * This is the wrong way to go about it -- C is a clumsy language for
+ * shuffling data around.  It'll do for a first pass.
+ */
+#include "profile.h"  // from VM header
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+/* Version number in the key file.
+ * Version 1 uses one byte for the thread id.
+ * Version 2 uses two bytes for the thread ids.
+ * Version 3 encodes the record size and adds an optional extra timestamp field.
+ */
+int32_t versionNumber;
+
+/* arbitrarily limit indentation */
+#define MAX_STACK_DEPTH 10000
+
+/* thread list in key file is not reliable, so just max out */
+#define MAX_THREADS 32768
+
+/* Size of temporary buffers for escaping html strings */
+#define HTML_BUFSIZE 10240
+
+const char* htmlHeader =
+    "<html>\n<head>\n<script type=\"text/javascript\" "
+    "src=\"%ssortable.js\"></script>\n"
+    "<script langugage=\"javascript\">\n"
+    "function toggle(item) {\n"
+    "    obj=document.getElementById(item);\n"
+    "    visible=(obj.style.display!=\"none\" && obj.style.display!=\"\");\n"
+    "    key=document.getElementById(\"x\" + item);\n"
+    "    if (visible) {\n"
+    "        obj.style.display=\"none\";\n"
+    "        key.innerHTML=\"+\";\n"
+    "    } else {\n"
+    "        obj.style.display=\"block\";\n"
+    "        key.innerHTML=\"-\";\n"
+    "    }\n"
+    "}\n"
+    "function onMouseOver(obj) {\n"
+    "    obj.style.background=\"lightblue\";\n"
+    "}\n"
+    "function onMouseOut(obj) {\n"
+    "    obj.style.background=\"white\";\n"
+    "}\n"
+    "</script>\n"
+    "<style type=\"text/css\">\n"
+    "div { font-family: courier; font-size: 13 }\n"
+    "div.parent { margin-left: 15; display: none }\n"
+    "div.leaf { margin-left: 10 }\n"
+    "div.header { margin-left: 10 }\n"
+    "div.link { margin-left: 10; cursor: move }\n"
+    "span.parent { padding-right: 10; }\n"
+    "span.leaf { padding-right: 10; }\n"
+    "a img { border: 0;}\n"
+    "table.sortable th { border-width: 0px 1px 1px 1px; background-color: "
+    "#ccc;}\n"
+    "a { text-decoration: none; }\n"
+    "a:hover { text-decoration: underline; }\n"
+    "table.sortable th, table.sortable td { text-align: left;}"
+    "table.sortable tr.odd td { background-color: #ddd; }\n"
+    "table.sortable tr.even td { background-color: #fff; }\n"
+    "</style>\n"
+    "</head><body>\n\n";
+
+const char* htmlFooter = "\n</body>\n</html>\n";
+const char* profileSeparator =
+    "======================================================================";
+
+const char* tableHeader =
+    "<table class='sortable' id='%s'><tr>\n"
+    "<th>Method</th>\n"
+    "<th>Run 1 (us)</th>\n"
+    "<th>Run 2 (us)</th>\n"
+    "<th>Diff (us)</th>\n"
+    "<th>Diff (%%)</th>\n"
+    "<th>1: # calls</th>\n"
+    "<th>2: # calls</th>\n"
+    "</tr>\n";
+
+const char* tableHeaderMissing =
+    "<table class='sortable' id='%s'>\n"
+    "<th>Method</th>\n"
+    "<th>Exclusive</th>\n"
+    "<th>Inclusive</th>\n"
+    "<th># calls</th>\n";
+
+#define GRAPH_LABEL_VISITED 0x0001
+#define GRAPH_NODE_VISITED 0x0002
+
+/*
+ * Values from the header of the data file.
+ */
+typedef struct DataHeader {
+  uint32_t magic;
+  int16_t version;
+  int16_t offsetToData;
+  int64_t startWhen;
+  int16_t recordSize;
+} DataHeader;
+
+/*
+ * Entry from the thread list.
+ */
+typedef struct ThreadEntry {
+  int32_t threadId;
+  const char* threadName;
+} ThreadEntry;
+
+struct MethodEntry;
+typedef struct TimedMethod {
+  struct TimedMethod* next;
+  uint64_t elapsedInclusive;
+  int32_t numCalls;
+  struct MethodEntry* method;
+} TimedMethod;
+
+typedef struct ClassEntry {
+  const char* className;
+  uint64_t elapsedExclusive;
+  int32_t numMethods;
+  struct MethodEntry** methods; /* list of methods in this class */
+  int32_t numCalls[2];              /* 0=normal, 1=recursive */
+} ClassEntry;
+
+typedef struct UniqueMethodEntry {
+  uint64_t elapsedExclusive;
+  int32_t numMethods;
+  struct MethodEntry** methods; /* list of methods with same name */
+  int32_t numCalls[2];              /* 0=normal, 1=recursive */
+} UniqueMethodEntry;
+
+/*
+ * Entry from the method list.
+ */
+typedef struct MethodEntry {
+  int64_t methodId;
+  const char* className;
+  const char* methodName;
+  const char* signature;
+  const char* fileName;
+  int32_t lineNum;
+  uint64_t elapsedExclusive;
+  uint64_t elapsedInclusive;
+  uint64_t topExclusive; /* non-recursive exclusive time */
+  uint64_t recursiveInclusive;
+  struct TimedMethod* parents[2];  /* 0=normal, 1=recursive */
+  struct TimedMethod* children[2]; /* 0=normal, 1=recursive */
+  int32_t numCalls[2];             /* 0=normal, 1=recursive */
+  int32_t index;                   /* used after sorting to number methods */
+  int32_t recursiveEntries;        /* number of entries on the stack */
+  int32_t graphState; /* used when graphing to see if this method has been visited before */
+} MethodEntry;
+
+/*
+ * The parsed contents of the key file.
+ */
+typedef struct DataKeys {
+  char* fileData; /* contents of the entire file */
+  int64_t fileLen;
+  int32_t numThreads;
+  ThreadEntry* threads;
+  int32_t numMethods;
+  MethodEntry* methods; /* 2 extra methods: "toplevel" and "unknown" */
+} DataKeys;
+
+#define TOPLEVEL_INDEX 0
+#define UNKNOWN_INDEX 1
+
+typedef struct StackEntry {
+  MethodEntry* method;
+  uint64_t entryTime;
+} StackEntry;
+
+typedef struct CallStack {
+  int32_t top;
+  StackEntry calls[MAX_STACK_DEPTH];
+  uint64_t lastEventTime;
+  uint64_t threadStartTime;
+} CallStack;
+
+typedef struct DiffEntry {
+  MethodEntry* method1;
+  MethodEntry* method2;
+  int64_t differenceExclusive;
+  int64_t differenceInclusive;
+  double differenceExclusivePercentage;
+  double differenceInclusivePercentage;
+} DiffEntry;
+
+// Global options
+typedef struct Options {
+  const char* traceFileName;
+  const char* diffFileName;
+  const char* graphFileName;
+  int32_t keepDotFile;
+  int32_t dump;
+  int32_t outputHtml;
+  const char* sortableUrl;
+  int32_t threshold;
+} Options;
+
+typedef struct TraceData {
+  int32_t numClasses;
+  ClassEntry* classes;
+  CallStack* stacks[MAX_THREADS];
+  int32_t depth[MAX_THREADS];
+  int32_t numUniqueMethods;
+  UniqueMethodEntry* uniqueMethods;
+} TraceData;
+
+static Options gOptions;
+
+/* Escapes characters in the source string that are html special entities.
+ * The escaped string is written to "dest" which must be large enough to
+ * hold the result.  A pointer to "dest" is returned.  The characters and
+ * their corresponding escape sequences are:
+ *  '<'  &lt;
+ *  '>'  &gt;
+ *  '&'  &amp;
+ */
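+/* For example, "a<b&c" is escaped to "a&lt;b&amp;c". */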
+char* htmlEscape(const char* src, char* dest, int32_t len) {
+  char* destStart = dest;
+
+  if (src == nullptr) return nullptr;
+
+  int32_t nbytes = 0;
+  while (*src) {
+    if (*src == '<') {
+      nbytes += 4;
+      if (nbytes >= len) break;
+      *dest++ = '&';
+      *dest++ = 'l';
+      *dest++ = 't';
+      *dest++ = ';';
+    } else if (*src == '>') {
+      nbytes += 4;
+      if (nbytes >= len) break;
+      *dest++ = '&';
+      *dest++ = 'g';
+      *dest++ = 't';
+      *dest++ = ';';
+    } else if (*src == '&') {
+      nbytes += 5;
+      if (nbytes >= len) break;
+      *dest++ = '&';
+      *dest++ = 'a';
+      *dest++ = 'm';
+      *dest++ = 'p';
+      *dest++ = ';';
+    } else {
+      nbytes += 1;
+      if (nbytes >= len) break;
+      *dest++ = *src;
+    }
+    src += 1;
+  }
+  if (nbytes >= len) {
+    fprintf(stderr, "htmlEscape(): buffer overflow\n");
+    exit(1);
+  }
+  *dest = 0;
+
+  return destStart;
+}
+
+/* Initializes a MethodEntry
+ */
+void initMethodEntry(MethodEntry* method, int64_t methodId, const char* className,
+                     const char* methodName, const char* signature, const char* fileName,
+                     const char* lineNumStr) {
+  method->methodId = methodId;
+  method->className = className;
+  method->methodName = methodName;
+  method->signature = signature;
+  method->fileName = fileName;
+  method->lineNum = (lineNumStr != nullptr) ? atoi(lineNumStr) : -1;
+  method->elapsedExclusive = 0;
+  method->elapsedInclusive = 0;
+  method->topExclusive = 0;
+  method->recursiveInclusive = 0;
+  method->parents[0] = nullptr;
+  method->parents[1] = nullptr;
+  method->children[0] = nullptr;
+  method->children[1] = nullptr;
+  method->numCalls[0] = 0;
+  method->numCalls[1] = 0;
+  method->index = 0;
+  method->recursiveEntries = 0;
+}
+
+/*
+ * This comparison function is called from qsort() to sort
+ * methods into decreasing order of exclusive elapsed time.
+ */
+int32_t compareElapsedExclusive(const void* a, const void* b) {
+  const MethodEntry* methodA = *(const MethodEntry**) a;
+  const MethodEntry* methodB = *(const MethodEntry**) b;
+  uint64_t elapsed1 = methodA->elapsedExclusive;
+  uint64_t elapsed2 = methodB->elapsedExclusive;
+  if (elapsed1 < elapsed2) return 1;
+  if (elapsed1 > elapsed2) return -1;
+
+  /* If the elapsed times of two methods are equal, then sort them
+   * into alphabetical order.
+   */
+  int32_t result = strcmp(methodA->className, methodB->className);
+  if (result == 0) {
+    if (methodA->methodName == nullptr || methodB->methodName == nullptr) {
+      int64_t idA = methodA->methodId;
+      int64_t idB = methodB->methodId;
+      if (idA < idB) return -1;
+      if (idA > idB) return 1;
+      return 0;
+    }
+    result = strcmp(methodA->methodName, methodB->methodName);
+    if (result == 0) result = strcmp(methodA->signature, methodB->signature);
+  }
+  return result;
+}
+
+/*
+ * This comparison function is called from qsort() to sort
+ * methods into decreasing order of inclusive elapsed time.
+ */
+int32_t compareElapsedInclusive(const void* a, const void* b) {
+  const MethodEntry* methodA = *(MethodEntry const**) a;
+  const MethodEntry* methodB = *(MethodEntry const**) b;
+  uint64_t elapsed1 = methodA->elapsedInclusive;
+  uint64_t elapsed2 = methodB->elapsedInclusive;
+  if (elapsed1 < elapsed2) return 1;
+  if (elapsed1 > elapsed2) return -1;
+
+  /* If the elapsed times of two methods are equal, then sort them
+   * into alphabetical order.
+   */
+  int32_t result = strcmp(methodA->className, methodB->className);
+  if (result == 0) {
+    if (methodA->methodName == nullptr || methodB->methodName == nullptr) {
+      int64_t idA = methodA->methodId;
+      int64_t idB = methodB->methodId;
+      if (idA < idB) return -1;
+      if (idA > idB) return 1;
+      return 0;
+    }
+    result = strcmp(methodA->methodName, methodB->methodName);
+    if (result == 0) result = strcmp(methodA->signature, methodB->signature);
+  }
+  return result;
+}
+
+/*
+ * This comparison function is called from qsort() to sort
+ * TimedMethods into decreasing order of inclusive elapsed time.
+ */
+int32_t compareTimedMethod(const void* a, const void* b) {
+  const TimedMethod* timedA = (TimedMethod const*) a;
+  const TimedMethod* timedB = (TimedMethod const*) b;
+  uint64_t elapsed1 = timedA->elapsedInclusive;
+  uint64_t elapsed2 = timedB->elapsedInclusive;
+  if (elapsed1 < elapsed2) return 1;
+  if (elapsed1 > elapsed2) return -1;
+
+  /* If the elapsed times of two methods are equal, then sort them
+   * into alphabetical order.
+   */
+  MethodEntry* methodA = timedA->method;
+  MethodEntry* methodB = timedB->method;
+  int32_t result = strcmp(methodA->className, methodB->className);
+  if (result == 0) {
+    if (methodA->methodName == nullptr || methodB->methodName == nullptr) {
+      int64_t idA = methodA->methodId;
+      int64_t idB = methodB->methodId;
+      if (idA < idB) return -1;
+      if (idA > idB) return 1;
+      return 0;
+    }
+    result = strcmp(methodA->methodName, methodB->methodName);
+    if (result == 0) result = strcmp(methodA->signature, methodB->signature);
+  }
+  return result;
+}
+
+/*
+ * This comparison function is called from qsort() to sort
+ * MethodEntry pointers into alphabetical order of class names.
+ */
+int32_t compareClassNames(const void* a, const void* b) {
+  const MethodEntry* methodA = *(const MethodEntry**) a;
+  const MethodEntry* methodB = *(const MethodEntry**) b;
+  int32_t result = strcmp(methodA->className, methodB->className);
+  if (result == 0) {
+    int64_t idA = methodA->methodId;
+    int64_t idB = methodB->methodId;
+    if (idA < idB) return -1;
+    if (idA > idB) return 1;
+    return 0;
+  }
+  return result;
+}
+
+/*
+ * This comparison function is called from qsort() to sort
+ * classes into decreasing order of exclusive elapsed time.
+ */
+int32_t compareClassExclusive(const void* a, const void* b) {
+  const ClassEntry* classA = *(const ClassEntry**) a;
+  const ClassEntry* classB = *(const ClassEntry**) b;
+  uint64_t elapsed1 = classA->elapsedExclusive;
+  uint64_t elapsed2 = classB->elapsedExclusive;
+  if (elapsed1 < elapsed2) return 1;
+  if (elapsed1 > elapsed2) return -1;
+
+  /* If the elapsed times of two classes are equal, then sort them
+   * into alphabetical order.
+   */
+  int32_t result = strcmp(classA->className, classB->className);
+  if (result == 0) {
+    /* Break ties with the first method id.  This is probably not
+     * needed.
+     */
+    int64_t idA = classA->methods[0]->methodId;
+    int64_t idB = classB->methods[0]->methodId;
+    if (idA < idB) return -1;
+    if (idA > idB) return 1;
+    return 0;
+  }
+  return result;
+}
+
+/*
+ * This comparison function is called from qsort() to sort
+ * MethodEntry pointers into alphabetical order by method name,
+ * then by class name.
+ */
+int32_t compareMethodNames(const void* a, const void* b) {
+  const MethodEntry* methodA = *(const MethodEntry**) a;
+  const MethodEntry* methodB = *(const MethodEntry**) b;
+  if (methodA->methodName == nullptr || methodB->methodName == nullptr) {
+    return compareClassNames(a, b);
+  }
+  int32_t result = strcmp(methodA->methodName, methodB->methodName);
+  if (result == 0) {
+    result = strcmp(methodA->className, methodB->className);
+    if (result == 0) {
+      int64_t idA = methodA->methodId;
+      int64_t idB = methodB->methodId;
+      if (idA < idB) return -1;
+      if (idA > idB) return 1;
+      return 0;
+    }
+  }
+  return result;
+}
+
+/*
+ * This comparison function is called from qsort() to sort
+ * unique methods into decreasing order of exclusive elapsed time.
+ */
+int32_t compareUniqueExclusive(const void* a, const void* b) {
+  const UniqueMethodEntry* uniqueA = *(const UniqueMethodEntry**) a;
+  const UniqueMethodEntry* uniqueB = *(const UniqueMethodEntry**) b;
+  uint64_t elapsed1 = uniqueA->elapsedExclusive;
+  uint64_t elapsed2 = uniqueB->elapsedExclusive;
+  if (elapsed1 < elapsed2) return 1;
+  if (elapsed1 > elapsed2) return -1;
+
+  /* If the elapsed times of two methods are equal, then sort them
+   * into alphabetical order.
+   */
+  int32_t result = strcmp(uniqueA->methods[0]->className, uniqueB->methods[0]->className);
+  if (result == 0) {
+    int64_t idA = uniqueA->methods[0]->methodId;
+    int64_t idB = uniqueB->methods[0]->methodId;
+    if (idA < idB) return -1;
+    if (idA > idB) return 1;
+    return 0;
+  }
+  return result;
+}
+
+/*
+ * Free a DataKeys struct.
+ */
+void freeDataKeys(DataKeys* pKeys) {
+  if (pKeys == nullptr) return;
+
+  free(pKeys->fileData);
+  free(pKeys->threads);
+  free(pKeys->methods);
+  free(pKeys);
+}
+
+/*
+ * Find the offset to the next occurrence of the specified character.
+ *
+ * "data" should point somewhere within the current line.  "len" is the
+ * number of bytes left in the buffer.
+ *
+ * Returns -1 if we hit the end of the buffer.
+ */
+int32_t findNextChar(const char* data, int32_t len, char lookFor) {
+  const char* start = data;
+
+  while (len > 0) {
+    if (*data == lookFor) return data - start;
+
+    data++;
+    len--;
+  }
+
+  return -1;
+}
+
+/*
+ * Count the number of lines until the next token.
+ *
+ * Returns -1 if none found before EOF.
+ */
+int32_t countLinesToToken(const char* data, int32_t len) {
+  int32_t count = 0;
+  int32_t next;
+
+  while (*data != TOKEN_CHAR) {
+    next = findNextChar(data, len, '\n');
+    if (next < 0) return -1;
+    count++;
+    data += next + 1;
+    len -= next + 1;
+  }
+
+  return count;
+}
+
+/*
+ * Make sure we're at the start of the right section.
+ *
+ * Returns the length of the token line, or -1 if something is wrong.
+ */
+int32_t checkToken(const char* data, int32_t len, const char* cmpStr) {
+  int32_t cmpLen = strlen(cmpStr);
+  int32_t next;
+
+  if (*data != TOKEN_CHAR) {
+    fprintf(stderr, "ERROR: not at start of %s (found '%.10s')\n", cmpStr, data);
+    return -1;
+  }
+
+  next = findNextChar(data, len, '\n');
+  if (next < cmpLen + 1) return -1;
+
+  if (strncmp(data + 1, cmpStr, cmpLen) != 0) {
+    fprintf(stderr, "ERROR: '%s' not found (got '%.7s')\n", cmpStr, data + 1);
+    return -1;
+  }
+
+  return next + 1;
+}
+
+/*
+ * Parse the "*version" section.
+ */
+int64_t parseVersion(DataKeys* pKeys, int64_t offset, int32_t verbose) {
+  if (offset < 0) return -1;
+
+  char* data = pKeys->fileData + offset;
+  char* dataEnd = pKeys->fileData + pKeys->fileLen;
+  int32_t next = checkToken(data, dataEnd - data, "version");
+  if (next <= 0) return -1;
+
+  data += next;
+
+  /*
+   * Count the number of items in the "version" section.
+   */
+  int32_t count = countLinesToToken(data, dataEnd - data);
+  if (count <= 0) {
+    fprintf(stderr, "ERROR: failed while reading version (found %d)\n", count);
+    return -1;
+  }
+
+  /* find the end of the line */
+  next = findNextChar(data, dataEnd - data, '\n');
+  if (next < 0) return -1;
+
+  data[next] = '\0';
+  versionNumber = strtoul(data, nullptr, 0);
+  if (verbose) printf("VERSION: %d\n", versionNumber);
+
+  data += next + 1;
+
+  /* skip over the rest of the stuff, which is "name=value" lines */
+  for (int32_t i = 1; i < count; i++) {
+    next = findNextChar(data, dataEnd - data, '\n');
+    if (next < 0) return -1;
+    // data[next] = '\0';
+    // printf("IGNORING: '%s'\n", data);
+    data += next + 1;
+  }
+
+  return data - pKeys->fileData;
+}
+
+/*
+ * Parse the "*threads" section.
+ */
+int64_t parseThreads(DataKeys* pKeys, int64_t offset) {
+  if (offset < 0) return -1;
+
+  char* data = pKeys->fileData + offset;
+  char* dataEnd = pKeys->fileData + pKeys->fileLen;
+  int32_t next = checkToken(data, dataEnd - data, "threads");
+  if (next < 0) return -1;
+
+  data += next;
+
+  /*
+   * Count the number of thread entries (one per line).
+   */
+  int32_t count = countLinesToToken(data, dataEnd - data);
+  if (count <= 0) {
+    fprintf(stderr, "ERROR: failed while reading threads (found %d)\n", count);
+    return -1;
+  }
+
+  // printf("+++ found %d threads\n", count);
+  pKeys->threads = new ThreadEntry[count];
+  if (pKeys->threads == nullptr) return -1;
+
+  /*
+   * Extract all entries.
+   */
+  for (int32_t i = 0; i < count; i++) {
+    next = findNextChar(data, dataEnd - data, '\n');
+    assert(next > 0);
+    data[next] = '\0';
+
+    int32_t tab = findNextChar(data, next, '\t');
+    data[tab] = '\0';
+
+    pKeys->threads[i].threadId = atoi(data);
+    pKeys->threads[i].threadName = data + tab + 1;
+
+    data += next + 1;
+  }
+
+  pKeys->numThreads = count;
+  return data - pKeys->fileData;
+}
+
+/*
+ * Parse the "*methods" section.
+ */
+int64_t parseMethods(DataKeys* pKeys, int64_t offset) {
+  if (offset < 0) return -1;
+
+  char* data = pKeys->fileData + offset;
+  char* dataEnd = pKeys->fileData + pKeys->fileLen;
+  int32_t next = checkToken(data, dataEnd - data, "methods");
+  if (next < 0) return -1;
+
+  data += next;
+
+  /*
+   * Count the number of method entries (one per line).
+   */
+  int32_t count = countLinesToToken(data, dataEnd - data);
+  if (count <= 0) {
+    fprintf(stderr, "ERROR: failed while reading methods (found %d)\n", count);
+    return -1;
+  }
+
+  /* Reserve an extra method at location 0 for the "toplevel" method,
+   * and another extra method for all other "unknown" methods.
+   */
+  count += 2;
+  pKeys->methods = new MethodEntry[count];
+  if (pKeys->methods == nullptr) return -1;
+  initMethodEntry(&pKeys->methods[TOPLEVEL_INDEX], -2, "(toplevel)", nullptr, nullptr,
+                  nullptr, nullptr);
+  initMethodEntry(&pKeys->methods[UNKNOWN_INDEX], -1, "(unknown)", nullptr, nullptr,
+                  nullptr, nullptr);
+
+  /*
+   * Extract all entries, starting with index 2.
+   */
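+  /* Each line is "<id>\t<className>\t<methodName>\t<signature>", optionally
+   * followed by "\t<sourceFile>\t<lineNumber>"; some files carry only
+   * "<id>\t<name>", which the tab checks below tolerate. */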
+  for (int32_t i = UNKNOWN_INDEX + 1; i < count; i++) {
+    next = findNextChar(data, dataEnd - data, '\n');
+    assert(next > 0);
+    data[next] = '\0';
+
+    int32_t tab1 = findNextChar(data, next, '\t');
+    int32_t tab2 = findNextChar(data + (tab1 + 1), next - (tab1 + 1), '\t');
+    int32_t tab3 = findNextChar(data + (tab1 + tab2 + 2), next - (tab1 + tab2 + 2), '\t');
+    int32_t tab4 = findNextChar(data + (tab1 + tab2 + tab3 + 3),
+                                next - (tab1 + tab2 + tab3 + 3), '\t');
+    int32_t tab5 = findNextChar(data + (tab1 + tab2 + tab3 + tab4 + 4),
+                                next - (tab1 + tab2 + tab3 + tab4 + 4), '\t');
+    if (tab1 < 0) {
+      fprintf(stderr, "ERROR: missing field on method line: '%s'\n", data);
+      return -1;
+    }
+    assert(data[tab1] == '\t');
+    data[tab1] = '\0';
+
+    char* endptr;
+    int64_t id = strtoul(data, &endptr, 0);
+    if (*endptr != '\0') {
+      fprintf(stderr, "ERROR: bad method ID '%s'\n", data);
+      return -1;
+    }
+
+    // Allow files that specify just a function name, instead of requiring
+    // "class \t method \t signature"
+    if (tab2 > 0 && tab3 > 0) {
+      tab2 += tab1 + 1;
+      tab3 += tab2 + 1;
+      assert(data[tab2] == '\t');
+      assert(data[tab3] == '\t');
+      data[tab2] = data[tab3] = '\0';
+
+      // This is starting to get awkward.  Allow filename and line #.
+      if (tab4 > 0 && tab5 > 0) {
+        tab4 += tab3 + 1;
+        tab5 += tab4 + 1;
+
+        assert(data[tab4] == '\t');
+        assert(data[tab5] == '\t');
+        data[tab4] = data[tab5] = '\0';
+
+        initMethodEntry(&pKeys->methods[i], id, data + tab1 + 1,
+                        data + tab2 + 1, data + tab3 + 1, data + tab4 + 1,
+                        data + tab5 + 1);
+      } else {
+        initMethodEntry(&pKeys->methods[i], id, data + tab1 + 1,
+                        data + tab2 + 1, data + tab3 + 1, nullptr, nullptr);
+      }
+    } else {
+      initMethodEntry(&pKeys->methods[i], id, data + tab1 + 1, nullptr, nullptr, nullptr,
+                      nullptr);
+    }
+
+    data += next + 1;
+  }
+
+  pKeys->numMethods = count;
+  return data - pKeys->fileData;
+}
+
+/*
+ * Parse the "*end" section.
+ */
+int64_t parseEnd(DataKeys* pKeys, int64_t offset) {
+  if (offset < 0) return -1;
+
+  char* data = pKeys->fileData + offset;
+  char* dataEnd = pKeys->fileData + pKeys->fileLen;
+  int32_t next = checkToken(data, dataEnd - data, "end");
+  if (next < 0) return -1;
+
+  data += next;
+
+  return data - pKeys->fileData;
+}
+
+/*
+ * Sort the thread list entries.
+ */
+static int32_t compareThreads(const void* thread1, const void* thread2) {
+  return ((const ThreadEntry*) thread1)->threadId -
+         ((const ThreadEntry*) thread2)->threadId;
+}
+
+void sortThreadList(DataKeys* pKeys) {
+  qsort(pKeys->threads, pKeys->numThreads, sizeof(pKeys->threads[0]), compareThreads);
+}
+
+/*
+ * Sort the method list entries.
+ */
+static int32_t compareMethods(const void* meth1, const void* meth2) {
+  int64_t id1 = ((const MethodEntry*) meth1)->methodId;
+  int64_t id2 = ((const MethodEntry*) meth2)->methodId;
+  if (id1 < id2) return -1;
+  if (id1 > id2) return 1;
+  return 0;
+}
+
+void sortMethodList(DataKeys* pKeys) {
+  qsort(pKeys->methods, pKeys->numMethods, sizeof(MethodEntry), compareMethods);
+}
+
+/*
+ * Parse the key section, and return a copy of the parsed contents.
+ */
+DataKeys* parseKeys(FILE* fp, int32_t verbose) {
+  int64_t offset;
+  DataKeys* pKeys = new DataKeys();
+  if (pKeys == nullptr) return nullptr;
+  memset(pKeys, 0, sizeof(DataKeys));
+
+  /*
+   * We load the entire file into memory.  We do this, rather than memory-
+   * mapping it, because we want to change some whitespace to NULs.
+   */
+  if (fseek(fp, 0L, SEEK_END) != 0) {
+    perror("fseek");
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+  pKeys->fileLen = ftell(fp);
+  if (pKeys->fileLen == 0) {
+    fprintf(stderr, "Key file is empty.\n");
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+  rewind(fp);
+
+  pKeys->fileData = new char[pKeys->fileLen];
+  if (pKeys->fileData == nullptr) {
+    fprintf(stderr, "ERROR: unable to alloc %" PRIu64 " bytes\n", pKeys->fileLen);
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+
+  if (fread(pKeys->fileData, 1, pKeys->fileLen, fp) != (size_t)pKeys->fileLen) {
+    fprintf(stderr, "ERROR: unable to read %" PRIu64 " bytes from trace file\n", pKeys->fileLen);
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+
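+  /*
+   * The key section is expected to consist of a "*version" block, a "*threads"
+   * block, a "*methods" block, and a terminating "*end" line, in that order;
+   * parse them in sequence, threading the running offset through each parser.
+   */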
+  offset = 0;
+  offset = parseVersion(pKeys, offset, verbose);
+  offset = parseThreads(pKeys, offset);
+  offset = parseMethods(pKeys, offset);
+  offset = parseEnd(pKeys, offset);
+  if (offset < 0) {
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+
+  /* We now know where the key section ends.  Record that length, but keep the
+   * original buffer: the thread and method entries parsed above hold pointers
+   * directly into it (and it was allocated with new[], so realloc() must not
+   * be used on it anyway). */
+  pKeys->fileLen = offset;
+  /* Leave fp pointing to the beginning of the data section. */
+  fseek(fp, offset, SEEK_SET);
+
+  sortThreadList(pKeys);
+  sortMethodList(pKeys);
+
+  /*
+   * Dump list of threads.
+   */
+  if (verbose) {
+    printf("Threads (%d):\n", pKeys->numThreads);
+    for (int32_t i = 0; i < pKeys->numThreads; i++) {
+      printf("%2d %s\n", pKeys->threads[i].threadId, pKeys->threads[i].threadName);
+    }
+  }
+
+#if 0
+  /*
+   * Dump list of methods.
+   */
+  if (verbose) {
+    printf("Methods (%d):\n", pKeys->numMethods);
+    for (int32_t i = 0; i < pKeys->numMethods; i++) {
+      printf("0x%08x %s : %s : %s\n",
+             pKeys->methods[i].methodId, pKeys->methods[i].className,
+             pKeys->methods[i].methodName, pKeys->methods[i].signature);
+    }
+  }
+#endif
+
+  return pKeys;
+}
+
+/*
+ * Read values from the binary data file.
+ */
+
+/*
+ * Make the return value "uint32_t" instead of "uint16_t" so that we can detect EOF.
+ */
+uint32_t read2LE(FILE* fp) {
+  uint32_t val = getc(fp);
+  val |= getc(fp) << 8;
+  return val;
+}
+uint32_t read4LE(FILE* fp) {
+  uint32_t val = getc(fp);
+  val |= getc(fp) << 8;
+  val |= getc(fp) << 16;
+  val |= getc(fp) << 24;
+  return val;
+}
+uint64_t read8LE(FILE* fp) {
+  uint64_t val = getc(fp);
+  val |= (uint64_t) getc(fp) << 8;
+  val |= (uint64_t) getc(fp) << 16;
+  val |= (uint64_t) getc(fp) << 24;
+  val |= (uint64_t) getc(fp) << 32;
+  val |= (uint64_t) getc(fp) << 40;
+  val |= (uint64_t) getc(fp) << 48;
+  val |= (uint64_t) getc(fp) << 56;
+  return val;
+}
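+
+/*
+ * Example: read2LE() over the byte sequence 0x34 0x12 yields 0x1234 (the first
+ * byte is the least significant).  At end of file getc() returns EOF, so the
+ * result comes back with all bits set; readDataRecord() relies on this by
+ * comparing the value against EOF after assigning it to a signed int.
+ */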
+
+/*
+ * Parse the header of the data section.
+ *
+ * Returns with the file positioned at the start of the record data.
+ */
+int32_t parseDataHeader(FILE* fp, DataHeader* pHeader) {
+  pHeader->magic = read4LE(fp);
+  pHeader->version = read2LE(fp);
+  pHeader->offsetToData = read2LE(fp);
+  pHeader->startWhen = read8LE(fp);
+  int32_t bytesToRead = pHeader->offsetToData - 16;
+  if (pHeader->version == 1) {
+    pHeader->recordSize = 9;
+  } else if (pHeader->version == 2) {
+    pHeader->recordSize = 10;
+  } else if (pHeader->version == 3) {
+    pHeader->recordSize = read2LE(fp);
+    bytesToRead -= 2;
+  } else {
+    fprintf(stderr, "Unsupported trace file version: %d\n", pHeader->version);
+    return -1;
+  }
+
+  if (fseek(fp, bytesToRead, SEEK_CUR) != 0) {
+    return -1;
+  }
+
+  return 0;
+}
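+
+/*
+ * As read above, the fixed part of the header is 16 bytes: a 4-byte magic
+ * value, a 2-byte version, a 2-byte offset to the record data, and an 8-byte
+ * start time, all little-endian.  Version 3 headers add a 2-byte record size;
+ * whatever remains before offsetToData is skipped.
+ */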
+
+/*
+ * Look up a method by its method ID.
+ *
+ * Returns nullptr if no matching method was found.
+ */
+MethodEntry* lookupMethod(DataKeys* pKeys, int64_t methodId) {
+  int32_t lo = 0;
+  int32_t hi = pKeys->numMethods - 1;
+
+  while (hi >= lo) {
+    int32_t mid = (hi + lo) / 2;
+
+    int64_t id = pKeys->methods[mid].methodId;
+    if (id == methodId) /* match */
+      return &pKeys->methods[mid];
+    else if (id < methodId) /* too low */
+      lo = mid + 1;
+    else /* too high */
+      hi = mid - 1;
+  }
+
+  return nullptr;
+}
+
+/*
+ * Reads the next data record, and assigns the data values to threadId,
+ * methodVal and elapsedTime.  On end-of-file, the threadId, methodVal,
+ * and elapsedTime are unchanged.  Returns 1 on end-of-file, otherwise
+ * returns 0.
+ */
+int32_t readDataRecord(FILE* dataFp, DataHeader* dataHeader, int32_t* threadId,
+                   uint32_t* methodVal, uint64_t* elapsedTime) {
+  int32_t id;
+  int32_t bytesToRead = dataHeader->recordSize;
+  if (dataHeader->version == 1) {
+    id = getc(dataFp);
+    bytesToRead -= 1;
+  } else {
+    id = read2LE(dataFp);
+    bytesToRead -= 2;
+  }
+  if (id == EOF) return 1;
+  *threadId = id;
+
+  *methodVal = read4LE(dataFp);
+  *elapsedTime = read4LE(dataFp);
+  bytesToRead -= 8;
+
+  while (bytesToRead-- > 0) {
+    getc(dataFp);
+  }
+
+  if (feof(dataFp)) {
+    fprintf(stderr, "WARNING: hit EOF mid-record\n");
+    return 1;
+  }
+  return 0;
+}
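+
+/*
+ * A record therefore begins with a one-byte (version 1) or two-byte thread ID,
+ * followed by a four-byte method word and a four-byte time value; any further
+ * bytes, up to recordSize, are skipped.
+ */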
+
+/*
+ * Read the key file and use it to produce formatted output from the
+ * data file.
+ */
+void dumpTrace() {
+  static const char* actionStr[] = {"ent", "xit", "unr", "???"};
+  MethodEntry bogusMethod = {
+      0, "???", "???",        "???",        "???",  -1, 0, 0,
+      0, 0,     {nullptr, nullptr}, {nullptr, nullptr}, {0, 0}, 0,  0, -1};
+  char bogusBuf[80];
+  TraceData traceData;
+
+  // printf("Dumping '%s' '%s'\n", dataFileName, keyFileName);
+
+  char spaces[MAX_STACK_DEPTH + 1];
+  memset(spaces, '.', MAX_STACK_DEPTH);
+  spaces[MAX_STACK_DEPTH] = '\0';
+
+  for (int32_t i = 0; i < MAX_THREADS; i++)
+    traceData.depth[i] = 2;  // adjust for return from start function
+
+  FILE* dataFp = fopen(gOptions.traceFileName, "rb");
+  if (dataFp == nullptr) return;
+
+  DataKeys* pKeys = parseKeys(dataFp, 1);
+  if (pKeys == nullptr) {
+    fclose(dataFp);
+    return;
+  }
+
+  DataHeader dataHeader;
+  if (parseDataHeader(dataFp, &dataHeader) < 0) {
+    fclose(dataFp);
+    freeDataKeys(pKeys);
+    return;
+  }
+
+  printf("Trace (threadID action usecs class.method signature):\n");
+
+  /* Keep these outside the loop so an exit record can be checked against the
+   * immediately preceding enter record. */
+  int64_t lastEnter = 0;
+  int32_t mismatch = 0;
+
+  while (1) {
+    /*
+     * Extract values from file.
+     */
+    int32_t threadId;
+    uint32_t methodVal;
+    uint64_t elapsedTime;
+    if (readDataRecord(dataFp, &dataHeader, &threadId, &methodVal, &elapsedTime))
+      break;
+
+    int32_t action = METHOD_ACTION(methodVal);
+    int64_t methodId = METHOD_ID(methodVal);
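+    /* METHOD_ACTION()/METHOD_ID() unpack the record word: a small action code
+     * (enter/exit/unroll) lives in the low bits and the method identifier in
+     * the remaining bits. */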
+
+    /*
+     * Generate a line of output.
+     */
+    if (action == METHOD_TRACE_ENTER) {
+      traceData.depth[threadId]++;
+      lastEnter = methodId;
+    } else {
+      /* quick test for mismatched adjacent enter/exit */
+      if (lastEnter != 0 && lastEnter != methodId) mismatch = 1;
+    }
+
+    int32_t printDepth = traceData.depth[threadId];
+    char depthNote = ' ';
+    if (printDepth < 0) {
+      printDepth = 0;
+      depthNote = '-';
+    } else if (printDepth > MAX_STACK_DEPTH) {
+      printDepth = MAX_STACK_DEPTH;
+      depthNote = '+';
+    }
+
+    MethodEntry* method = lookupMethod(pKeys, methodId);
+    if (method == nullptr) {
+      method = &bogusMethod;
+      sprintf(bogusBuf, "methodId: %#" PRIx64, methodId);
+      method->signature = bogusBuf;
+    }
+
+    if (method->methodName) {
+      printf("%2d %s%c %8" PRIu64 "%c%s%s.%s %s\n", threadId, actionStr[action],
+             mismatch ? '!' : ' ', elapsedTime, depthNote,
+             spaces + (MAX_STACK_DEPTH - printDepth), method->className,
+             method->methodName, method->signature);
+    } else {
+      printf("%2d %s%c %8" PRIu64 "%c%s%s\n", threadId, actionStr[action],
+             mismatch ? '!' : ' ', elapsedTime, depthNote,
+             spaces + (MAX_STACK_DEPTH - printDepth), method->className);
+    }
+
+    if (action != METHOD_TRACE_ENTER) {
+      traceData.depth[threadId]--; /* METHOD_TRACE_EXIT or METHOD_TRACE_UNROLL */
+      lastEnter = 0;
+    }
+
+    mismatch = 0;
+  }
+
+  fclose(dataFp);
+  freeDataKeys(pKeys);
+}
+
+/* This routine adds the given time to the parent and child methods.
+ * This is called when the child routine exits, after the child has
+ * been popped from the stack.  The elapsedTime parameter is the
+ * duration of the child routine, including time spent in called routines.
+ */
+void addInclusiveTime(MethodEntry* parent, MethodEntry* child, uint64_t elapsedTime) {
+#if 0
+  bool verbose = false;
+  if (strcmp(child->className, debugClassName) == 0)
+    verbose = true;
+#endif
+
+  int32_t childIsRecursive = (child->recursiveEntries > 0);
+  int32_t parentIsRecursive = (parent->recursiveEntries > 1);
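+  /* Index 0 of the children[]/parents[]/numCalls[] pairs accumulates the
+   * non-recursive case; index 1 accumulates calls made while the method is
+   * already on the stack, so recursion is not double-counted. */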
+
+  if (child->recursiveEntries == 0) {
+    child->elapsedInclusive += elapsedTime;
+  } else if (child->recursiveEntries == 1) {
+    child->recursiveInclusive += elapsedTime;
+  }
+  child->numCalls[childIsRecursive] += 1;
+
+#if 0
+  if (verbose) {
+    fprintf(stderr,
+            "%s %d elapsedTime: %lld eI: %lld, rI: %lld\n",
+            child->className, child->recursiveEntries,
+            elapsedTime, child->elapsedInclusive,
+            child->recursiveInclusive);
+  }
+#endif
+
+  /* Find the child method in the parent */
+  TimedMethod* pTimed;
+  TimedMethod* children = parent->children[parentIsRecursive];
+  for (pTimed = children; pTimed; pTimed = pTimed->next) {
+    if (pTimed->method == child) {
+      pTimed->elapsedInclusive += elapsedTime;
+      pTimed->numCalls += 1;
+      break;
+    }
+  }
+  if (pTimed == nullptr) {
+    /* Allocate a new TimedMethod */
+    pTimed = new TimedMethod();
+    pTimed->elapsedInclusive = elapsedTime;
+    pTimed->numCalls = 1;
+    pTimed->method = child;
+
+    /* Add it to the front of the list */
+    pTimed->next = children;
+    parent->children[parentIsRecursive] = pTimed;
+  }
+
+  /* Find the parent method in the child */
+  TimedMethod* parents = child->parents[childIsRecursive];
+  for (pTimed = parents; pTimed; pTimed = pTimed->next) {
+    if (pTimed->method == parent) {
+      pTimed->elapsedInclusive += elapsedTime;
+      pTimed->numCalls += 1;
+      break;
+    }
+  }
+  if (pTimed == nullptr) {
+    /* Allocate a new TimedMethod */
+    pTimed = new TimedMethod();
+    pTimed->elapsedInclusive = elapsedTime;
+    pTimed->numCalls = 1;
+    pTimed->method = parent;
+
+    /* Add it to the front of the list */
+    pTimed->next = parents;
+    child->parents[childIsRecursive] = pTimed;
+  }
+
+#if 0
+  if (verbose) {
+    fprintf(stderr,
+            "  %s %d eI: %lld\n",
+            parent->className, parent->recursiveEntries,
+            pTimed->elapsedInclusive);
+  }
+#endif
+}
+
+/* Sorts a linked list and returns a newly allocated array containing
+ * the sorted entries.
+ */
+TimedMethod* sortTimedMethodList(TimedMethod* list, int32_t* num) {
+  /* Count the elements */
+  TimedMethod* pTimed;
+  int32_t num_entries = 0;
+  for (pTimed = list; pTimed; pTimed = pTimed->next) num_entries += 1;
+  *num = num_entries;
+  if (num_entries == 0) return nullptr;
+
+  /* Copy all the list elements to a new array and sort them */
+  int32_t ii;
+  TimedMethod* sorted = new TimedMethod[num_entries];
+  for (ii = 0, pTimed = list; pTimed; pTimed = pTimed->next, ++ii)
+    memcpy(&sorted[ii], pTimed, sizeof(TimedMethod));
+  qsort(sorted, num_entries, sizeof(TimedMethod), compareTimedMethod);
+
+  /* Fix up the "next" pointers so that they work. */
+  for (ii = 0; ii < num_entries - 1; ++ii) sorted[ii].next = &sorted[ii + 1];
+  sorted[num_entries - 1].next = nullptr;
+
+  return sorted;
+}
+
+/* Define flag values for printInclusiveMethod() */
+static const int32_t kIsRecursive = 1;
+
+/* This prints the inclusive stats for all the parents or children of a
+ * method, depending on the list that is passed in.
+ */
+void printInclusiveMethod(MethodEntry* method, TimedMethod* list, int32_t numCalls, int32_t flags) {
+  char buf[80];
+  const char* anchor_close = "";
+  const char* spaces = "      "; /* 6 spaces */
+  int32_t num_spaces = strlen(spaces);
+  const char* space_ptr = &spaces[num_spaces];
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+
+  if (gOptions.outputHtml) anchor_close = "</a>";
+
+  int32_t num;
+  TimedMethod* sorted = sortTimedMethodList(list, &num);
+  double methodTotal = method->elapsedInclusive;
+  for (TimedMethod* pTimed = sorted; pTimed; pTimed = pTimed->next) {
+    MethodEntry* relative = pTimed->method;
+    const char* className = relative->className;
+    const char* methodName = relative->methodName;
+    const char* signature = relative->signature;
+    double per = 100.0 * pTimed->elapsedInclusive / methodTotal;
+    sprintf(buf, "[%d]", relative->index);
+    if (gOptions.outputHtml) {
+      int32_t len = strlen(buf);
+      if (len > num_spaces) len = num_spaces;
+      sprintf(buf, "<a href=\"#m%d\">[%d]", relative->index, relative->index);
+      space_ptr = &spaces[len];
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+    }
+    int32_t nCalls = numCalls;
+    if (nCalls == 0) nCalls = relative->numCalls[0] + relative->numCalls[1];
+    if (relative->methodName) {
+      if (flags & kIsRecursive) {
+        // Don't display percentages for recursive functions
+        printf("%6s %5s   %6s %s%6s%s %6d/%-6d %9" PRIu64 " %s.%s %s\n", "", "",
+               "", space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className, methodName, signature);
+      } else {
+        printf("%6s %5s   %5.1f%% %s%6s%s %6d/%-6d %9" PRIu64 " %s.%s %s\n", "",
+               "", per, space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className, methodName, signature);
+      }
+    } else {
+      if (flags & kIsRecursive) {
+        // Don't display percentages for recursive functions
+        printf("%6s %5s   %6s %s%6s%s %6d/%-6d %9" PRIu64 " %s\n", "", "", "",
+               space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className);
+      } else {
+        printf("%6s %5s   %5.1f%% %s%6s%s %6d/%-6d %9" PRIu64 " %s\n", "", "",
+               per, space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className);
+      }
+    }
+  }
+}
+
+void countRecursiveEntries(CallStack* pStack, int32_t top, MethodEntry* method) {
+  method->recursiveEntries = 0;
+  for (int32_t ii = 0; ii < top; ++ii) {
+    if (pStack->calls[ii].method == method) method->recursiveEntries += 1;
+  }
+}
+
+void stackDump(CallStack* pStack, int32_t top) {
+  for (int32_t ii = 0; ii < top; ++ii) {
+    MethodEntry* method = pStack->calls[ii].method;
+    uint64_t entryTime = pStack->calls[ii].entryTime;
+    if (method->methodName) {
+      fprintf(stderr, "  %2d: %8" PRIu64 " %s.%s %s\n", ii, entryTime,
+              method->className, method->methodName, method->signature);
+    } else {
+      fprintf(stderr, "  %2d: %8" PRIu64 " %s\n", ii, entryTime, method->className);
+    }
+  }
+}
+
+void outputTableOfContents() {
+  printf("<a name=\"contents\"></a>\n");
+  printf("<h2>Table of Contents</h2>\n");
+  printf("<ul>\n");
+  printf("  <li><a href=\"#exclusive\">Exclusive profile</a></li>\n");
+  printf("  <li><a href=\"#inclusive\">Inclusive profile</a></li>\n");
+  printf("  <li><a href=\"#class\">Class/method profile</a></li>\n");
+  printf("  <li><a href=\"#method\">Method/class profile</a></li>\n");
+  printf("</ul>\n\n");
+}
+
+void outputNavigationBar() {
+  printf("<a href=\"#contents\">[Top]</a>\n");
+  printf("<a href=\"#exclusive\">[Exclusive]</a>\n");
+  printf("<a href=\"#inclusive\">[Inclusive]</a>\n");
+  printf("<a href=\"#class\">[Class]</a>\n");
+  printf("<a href=\"#method\">[Method]</a>\n");
+  printf("<br><br>\n");
+}
+
+void printExclusiveProfile(MethodEntry** pMethods, int32_t numMethods, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+  const char* anchor_close = "";
+  char anchor_buf[80];
+  anchor_buf[0] = 0;
+  if (gOptions.outputHtml) {
+    anchor_close = "</a>";
+    printf("<a name=\"exclusive\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  } else {
+    printf("\n%s\n", profileSeparator);
+  }
+
+  /* First, sort the methods into decreasing order of inclusive
+   * elapsed time so that we can assign the method indices.
+   */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+
+  for (int32_t ii = 0; ii < numMethods; ++ii) pMethods[ii]->index = ii;
+
+  /* Sort the methods into decreasing order of exclusive elapsed time. */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareElapsedExclusive);
+
+  printf("Total cycles: %" PRIu64 "\n\n", sumThreadTime);
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n");
+  }
+  printf("Exclusive elapsed times for each method, not including time spent in\n");
+  printf("children, sorted by exclusive time.\n\n");
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n<pre>\n");
+  }
+
+  printf("    Usecs  self %%  sum %%  Method\n");
+
+  double sum = 0;
+  double total = sumThreadTime;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    MethodEntry* method = pMethods[ii];
+    /* Don't show methods with zero cycles */
+    if (method->elapsedExclusive == 0) break;
+    const char* className = method->className;
+    const char* methodName = method->methodName;
+    const char* signature = method->signature;
+    sum += method->elapsedExclusive;
+    double per = 100.0 * method->elapsedExclusive / total;
+    double sum_per = 100.0 * sum / total;
+    if (gOptions.outputHtml) {
+      sprintf(anchor_buf, "<a href=\"#m%d\">", method->index);
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+    }
+    if (method->methodName) {
+      printf("%9" PRIu64 "  %6.2f %6.2f  %s[%d]%s %s.%s %s\n",
+             method->elapsedExclusive, per, sum_per, anchor_buf, method->index,
+             anchor_close, className, methodName, signature);
+    } else {
+      printf("%9" PRIu64 "  %6.2f %6.2f  %s[%d]%s %s\n",
+             method->elapsedExclusive, per, sum_per, anchor_buf, method->index,
+             anchor_close, className);
+    }
+  }
+  if (gOptions.outputHtml) {
+    printf("</pre>\n");
+  }
+}
+
+/* Check whether the child's inclusive time is at least gOptions.threshold
+ * percent of the parent's inclusive time. */
+int32_t checkThreshold(MethodEntry* parent, MethodEntry* child) {
+  double parentTime = parent->elapsedInclusive;
+  double childTime = child->elapsedInclusive;
+  int64_t percentage = (childTime / parentTime) * 100.0;
+  return (percentage < gOptions.threshold) ? 0 : 1;
+}
+
+void createLabels(FILE* file, MethodEntry* method) {
+  fprintf(file,
+          "node%d[label = \"[%d] %s.%s (%" PRIu64 ", %" PRIu64 ", %d)\"]\n",
+          method->index, method->index, method->className, method->methodName,
+          method->elapsedInclusive / 1000, method->elapsedExclusive / 1000,
+          method->numCalls[0]);
+
+  method->graphState = GRAPH_LABEL_VISITED;
+
+  for (TimedMethod* child = method->children[0]; child; child = child->next) {
+    MethodEntry* childMethod = child->method;
+
+    if ((childMethod->graphState & GRAPH_LABEL_VISITED) == 0 &&
+        checkThreshold(method, childMethod)) {
+      createLabels(file, child->method);
+    }
+  }
+}
+
+void createLinks(FILE* file, MethodEntry* method) {
+  method->graphState |= GRAPH_NODE_VISITED;
+
+  for (TimedMethod* child = method->children[0]; child; child = child->next) {
+    MethodEntry* childMethod = child->method;
+    if (checkThreshold(method, child->method)) {
+      fprintf(file, "node%d -> node%d\n", method->index, child->method->index);
+      // only visit children that haven't been visited before
+      if ((childMethod->graphState & GRAPH_NODE_VISITED) == 0) {
+        createLinks(file, child->method);
+      }
+    }
+  }
+}
+
+void createInclusiveProfileGraphNew(DataKeys* dataKeys) {
+  // Build the path for the intermediate dot file (a throwaway name unless
+  // keepDotFile is set).
+  char path[FILENAME_MAX];
+  if (gOptions.keepDotFile) {
+    snprintf(path, FILENAME_MAX, "%s.dot", gOptions.graphFileName);
+  } else {
+    snprintf(path, FILENAME_MAX, "dot-%d-%d.dot", (int32_t)time(nullptr), rand());
+  }
+
+  FILE* file = fopen(path, "w+");
+  if (file == nullptr) {
+    perror("fopen");
+    return;
+  }
+
+  fprintf(file, "digraph g {\nnode [shape = record,height=.1];\n");
+
+  createLabels(file, dataKeys->methods);
+  createLinks(file, dataKeys->methods);
+
+  fprintf(file, "}");
+  fclose(file);
+
+  // now that we have the dot file generate the image
+  char command[1024];
+  snprintf(command, 1024, "dot -Tpng -o \"%s\" \"%s\"", gOptions.graphFileName, path);
+
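+  /* Rendering shells out to Graphviz, so "dot" must be on the PATH; when
+   * gOptions.keepDotFile is set the intermediate .dot file is preserved
+   * instead of being removed below. */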
+  system(command);
+
+  if (!gOptions.keepDotFile) {
+    remove(path);
+  }
+}
+
+void printInclusiveProfile(MethodEntry** pMethods, int32_t numMethods, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+  char anchor_buf[80];
+  const char* anchor_close = "";
+  anchor_buf[0] = 0;
+  if (gOptions.outputHtml) {
+    anchor_close = "</a>";
+    printf("<a name=\"inclusive\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  } else {
+    printf("\n%s\n", profileSeparator);
+  }
+
+  /* Sort the methods into decreasing order of inclusive elapsed time. */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+
+  printf("\nInclusive elapsed times for each method and its parents and children,\n");
+  printf("sorted by inclusive time.\n\n");
+
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n<pre>\n");
+  }
+
+  printf("index  %%/total %%/self  index     calls         usecs name\n");
+
+  double total = sumThreadTime;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    char buf[40];
+
+    MethodEntry* method = pMethods[ii];
+    /* Don't show methods with zero cycles */
+    if (method->elapsedInclusive == 0) break;
+
+    const char* className = method->className;
+    const char* methodName = method->methodName;
+    const char* signature = method->signature;
+
+    if (gOptions.outputHtml) {
+      printf("<a name=\"m%d\"></a>", method->index);
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+    }
+    printf("----------------------------------------------------\n");
+
+    /* Sort and print the parents */
+    int32_t numCalls = method->numCalls[0] + method->numCalls[1];
+    printInclusiveMethod(method, method->parents[0], numCalls, 0);
+    if (method->parents[1]) {
+      printf("               +++++++++++++++++++++++++\n");
+      printInclusiveMethod(method, method->parents[1], numCalls, kIsRecursive);
+    }
+
+    double per = 100.0 * method->elapsedInclusive / total;
+    sprintf(buf, "[%d]", ii);
+    if (method->methodName) {
+      printf("%-6s %5.1f%%   %5s %6s %6d+%-6d %9" PRIu64 " %s.%s %s\n", buf,
+             per, "", "", method->numCalls[0], method->numCalls[1],
+             method->elapsedInclusive, className, methodName, signature);
+    } else {
+      printf("%-6s %5.1f%%   %5s %6s %6d+%-6d %9" PRIu64 " %s\n", buf, per, "",
+             "", method->numCalls[0], method->numCalls[1],
+             method->elapsedInclusive, className);
+    }
+    double excl_per = 100.0 * method->topExclusive / method->elapsedInclusive;
+    printf("%6s %5s   %5.1f%% %6s %6s %6s %9" PRIu64 "\n", "", "", excl_per,
+           "excl", "", "", method->topExclusive);
+
+    /* Sort and print the children */
+    printInclusiveMethod(method, method->children[0], 0, 0);
+    if (method->children[1]) {
+      printf("               +++++++++++++++++++++++++\n");
+      printInclusiveMethod(method, method->children[1], 0, kIsRecursive);
+    }
+  }
+  if (gOptions.outputHtml) {
+    printf("</pre>\n");
+  }
+}
+
+void createClassList(TraceData* traceData, MethodEntry** pMethods, int32_t numMethods) {
+  /* Sort the methods into alphabetical order to find the unique class
+   * names.
+   */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareClassNames);
+
+  /* Count the number of unique class names. */
+  const char* currentClassName = "";
+  const char* firstClassName = nullptr;
+  traceData->numClasses = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) {
+      continue;
+    }
+    if (strcmp(pMethods[ii]->className, currentClassName) != 0) {
+      // Remember the first one
+      if (firstClassName == nullptr) {
+        firstClassName = pMethods[ii]->className;
+      }
+      traceData->numClasses += 1;
+      currentClassName = pMethods[ii]->className;
+    }
+  }
+
+  if (traceData->numClasses == 0) {
+    traceData->classes = nullptr;
+    return;
+  }
+
+  /* Allocate space for all of the unique class names */
+  traceData->classes = new ClassEntry[traceData->numClasses];
+
+  /* Initialize the classes array */
+  memset(traceData->classes, 0, sizeof(ClassEntry) * traceData->numClasses);
+  ClassEntry* pClass = traceData->classes;
+  pClass->className = currentClassName = firstClassName;
+  int32_t prevNumMethods = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) {
+      continue;
+    }
+    if (strcmp(pMethods[ii]->className, currentClassName) != 0) {
+      pClass->numMethods = prevNumMethods;
+      (++pClass)->className = currentClassName = pMethods[ii]->className;
+      prevNumMethods = 0;
+    }
+    prevNumMethods += 1;
+  }
+  pClass->numMethods = prevNumMethods;
+
+  /* Create the array of MethodEntry pointers for each class */
+  pClass = nullptr;
+  currentClassName = "";
+  int32_t nextMethod = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) {
+      continue;
+    }
+    if (strcmp(pMethods[ii]->className, currentClassName) != 0) {
+      currentClassName = pMethods[ii]->className;
+      if (pClass == nullptr)
+        pClass = traceData->classes;
+      else
+        pClass++;
+      /* Allocate space for the methods array */
+      pClass->methods = new MethodEntry*[pClass->numMethods];
+      nextMethod = 0;
+    }
+    pClass->methods[nextMethod++] = pMethods[ii];
+  }
+}
+
+/* Prints enough HTML non-breaking spaces that the string "buf" is padded to
+ * at least "width" characters.  If width is negative, the spaces are added
+ * after the string instead of before it.
+ */
+void printHtmlField(char* buf, int32_t width) {
+  int32_t leadingSpaces = 1;
+  if (width < 0) {
+    width = -width;
+    leadingSpaces = 0;
+  }
+  int32_t len = strlen(buf);
+  int32_t numSpaces = width - len;
+  if (numSpaces <= 0) {
+    printf("%s", buf);
+    return;
+  }
+  if (leadingSpaces == 0) printf("%s", buf);
+  for (int32_t ii = 0; ii < numSpaces; ++ii) printf("&nbsp;");
+  if (leadingSpaces == 1) printf("%s", buf);
+}
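+
+/* For example, printHtmlField("12", 6) emits four "&nbsp;" entities followed
+ * by "12", while printHtmlField("12", -6) emits "12" first and pads after it.
+ */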
+
+void printClassProfiles(TraceData* traceData, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE];
+  char methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+
+  if (gOptions.outputHtml) {
+    printf("<a name=\"class\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  } else {
+    printf("\n%s\n", profileSeparator);
+  }
+
+  if (traceData->numClasses == 0) {
+    printf("\nNo classes.\n");
+    if (gOptions.outputHtml) {
+      printf("<br><br>\n");
+    }
+    return;
+  }
+
+  printf("\nExclusive elapsed time for each class, summed over all the methods\n");
+  printf("in the class.\n\n");
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n");
+  }
+
+  /* For each class, sum the exclusive times in all of the methods
+   * in that class.  Also sum the number of method calls.  Also
+   * sort the methods so the most expensive appear at the top.
+   */
+  ClassEntry* pClass = traceData->classes;
+  for (int32_t ii = 0; ii < traceData->numClasses; ++ii, ++pClass) {
+    // printf("%s %d methods\n", pClass->className, pClass->numMethods);
+    int32_t numMethods = pClass->numMethods;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pClass->methods[jj];
+      pClass->elapsedExclusive += method->elapsedExclusive;
+      pClass->numCalls[0] += method->numCalls[0];
+      pClass->numCalls[1] += method->numCalls[1];
+    }
+
+    /* Sort the methods into decreasing order of exclusive time */
+    qsort(pClass->methods, numMethods, sizeof(MethodEntry*), compareElapsedExclusive);
+  }
+
+  /* Allocate an array of pointers to the classes for more efficient sorting. */
+  ClassEntry** pClasses = new ClassEntry*[traceData->numClasses];
+  for (int32_t ii = 0; ii < traceData->numClasses; ++ii)
+    pClasses[ii] = &traceData->classes[ii];
+
+  /* Sort the classes into decreasing order of exclusive time */
+  qsort(pClasses, traceData->numClasses, sizeof(ClassEntry*), compareClassExclusive);
+
+  if (gOptions.outputHtml) {
+    printf(
+        "<div class=\"header\"><span "
+        "class=\"parent\">&nbsp;</span>&nbsp;&nbsp;&nbsp;");
+    printf("Cycles %%/total Cumul.%% &nbsp;Calls+Recur&nbsp; Class</div>\n");
+  } else {
+    printf("   Cycles %%/total Cumul.%%  Calls+Recur  Class\n");
+  }
+
+  double sum = 0;
+  double total = sumThreadTime;
+  for (int32_t ii = 0; ii < traceData->numClasses; ++ii) {
+    /* Skip classes with zero cycles */
+    pClass = pClasses[ii];
+    if (pClass->elapsedExclusive == 0) break;
+
+    sum += pClass->elapsedExclusive;
+    double per = 100.0 * pClass->elapsedExclusive / total;
+    double sum_per = 100.0 * sum / total;
+    const char* className = pClass->className;
+    if (gOptions.outputHtml) {
+      char buf[80];
+
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      printf(
+          "<div class=\"link\" onClick=\"javascript:toggle('d%d')\" "
+          "onMouseOver=\"javascript:onMouseOver(this)\" "
+          "onMouseOut=\"javascript:onMouseOut(this)\"><span class=\"parent\" "
+          "id=\"xd%d\">+</span>",
+          ii, ii);
+      sprintf(buf, "%" PRIu64, pClass->elapsedExclusive);
+      printHtmlField(buf, 9);
+      printf(" ");
+      sprintf(buf, "%.1f", per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      sprintf(buf, "%.1f", sum_per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      sprintf(buf, "%d", pClass->numCalls[0]);
+      printHtmlField(buf, 6);
+      printf("+");
+      sprintf(buf, "%d", pClass->numCalls[1]);
+      printHtmlField(buf, -6);
+      printf(" ");
+      printf("%s", className);
+      printf("</div>\n");
+      printf("<div class=\"parent\" id=\"d%d\">\n", ii);
+    } else {
+      printf("---------------------------------------------\n");
+      printf("%9" PRIu64 " %7.1f %7.1f %6d+%-6d %s\n", pClass->elapsedExclusive,
+             per, sum_per, pClass->numCalls[0], pClass->numCalls[1], className);
+    }
+
+    int32_t numMethods = pClass->numMethods;
+    double classExclusive = pClass->elapsedExclusive;
+    double sumMethods = 0;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pClass->methods[jj];
+      const char* methodName = method->methodName;
+      const char* signature = method->signature;
+      per = 100.0 * method->elapsedExclusive / classExclusive;
+      sumMethods += method->elapsedExclusive;
+      sum_per = 100.0 * sumMethods / classExclusive;
+      if (gOptions.outputHtml) {
+        char buf[80];
+
+        methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+        signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+        printf("<div class=\"leaf\"><span class=\"leaf\">&nbsp;</span>");
+        sprintf(buf, "%" PRIu64, method->elapsedExclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        sprintf(buf, "%" PRIu64, method->elapsedInclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        sprintf(buf, "%.1f", per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        sprintf(buf, "%.1f", sum_per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        sprintf(buf, "%d", method->numCalls[0]);
+        printHtmlField(buf, 6);
+        printf("+");
+        sprintf(buf, "%d", method->numCalls[1]);
+        printHtmlField(buf, -6);
+        printf("&nbsp;");
+        printf("<a href=\"#m%d\">[%d]</a>&nbsp;%s&nbsp;%s", method->index,
+               method->index, methodName, signature);
+        printf("</div>\n");
+      } else {
+        printf("%9" PRIu64 " %9" PRIu64 " %7.1f %7.1f %6d+%-6d [%d] %s %s\n",
+               method->elapsedExclusive, method->elapsedInclusive, per, sum_per,
+               method->numCalls[0], method->numCalls[1], method->index,
+               methodName, signature);
+      }
+    }
+    if (gOptions.outputHtml) {
+      printf("</div>\n");
+    }
+  }
+}
+
+void createUniqueMethodList(TraceData* traceData, MethodEntry** pMethods, int32_t numMethods) {
+  /* Sort the methods into alphabetical order of method names
+   * to find the unique method names.
+   */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareMethodNames);
+
+  /* Count the number of unique method names, ignoring class and signature. */
+  const char* currentMethodName = "";
+  traceData->numUniqueMethods = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) continue;
+    if (strcmp(pMethods[ii]->methodName, currentMethodName) != 0) {
+      traceData->numUniqueMethods += 1;
+      currentMethodName = pMethods[ii]->methodName;
+    }
+  }
+  if (traceData->numUniqueMethods == 0) return;
+
+  /* Allocate space for pointers to all of the unique methods */
+  traceData->uniqueMethods = new UniqueMethodEntry[traceData->numUniqueMethods];
+
+  /* Initialize the uniqueMethods array */
+  memset(traceData->uniqueMethods, 0, sizeof(UniqueMethodEntry) * traceData->numUniqueMethods);
+  UniqueMethodEntry* pUnique = traceData->uniqueMethods;
+  currentMethodName = nullptr;
+  int32_t prevNumMethods = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) continue;
+    if (currentMethodName == nullptr) currentMethodName = pMethods[ii]->methodName;
+    if (strcmp(pMethods[ii]->methodName, currentMethodName) != 0) {
+      currentMethodName = pMethods[ii]->methodName;
+      pUnique->numMethods = prevNumMethods;
+      pUnique++;
+      prevNumMethods = 0;
+    }
+    prevNumMethods += 1;
+  }
+  pUnique->numMethods = prevNumMethods;
+
+  /* Create the array of MethodEntry pointers for each unique method */
+  pUnique = nullptr;
+  currentMethodName = "";
+  int32_t nextMethod = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) continue;
+    if (strcmp(pMethods[ii]->methodName, currentMethodName) != 0) {
+      currentMethodName = pMethods[ii]->methodName;
+      if (pUnique == nullptr)
+        pUnique = traceData->uniqueMethods;
+      else
+        pUnique++;
+      /* Allocate space for the methods array */
+      pUnique->methods = new MethodEntry*[pUnique->numMethods];
+      nextMethod = 0;
+    }
+    pUnique->methods[nextMethod++] = pMethods[ii];
+  }
+}
+
+void printMethodProfiles(TraceData* traceData, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+
+  if (traceData->numUniqueMethods == 0) return;
+
+  if (gOptions.outputHtml) {
+    printf("<a name=\"method\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  } else {
+    printf("\n%s\n", profileSeparator);
+  }
+
+  printf("\nExclusive elapsed time for each method, summed over all the classes\n");
+  printf("that contain a method with the same name.\n\n");
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n");
+  }
+
+  /* For each unique method, sum the exclusive times in all of the methods
+   * with the same name.  Also sum the number of method calls.  Also
+   * sort the methods so the most expensive appear at the top.
+   */
+  UniqueMethodEntry* pUnique = traceData->uniqueMethods;
+  for (int32_t ii = 0; ii < traceData->numUniqueMethods; ++ii, ++pUnique) {
+    int32_t numMethods = pUnique->numMethods;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pUnique->methods[jj];
+      pUnique->elapsedExclusive += method->elapsedExclusive;
+      pUnique->numCalls[0] += method->numCalls[0];
+      pUnique->numCalls[1] += method->numCalls[1];
+    }
+
+    /* Sort the methods into decreasing order of exclusive time */
+    qsort(pUnique->methods, numMethods, sizeof(MethodEntry*), compareElapsedExclusive);
+  }
+
+  /* Allocate an array of pointers to the methods for more efficient sorting. */
+  UniqueMethodEntry** pUniqueMethods = new UniqueMethodEntry*[traceData->numUniqueMethods];
+  for (int32_t ii = 0; ii < traceData->numUniqueMethods; ++ii)
+    pUniqueMethods[ii] = &traceData->uniqueMethods[ii];
+
+  /* Sort the methods into decreasing order of exclusive time */
+  qsort(pUniqueMethods, traceData->numUniqueMethods, sizeof(UniqueMethodEntry*),
+        compareUniqueExclusive);
+
+  if (gOptions.outputHtml) {
+    printf(
+        "<div class=\"header\"><span "
+        "class=\"parent\">&nbsp;</span>&nbsp;&nbsp;&nbsp;");
+    printf("Cycles %%/total Cumul.%% &nbsp;Calls+Recur&nbsp; Method</div>\n");
+  } else {
+    printf("   Cycles %%/total Cumul.%%  Calls+Recur  Method\n");
+  }
+
+  double sum = 0;
+  double total = sumThreadTime;
+  for (int32_t ii = 0; ii < traceData->numUniqueMethods; ++ii) {
+    /* Skip methods with zero cycles */
+    pUnique = pUniqueMethods[ii];
+    if (pUnique->elapsedExclusive == 0) break;
+
+    sum += pUnique->elapsedExclusive;
+    double per = 100.0 * pUnique->elapsedExclusive / total;
+    double sum_per = 100.0 * sum / total;
+    const char* methodName = pUnique->methods[0]->methodName;
+    if (gOptions.outputHtml) {
+      char buf[80];
+
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      printf(
+          "<div class=\"link\" onClick=\"javascript:toggle('e%d')\" "
+          "onMouseOver=\"javascript:onMouseOver(this)\" "
+          "onMouseOut=\"javascript:onMouseOut(this)\"><span class=\"parent\" "
+          "id=\"xe%d\">+</span>",
+          ii, ii);
+      sprintf(buf, "%" PRIu64, pUnique->elapsedExclusive);
+      printHtmlField(buf, 9);
+      printf(" ");
+      sprintf(buf, "%.1f", per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      sprintf(buf, "%.1f", sum_per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      sprintf(buf, "%d", pUnique->numCalls[0]);
+      printHtmlField(buf, 6);
+      printf("+");
+      sprintf(buf, "%d", pUnique->numCalls[1]);
+      printHtmlField(buf, -6);
+      printf(" ");
+      printf("%s", methodName);
+      printf("</div>\n");
+      printf("<div class=\"parent\" id=\"e%d\">\n", ii);
+    } else {
+      printf("---------------------------------------------\n");
+      printf("%9" PRIu64 " %7.1f %7.1f %6d+%-6d %s\n",
+             pUnique->elapsedExclusive, per, sum_per, pUnique->numCalls[0],
+             pUnique->numCalls[1], methodName);
+    }
+    int32_t numMethods = pUnique->numMethods;
+    double methodExclusive = pUnique->elapsedExclusive;
+    double sumMethods = 0;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pUnique->methods[jj];
+      const char* className = method->className;
+      const char* signature = method->signature;
+      per = 100.0 * method->elapsedExclusive / methodExclusive;
+      sumMethods += method->elapsedExclusive;
+      sum_per = 100.0 * sumMethods / methodExclusive;
+      if (gOptions.outputHtml) {
+        char buf[80];
+
+        className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+        signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+        printf("<div class=\"leaf\"><span class=\"leaf\">&nbsp;</span>");
+        sprintf(buf, "%" PRIu64, method->elapsedExclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        sprintf(buf, "%" PRIu64, method->elapsedInclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        sprintf(buf, "%.1f", per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        sprintf(buf, "%.1f", sum_per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        sprintf(buf, "%d", method->numCalls[0]);
+        printHtmlField(buf, 6);
+        printf("+");
+        sprintf(buf, "%d", method->numCalls[1]);
+        printHtmlField(buf, -6);
+        printf("&nbsp;");
+        printf("<a href=\"#m%d\">[%d]</a>&nbsp;%s.%s&nbsp;%s", method->index,
+               method->index, className, methodName, signature);
+        printf("</div>\n");
+      } else {
+        printf("%9" PRIu64 " %9" PRIu64 " %7.1f %7.1f %6d+%-6d [%d] %s.%s %s\n",
+               method->elapsedExclusive, method->elapsedInclusive, per, sum_per,
+               method->numCalls[0], method->numCalls[1], method->index,
+               className, methodName, signature);
+      }
+    }
+    if (gOptions.outputHtml) {
+      printf("</div>\n");
+    }
+  }
+}
+
+/*
+ * Read the key and data files and return the MethodEntries for those files
+ */
+DataKeys* parseDataKeys(TraceData* traceData, const char* traceFileName, uint64_t* threadTime) {
+  MethodEntry* caller;
+
+  FILE* dataFp = fopen(traceFileName, "rb");
+  if (dataFp == nullptr) return nullptr;
+
+  DataKeys* dataKeys = parseKeys(dataFp, 0);
+  if (dataKeys == nullptr) {
+    fclose(dataFp);
+    return nullptr;
+  }
+
+  DataHeader dataHeader;
+  if (parseDataHeader(dataFp, &dataHeader) < 0) {
+    fclose(dataFp);
+    return dataKeys;
+  }
+
+#if 0
+  FILE* dumpStream = fopen("debug", "w");
+#endif
+  while (1) {
+    /*
+     * Extract values from file.
+     */
+    int32_t threadId;
+    uint32_t methodVal;
+    uint64_t currentTime;
+    if (readDataRecord(dataFp, &dataHeader, &threadId, &methodVal, &currentTime))
+      break;
+
+    int32_t action = METHOD_ACTION(methodVal);
+    int64_t methodId = METHOD_ID(methodVal);
+
+    /* Get the call stack for this thread */
+    CallStack* pStack = traceData->stacks[threadId];
+
+    /* If there is no call stack yet for this thread, then allocate one */
+    if (pStack == nullptr) {
+      pStack = new CallStack();
+      pStack->top = 0;
+      pStack->lastEventTime = currentTime;
+      pStack->threadStartTime = currentTime;
+      traceData->stacks[threadId] = pStack;
+    }
+
+    /* Lookup the current method */
+    MethodEntry* method = lookupMethod(dataKeys, methodId);
+    if (method == nullptr) method = &dataKeys->methods[UNKNOWN_INDEX];
+
+#if 0
+    if (method->methodName) {
+      fprintf(dumpStream, "%2d %-8llu %d %8llu r %d c %d %s.%s %s\n",
+              threadId, currentTime, action, pStack->threadStartTime,
+              method->recursiveEntries,
+              pStack->top, method->className, method->methodName,
+              method->signature);
+    } else {
+      fprintf(dumpStream, "%2d %-8llu %d %8llu r %d c %d %s\n",
+              threadId, currentTime, action, pStack->threadStartTime,
+              method->recursiveEntries,
+              pStack->top, method->className);
+    }
+#endif
+
+    if (action == METHOD_TRACE_ENTER) {
+      /* This is a method entry */
+      if (pStack->top >= MAX_STACK_DEPTH) {
+        fprintf(stderr, "Stack overflow (exceeded %d frames)\n",
+                MAX_STACK_DEPTH);
+        exit(1);
+      }
+
+      /* Get the caller method */
+      if (pStack->top >= 1)
+        caller = pStack->calls[pStack->top - 1].method;
+      else
+        caller = &dataKeys->methods[TOPLEVEL_INDEX];
+      countRecursiveEntries(pStack, pStack->top, caller);
+      caller->elapsedExclusive += currentTime - pStack->lastEventTime;
+#if 0
+      if (caller->elapsedExclusive > 10000000)
+        fprintf(dumpStream, "%llu current %llu last %llu diff %llu\n",
+                caller->elapsedExclusive, currentTime,
+                pStack->lastEventTime,
+                currentTime - pStack->lastEventTime);
+#endif
+      if (caller->recursiveEntries <= 1) {
+        caller->topExclusive += currentTime - pStack->lastEventTime;
+      }
+
+      /* Push the method on the stack for this thread */
+      pStack->calls[pStack->top].method = method;
+      pStack->calls[pStack->top++].entryTime = currentTime;
+    } else {
+      /* This is a method exit */
+      uint64_t entryTime = 0;
+
+      /* Pop the method off the stack for this thread */
+      if (pStack->top > 0) {
+        pStack->top -= 1;
+        entryTime = pStack->calls[pStack->top].entryTime;
+        if (method != pStack->calls[pStack->top].method) {
+          if (method->methodName) {
+            fprintf(stderr, "Exit from method %s.%s %s does not match stack:\n",
+                    method->className, method->methodName, method->signature);
+          } else {
+            fprintf(stderr, "Exit from method %s does not match stack:\n",
+                    method->className);
+          }
+          stackDump(pStack, pStack->top + 1);
+          exit(1);
+        }
+      }
+
+      /* Get the caller method */
+      if (pStack->top >= 1)
+        caller = pStack->calls[pStack->top - 1].method;
+      else
+        caller = &dataKeys->methods[TOPLEVEL_INDEX];
+      countRecursiveEntries(pStack, pStack->top, caller);
+      countRecursiveEntries(pStack, pStack->top, method);
+      uint64_t elapsed = currentTime - entryTime;
+      addInclusiveTime(caller, method, elapsed);
+      method->elapsedExclusive += currentTime - pStack->lastEventTime;
+      if (method->recursiveEntries == 0) {
+        method->topExclusive += currentTime - pStack->lastEventTime;
+      }
+    }
+    /* Remember the time of the last entry or exit event */
+    pStack->lastEventTime = currentTime;
+  }
+
+  /* If we have calls on the stack when the trace ends, then clean
+   * up the stack and add time to the callers by pretending that we
+   * are exiting from their methods now.
+   */
+  uint64_t sumThreadTime = 0;
+  for (int32_t threadId = 0; threadId < MAX_THREADS; ++threadId) {
+    CallStack* pStack = traceData->stacks[threadId];
+
+    /* If this thread never existed, then continue with next thread */
+    if (pStack == nullptr) continue;
+
+    /* Also, add up the time taken by all of the threads */
+    sumThreadTime += pStack->lastEventTime - pStack->threadStartTime;
+
+    for (int32_t ii = 0; ii < pStack->top; ++ii) {
+      if (ii == 0)
+        caller = &dataKeys->methods[TOPLEVEL_INDEX];
+      else
+        caller = pStack->calls[ii - 1].method;
+      MethodEntry* method = pStack->calls[ii].method;
+      countRecursiveEntries(pStack, ii, caller);
+      countRecursiveEntries(pStack, ii, method);
+
+      uint64_t entryTime = pStack->calls[ii].entryTime;
+      uint64_t elapsed = pStack->lastEventTime - entryTime;
+      addInclusiveTime(caller, method, elapsed);
+    }
+  }
+  caller = &dataKeys->methods[TOPLEVEL_INDEX];
+  caller->elapsedInclusive = sumThreadTime;
+
+#if 0
+  fclose(dumpStream);
+#endif
+
+  if (threadTime != nullptr) {
+    *threadTime = sumThreadTime;
+  }
+
+  fclose(dataFp);
+  return dataKeys;
+}
+
+MethodEntry** parseMethodEntries(DataKeys* dataKeys) {
+  /* Create a new array of pointers to the methods and sort the pointers
+   * instead of the actual MethodEntry structs.  We need to do this
+   * because there are other lists that contain pointers to the
+   * MethodEntry structs.
+   */
+  MethodEntry** pMethods = new MethodEntry*[dataKeys->numMethods];
+  for (int32_t ii = 0; ii < dataKeys->numMethods; ++ii) {
+    MethodEntry* entry = &dataKeys->methods[ii];
+    pMethods[ii] = entry;
+  }
+
+  return pMethods;
+}
+
+/*
+ * Produce a function profile from the given methods.
+ */
+void profileTrace(TraceData* traceData, MethodEntry** pMethods, int32_t numMethods,
+                  uint64_t sumThreadTime) {
+  /* Print the html header, if necessary */
+  if (gOptions.outputHtml) {
+    printf(htmlHeader, gOptions.sortableUrl);
+    outputTableOfContents();
+  }
+
+  printExclusiveProfile(pMethods, numMethods, sumThreadTime);
+  printInclusiveProfile(pMethods, numMethods, sumThreadTime);
+
+  createClassList(traceData, pMethods, numMethods);
+  printClassProfiles(traceData, sumThreadTime);
+
+  createUniqueMethodList(traceData, pMethods, numMethods);
+  printMethodProfiles(traceData, sumThreadTime);
+
+  if (gOptions.outputHtml) {
+    printf("%s", htmlFooter);
+  }
+}
+
+int32_t compareMethodNamesForDiff(const void* a, const void* b) {
+  const MethodEntry* methodA = *(const MethodEntry**) a;
+  const MethodEntry* methodB = *(const MethodEntry**) b;
+  if (methodA->methodName == nullptr || methodB->methodName == nullptr) {
+    return compareClassNames(a, b);
+  }
+  int32_t result = strcmp(methodA->methodName, methodB->methodName);
+  if (result == 0) {
+    result = strcmp(methodA->signature, methodB->signature);
+    if (result == 0) {
+      return strcmp(methodA->className, methodB->className);
+    }
+  }
+  return result;
+}
+
+int32_t findMatch(MethodEntry** methods, int32_t size, MethodEntry* matchThis) {
+  for (int32_t i = 0; i < size; i++) {
+    MethodEntry* method = methods[i];
+
+    if (method != nullptr && !compareMethodNamesForDiff(&method, &matchThis)) {
+      // printf("%s.%s == %s.%s<br>\n", matchThis->className, matchThis->methodName,
+      //        method->className, method->methodName);
+
+      return i;
+      // if (!compareMethodNames(&method, &matchThis)) return i;
+    }
+  }
+
+  return -1;
+}
+
+int32_t compareDiffEntriesExculsive(const void* a, const void* b) {
+  const DiffEntry* entryA = (const DiffEntry*) a;
+  const DiffEntry* entryB = (const DiffEntry*) b;
+
+  if (entryA->differenceExclusive < entryB->differenceExclusive) {
+    return 1;
+  } else if (entryA->differenceExclusive > entryB->differenceExclusive) {
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t compareDiffEntriesInculsive(const void* a, const void* b) {
+  const DiffEntry* entryA = (const DiffEntry*) a;
+  const DiffEntry* entryB = (const DiffEntry*) b;
+
+  if (entryA->differenceInclusive < entryB->differenceInclusive) {
+    return 1;
+  } else if (entryA->differenceInclusive > entryB->differenceInclusive) {
+    return -1;
+  }
+
+  return 0;
+}
+
+void printMissingMethod(MethodEntry* method) {
+  char classBuf[HTML_BUFSIZE];
+  char methodBuf[HTML_BUFSIZE];
+
+  char* className = htmlEscape(method->className, classBuf, HTML_BUFSIZE);
+  char* methodName = htmlEscape(method->methodName, methodBuf, HTML_BUFSIZE);
+
+  if (gOptions.outputHtml) printf("<tr><td>\n");
+
+  printf("%s.%s ", className, methodName);
+  if (gOptions.outputHtml) printf("</td><td>");
+
+  printf("%" PRIu64 " ", method->elapsedExclusive);
+  if (gOptions.outputHtml) printf("</td><td>");
+
+  printf("%" PRIu64 " ", method->elapsedInclusive);
+  if (gOptions.outputHtml) printf("</td><td>");
+
+  printf("%d\n", method->numCalls[0]);
+  if (gOptions.outputHtml) printf("</td><td>\n");
+}
+
+void createDiff(DataKeys* d1, DataKeys* d2) {
+  MethodEntry** methods1 = parseMethodEntries(d1);
+  MethodEntry** methods2 = parseMethodEntries(d2);
+
+  // Sort and assign the indices.
+  qsort(methods1, d1->numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+  for (int32_t i = 0; i < d1->numMethods; ++i) {
+    methods1[i]->index = i;
+  }
+
+  qsort(methods2, d2->numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+  for (int32_t i = 0; i < d2->numMethods; ++i) {
+    methods2[i]->index = i;
+  }
+
+  int32_t max = (d1->numMethods < d2->numMethods) ? d2->numMethods : d1->numMethods;
+  max++;
+  DiffEntry* diffs = new DiffEntry[max];
+  memset(diffs, 0, max * sizeof(DiffEntry));
+  DiffEntry* ptr = diffs;
+
+  // printf("<br>d1->numMethods: %d d1->numMethods: %d<br>\n",
+  //        d1->numMethods, d2->numMethods);
+
+  int32_t matches = 0;
+
+  for (int32_t i = 0; i < d1->numMethods; i++) {
+    int32_t match = findMatch(methods2, d2->numMethods, methods1[i]);
+    if (match >= 0) {
+      ptr->method1 = methods1[i];
+      ptr->method2 = methods2[match];
+
+      uint64_t e1 = ptr->method1->elapsedExclusive;
+      uint64_t e2 = ptr->method2->elapsedExclusive;
+      if (e1 > 0) {
+        ptr->differenceExclusive = e2 - e1;
+        ptr->differenceExclusivePercentage = (static_cast<double>(e2) /
+                                              static_cast<double>(e1)) * 100.0;
+      }
+
+      uint64_t i1 = ptr->method1->elapsedInclusive;
+      uint64_t i2 = ptr->method2->elapsedInclusive;
+      if (i1 > 0) {
+        ptr->differenceInclusive = i2 - i1;
+        ptr->differenceInclusivePercentage = (static_cast<double>(i2) /
+                                              static_cast<double>(i1)) * 100.0;
+      }
+
+      // clear these out so we don't find them again and we know which ones
+      // we have left over
+      methods1[i] = nullptr;
+      methods2[match] = nullptr;
+      ptr++;
+
+      matches++;
+    }
+  }
+  ptr->method1 = nullptr;
+  ptr->method2 = nullptr;
+
+  qsort(diffs, matches, sizeof(DiffEntry), compareDiffEntriesExclusive);
+  ptr = diffs;
+
+  if (gOptions.outputHtml) {
+    printf(htmlHeader, gOptions.sortableUrl);
+    printf("<h3>Table of Contents</h3>\n");
+    printf("<ul>\n");
+    printf("<li><a href='#exclusive'>Exclusive</a>\n");
+    printf("<li><a href='#inclusive'>Inclusive</a>\n");
+    printf("</ul>\n");
+    printf("Run 1: %s<br>\n", gOptions.diffFileName);
+    printf("Run 2: %s<br>\n", gOptions.traceFileName);
+    printf("<a name=\"exclusive\"></a><h3 id=\"exclusive\">Exclusive</h3>\n");
+    printf(tableHeader, "exclusive_table");
+  }
+
+  char classBuf[HTML_BUFSIZE];
+  char methodBuf[HTML_BUFSIZE];
+  while (ptr->method1 != nullptr && ptr->method2 != nullptr) {
+    if (gOptions.outputHtml) printf("<tr><td>\n");
+
+    char* className = htmlEscape(ptr->method1->className, classBuf, HTML_BUFSIZE);
+    char* methodName = htmlEscape(ptr->method1->methodName, methodBuf, HTML_BUFSIZE);
+
+    printf("%s.%s ", className, methodName);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method1->elapsedExclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method2->elapsedExclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->differenceExclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%.2f\n", ptr->differenceExclusivePercentage);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method1->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method2->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td></tr>\n");
+
+    ptr++;
+  }
+
+  if (gOptions.outputHtml) printf("</table>\n");
+
+  if (gOptions.outputHtml) {
+    printf(htmlHeader, gOptions.sortableUrl);
+    printf("Run 1: %s<br>\n", gOptions.diffFileName);
+    printf("Run 2: %s<br>\n", gOptions.traceFileName);
+    printf("<a name=\"inclusive\"></a><h3 id=\"inclusive\">Inclusive</h3>\n");
+    printf(tableHeader, "inclusive_table");
+  }
+
+  qsort(diffs, matches, sizeof(DiffEntry), compareDiffEntriesInclusive);
+  ptr = diffs;
+
+  while (ptr->method1 != nullptr && ptr->method2 != nullptr) {
+    if (gOptions.outputHtml) printf("<tr><td>\n");
+
+    char* className = htmlEscape(ptr->method1->className, classBuf, HTML_BUFSIZE);
+    char* methodName = htmlEscape(ptr->method1->methodName, methodBuf, HTML_BUFSIZE);
+
+    printf("%s.%s ", className, methodName);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method1->elapsedInclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method2->elapsedInclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->differenceInclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%.2f\n", ptr->differenceInclusivePercentage);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method1->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method2->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td></tr>\n");
+
+    ptr++;
+  }
+
+  if (gOptions.outputHtml) {
+    printf("</table>\n");
+    printf("<h3>Run 1 methods not found in Run 2</h3>");
+    printf(tableHeaderMissing, "?");
+  }
+
+  for (int32_t i = 0; i < d1->numMethods; ++i) {
+    if (methods1[i] != nullptr) {
+      printMissingMethod(methods1[i]);
+    }
+  }
+
+  if (gOptions.outputHtml) {
+    printf("</table>\n");
+    printf("<h3>Run 2 methods not found in Run 1</h3>");
+    printf(tableHeaderMissing, "?");
+  }
+
+  for (int32_t i = 0; i < d2->numMethods; ++i) {
+    if (methods2[i] != nullptr) {
+      printMissingMethod(methods2[i]);
+    }
+  }
+
+  if (gOptions.outputHtml) printf("</body></html>\n");
+}
+
+int32_t usage(const char* program) {
+  fprintf(stderr, "Copyright (C) 2006 The Android Open Source Project\n\n");
+  fprintf(stderr,
+          "usage: %s [-hko] [-s sortable] [-d trace-file-name] [-g outfile] "
+          "[-t threshold] trace-file-name\n",
+          program);
+  fprintf(stderr, "  -d trace-file-name  - Diff with this trace\n");
+  fprintf(stderr, "  -g outfile          - Write graph to 'outfile'\n");
+  fprintf(stderr,
+          "  -k                  - When writing a graph, keep the intermediate "
+          "DOT file\n");
+  fprintf(stderr, "  -h                  - Turn on HTML output\n");
+  fprintf(
+      stderr,
+      "  -o                  - Dump the dmtrace file instead of profiling\n");
+  fprintf(stderr,
+          "  -s                  - URL base where the sortable javascript "
+          "file is hosted\n");
+  fprintf(stderr,
+          "  -t threshold        - Threshold percentage for including nodes in "
+          "the graph\n");
+  return 2;
+}
+
+// Returns non-zero if there was an error.
+int32_t parseOptions(int32_t argc, char** argv) {
+  while (1) {
+    int32_t opt = getopt(argc, argv, "d:hg:kos:t:");
+    if (opt == -1) break;
+    switch (opt) {
+      case 'd':
+        gOptions.diffFileName = optarg;
+        break;
+      case 'g':
+        gOptions.graphFileName = optarg;
+        break;
+      case 'k':
+        gOptions.keepDotFile = 1;
+        break;
+      case 'h':
+        gOptions.outputHtml = 1;
+        break;
+      case 'o':
+        gOptions.dump = 1;
+        break;
+      case 's':
+        gOptions.sortableUrl = optarg;
+        break;
+      case 't':
+        gOptions.threshold = atoi(optarg);
+        break;
+      default:
+        return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Parse args and run the requested action: dump, diff against another trace,
+ * or profile.
+ */
+int32_t main(int32_t argc, char** argv) {
+  gOptions.threshold = -1;
+
+  // Parse the options
+  if (parseOptions(argc, argv) || argc - optind != 1) return usage(argv[0]);
+
+  gOptions.traceFileName = argv[optind];
+
+  if (gOptions.threshold < 0 || 100 <= gOptions.threshold) {
+    gOptions.threshold = 20;
+  }
+
+  if (gOptions.dump) {
+    dumpTrace();
+    return 0;
+  }
+
+  uint64_t sumThreadTime = 0;
+
+  TraceData data1;
+  DataKeys* dataKeys = parseDataKeys(&data1, gOptions.traceFileName, &sumThreadTime);
+  if (dataKeys == nullptr) {
+    fprintf(stderr, "Cannot read \"%s\".\n", gOptions.traceFileName);
+    exit(1);
+  }
+
+  if (gOptions.diffFileName != nullptr) {
+    uint64_t sum2;
+    TraceData data2;
+    DataKeys* d2 = parseDataKeys(&data2, gOptions.diffFileName, &sum2);
+    if (d2 == nullptr) {
+      fprintf(stderr, "Cannot read \"%s\".\n", gOptions.diffFileName);
+      exit(1);
+    }
+
+    createDiff(d2, dataKeys);
+
+    freeDataKeys(d2);
+  } else {
+    MethodEntry** methods = parseMethodEntries(dataKeys);
+    profileTrace(&data1, methods, dataKeys->numMethods, sumThreadTime);
+    if (gOptions.graphFileName != nullptr) {
+      createInclusiveProfileGraphNew(dataKeys);
+    }
+    free(methods);
+  }
+
+  freeDataKeys(dataKeys);
+
+  return 0;
+}
diff --git a/tools/extract-embedded-java b/tools/extract-embedded-java
new file mode 100755
index 0000000..e966552
--- /dev/null
+++ b/tools/extract-embedded-java
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ "$#" -ne "2" ]; then
+  echo "Usage: ./extract-embedded-java smali_dir java_dir"
+  exit 1
+fi
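+
+# Example invocation (directory names are illustrative):
+#   ./extract-embedded-java test-src/smali test-src/java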
+
+# Check the input and output are directories
+[[ -d "$1" ]] || exit 1
+[[ -d "$2" ]] || exit 1
+
+# For every file with the .smali extension, set $f to its basename without
+# the .smali suffix, and then:
+for f in `find "$1" -type f -name "*.smali" | xargs -n 1 -P 0 -i basename -s .smali \{\}`; do
+  # keep only the lines starting with '# ', strip that prefix, and write the
+  # result to a file ${f}.java. Each file is processed concurrently.
+  grep "^# " "$1/${f}.smali" | sed "s:# ::" > "${2}/${f}.java" &
+done
+
+# wait for all the files to be written
+wait
diff --git a/tools/generate-operator-out.py b/tools/generate-operator-out.py
index c74508d..3bd62fe 100755
--- a/tools/generate-operator-out.py
+++ b/tools/generate-operator-out.py
@@ -86,8 +86,10 @@
       if m:
         enclosing_classes.append(m.group(1))
         continue
-      m = re.compile(r'^\s*\}( .*)?;').search(raw_line)
-      if m:
+
+      # End of class/struct -- be careful not to match "do { ... } while" constructs by accident
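+      # e.g. "};" and "} Foo;" close a class/struct, but "} while (cond);" does not.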
+      m = re.compile(r'^\s*\}(\s+)?(while)?(.+)?;').search(raw_line)
+      if m and not m.group(2):
         enclosing_classes = enclosing_classes[0:len(enclosing_classes) - 1]
         continue
 
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 7ada189..9a8b462 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -157,5 +157,12 @@
   modes: [device],
   names: ["libcore.java.util.TimeZoneTest#testAllDisplayNames"],
   bug: 22786792
+},
+{
+  description: "Lack of IPv6 on some buildbot slaves",
+  result: EXEC_FAILED,
+  names: ["libcore.io.OsTest#test_byteBufferPositions_sendto_recvfrom_af_inet6",
+          "libcore.io.OsTest#test_sendtoSocketAddress_af_inet6"],
+  bug: 25178637
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index edec362..de27a6f 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -30,8 +30,6 @@
 
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
-# We use Quick's image on target because optimizing's image is not compiled debuggable.
-image="-Ximage:/data/art-test/core.art"
 args=$@
 debuggee_args="-Xcompiler-option --debuggable"
 device_dir="--device-dir=/data/local/tmp"
@@ -41,6 +39,8 @@
 image_compiler_option=""
 debug="no"
 verbose="no"
+image="-Ximage:/data/art-test/core-jit.art"
+vm_args=""
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
 
@@ -88,7 +88,11 @@
   fi
 done
 
-vm_args="--vm-arg $image"
+if [[ "$image" != "" ]]; then
+  vm_args="--vm-arg $image"
+fi
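+# Run both the test harness VM and the debuggee with the JIT enabled.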
+vm_args="$vm_args --vm-arg -Xusejit:true"
+debuggee_args="$debuggee_args -Xusejit:true"
 if [[ $debug == "yes" ]]; then
   art="$art -d"
   art_debugee="$art_debugee -d"
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 80f7a37..4b5a5ca 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -57,7 +57,6 @@
                   "org.apache.harmony.luni"
                   "org.apache.harmony.nio"
                   "org.apache.harmony.regex"
-                  "org.apache.harmony.security"
                   "org.apache.harmony.testframework"
                   "org.apache.harmony.tests.java.io"
                   "org.apache.harmony.tests.java.lang"
@@ -68,6 +67,10 @@
                   "tests.java.lang.String"
                   "jsr166")
 
+# List of packages we could run, but for which we don't have the rights to
+# revert changes in case of failures:
+# "org.apache.harmony.security"
+
 vogar_args=$@
 while true; do
   if [[ "$1" == "--mode=device" ]]; then
@@ -102,4 +105,4 @@
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
+vogar $vogar_args --vm-arg -Xusejit:true --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh
index 7faf86e..d5b8989 100755
--- a/tools/setup-buildbot-device.sh
+++ b/tools/setup-buildbot-device.sh
@@ -30,3 +30,10 @@
 
 echo -e "${green}List properties${nc}"
 adb shell getprop
+
+echo -e "${green}Uptime${nc}"
+adb shell uptime
+
+echo -e "${green}Kill stalled dalvikvm processes${nc}"
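+# The second column of the device "ps" output is the PID.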
+processes=$(adb shell "ps" | grep dalvikvm | awk '{print $2}')
+for i in $processes; do adb shell kill -9 $i; done