Merge "Move backward branch instrumentation to all branch."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index af64470..e3f0c24 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -39,6 +39,7 @@
   NonStaticLeafMethods \
   ProtoCompare \
   ProtoCompare2 \
+  ProfileTestMultiDex \
   StaticLeafMethods \
   Statics \
   StaticsFromCode \
@@ -65,7 +66,7 @@
 
 # Dex file dependencies for each gtest.
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
-ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
+ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
@@ -78,6 +79,8 @@
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
+ART_GTEST_profile_assistant_test_DEX_DEPS := ProfileTestMultiDex
+ART_GTEST_profile_compilation_info_test_DEX_DEPS := ProfileTestMultiDex
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
@@ -191,13 +194,12 @@
   runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
-  runtime/gc/space/dlmalloc_space_base_test.cc \
   runtime/gc/space/dlmalloc_space_static_test.cc \
   runtime/gc/space/dlmalloc_space_random_test.cc \
-  runtime/gc/space/rosalloc_space_base_test.cc \
+  runtime/gc/space/large_object_space_test.cc \
   runtime/gc/space/rosalloc_space_static_test.cc \
   runtime/gc/space/rosalloc_space_random_test.cc \
-  runtime/gc/space/large_object_space_test.cc \
+  runtime/gc/space/space_create_test.cc \
   runtime/gc/task_processor_test.cc \
   runtime/gtest_test.cc \
   runtime/handle_scope_test.cc \
@@ -208,6 +210,7 @@
   runtime/interpreter/safe_math_test.cc \
   runtime/interpreter/unstarted_runtime_test.cc \
   runtime/java_vm_ext_test.cc \
+  runtime/jit/profile_compilation_info_test.cc \
   runtime/lambda/closure_test.cc \
   runtime/lambda/shorty_field_type_test.cc \
   runtime/leb128_test.cc \
@@ -267,6 +270,7 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
+  compiler/profile_assistant_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4589736..6eeef3f 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -143,7 +143,9 @@
 	jni/quick/arm64/calling_convention_arm64.cc \
 	linker/arm64/relative_patcher_arm64.cc \
 	optimizing/code_generator_arm64.cc \
+	optimizing/instruction_simplifier_arm.cc \
 	optimizing/instruction_simplifier_arm64.cc \
+	optimizing/instruction_simplifier_shared.cc \
 	optimizing/intrinsics_arm64.cc \
 	utils/arm64/assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 9594dce..afc8463 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -168,6 +168,12 @@
   return nullptr;
 }
 
+// Get ProfileCompilationInfo that should be passed to the driver.
+ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() {
+  // Null, profile information will not be taken into account.
+  return nullptr;
+}
+
 void CommonCompilerTest::SetUp() {
   CommonRuntimeTest::SetUp();
   {
@@ -207,7 +213,7 @@
                                             timer_.get(),
                                             -1,
                                             /* dex_to_oat_map */ nullptr,
-                                            /* profile_compilation_info */ nullptr));
+                                            GetProfileCompilationInfo()));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index b491946..7e0fbab 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,6 +23,7 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
+#include "jit/offline_profiling_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -75,6 +76,8 @@
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetCompiledMethods();
 
+  virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+
   virtual void TearDown();
 
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 658e7d6..c250bd9 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -100,8 +100,12 @@
     } else {
       // The method index is actually the dex PC in this case.
       // Calculate the proper dex file and target method idx.
+
+      // We must be in JIT mode if we get here.
       CHECK(use_jit);
-      CHECK_EQ(invoke_type, kVirtual);
+
+      // The invoke type better be virtual, except for the string init special case above.
+      CHECK_EQ(invoke_type, string_init ? kDirect : kVirtual);
       // Don't devirt if we are in a different dex file since we can't have direct invokes in
       // another dex file unless we always put a direct / patch pointer.
       devirt_target = nullptr;
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 32d7518..4617668 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -110,9 +110,9 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsDouble], "AbsDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxInt], "MinMaxInt must be static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
@@ -153,7 +153,7 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
-static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet_must_not_be_static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 82c0e86..4c03e5d 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -240,6 +241,94 @@
   EXPECT_TRUE(expected->empty());
 }
 
+class CompilerDriverProfileTest : public CompilerDriverTest {
+ protected:
+  ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
+    ScopedObjectAccess soa(Thread::Current());
+    std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+
+    ProfileCompilationInfo info;
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+      std::cout << std::string(dex_file->GetLocation());
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+    }
+    return &profile_info_;
+  }
+
+  std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
+    if (clazz == "Main") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Main.getA()",
+          "java.lang.String Main.getB()"});
+    } else if (clazz == "Second") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Second.getX()",
+          "java.lang.String Second.getY()"});
+    } else {
+      return std::unordered_set<std::string>();
+    }
+  }
+
+  void CheckCompiledMethods(jobject class_loader,
+                            const std::string& clazz,
+                            const std::unordered_set<std::string>& expected_methods) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+    ASSERT_NE(klass, nullptr);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    size_t number_of_compiled_methods = 0;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      std::string name = PrettyMethod(&m, true);
+      const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+      ASSERT_NE(code, nullptr);
+      if (expected_methods.find(name) != expected_methods.end()) {
+        number_of_compiled_methods++;
+        EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code));
+      } else {
+        EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code));
+      }
+    }
+    EXPECT_EQ(expected_methods.size(), number_of_compiled_methods);
+  }
+
+ private:
+  ProfileCompilationInfo profile_info_;
+};
+
+TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Need to enable dex-file writability. Methods rejected to be compiled will run through the
+  // dex-to-dex compiler.
+  ProfileCompilationInfo info;
+  for (const DexFile* dex_file : GetDexFiles(class_loader)) {
+    ASSERT_TRUE(dex_file->EnableWrite());
+  }
+
+  CompileAll(class_loader);
+
+  std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main");
+  std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second");
+  CheckCompiledMethods(class_loader, "LMain;", m);
+  CheckCompiledMethods(class_loader, "LSecond;", s);
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch
 
 }  // namespace art
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index b67e8dd..35b3e15 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -29,7 +29,7 @@
   // TODO: Arm S0–S31 register mapping is obsolescent.
   //   We should use VFP-v3/Neon D0-D31 mapping instead.
   //   However, D0 is aliased to pair of S0 and S1, so using that
-  //   mapping we can not easily say S0 is spilled and S1 is not.
+  //   mapping we cannot easily say S0 is spilled and S1 is not.
   //   There are ways around this in DWARF but they are complex.
   //   It would be much simpler to always spill whole D registers.
   //   Arm64 mapping is correct since we already do this there.
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index a7461a5..46484b1 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -100,12 +100,6 @@
       header_.sh_entsize = entsize;
     }
 
-    ~Section() OVERRIDE {
-      if (started_) {
-        CHECK(finished_);
-      }
-    }
-
     // Start writing of this section.
     void Start() {
       CHECK(!started_);
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 3e64762..e03614f 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -212,7 +212,7 @@
     case kNone:
       break;
   }
-  LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
+  LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
   UNREACHABLE();
 }
 
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6859605..12132c0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -95,25 +95,37 @@
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0,
-                           0,
-                           0,
-                           compiler_driver_.get(),
-                           writer.get(),
-                           /*compiling_boot_image*/true,
-                           &timings,
-                           &key_value_store);
+      const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
       std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
           compiler_driver_->GetInstructionSet(),
           &compiler_driver_->GetCompilerOptions(),
           oat_file.GetFile());
-      bool success = writer->PrepareImageAddressSpace();
-      ASSERT_TRUE(success);
-
       elf_writer->Start();
-
+      OatWriter oat_writer(/*compiling_boot_image*/true, &timings);
       OutputStream* rodata = elf_writer->StartRoData();
+      for (const DexFile* dex_file : dex_files) {
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writer.AddRawDexFileSource(raw_dex_file,
+                                       dex_file->GetLocation().c_str(),
+                                       dex_file->GetLocationChecksum());
+      }
+      std::unique_ptr<MemMap> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      bool dex_files_ok = oat_writer.WriteAndOpenDexFiles(
+          rodata,
+          oat_file.GetFile(),
+          compiler_driver_->GetInstructionSet(),
+          compiler_driver_->GetInstructionSetFeatures(),
+          &key_value_store,
+          &opened_dex_files_map,
+          &opened_dex_files);
+      ASSERT_TRUE(dex_files_ok);
+      oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files);
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
       bool rodata_ok = oat_writer.WriteRodata(rodata);
       ASSERT_TRUE(rodata_ok);
       elf_writer->EndRoData(rodata);
@@ -123,12 +135,15 @@
       ASSERT_TRUE(text_ok);
       elf_writer->EndText(text);
 
+      bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+      ASSERT_TRUE(header_ok);
+
       elf_writer->SetBssSize(oat_writer.GetBssSize());
       elf_writer->WriteDynamicSection();
       elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
       elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
-      success = elf_writer->End();
+      bool success = elf_writer->End();
 
       ASSERT_TRUE(success);
     }
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 7a2b74e..c0d15f3 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -38,6 +38,7 @@
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
+#include "utils/test_dex_file_builder.h"
 
 namespace art {
 
@@ -127,23 +128,74 @@
                 const std::vector<const DexFile*>& dex_files,
                 SafeMap<std::string, std::string>& key_value_store) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(dex_files,
-                         42U,
-                         4096U,
-                         0,
-                         compiler_driver_.get(),
-                         nullptr,
-                         /*compiling_boot_image*/false,
-                         &timings,
-                         &key_value_store);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const DexFile* dex_file : dex_files) {
+      ArrayRef<const uint8_t> raw_dex_file(
+          reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+          dex_file->GetHeader().file_size_);
+      if (!oat_writer.AddRawDexFileSource(raw_dex_file,
+                                          dex_file->GetLocation().c_str(),
+                                          dex_file->GetLocationChecksum())) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool WriteElf(File* file,
+                const std::vector<const char*>& dex_filenames,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const char* dex_filename : dex_filenames) {
+      if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool WriteElf(File* file,
+                ScopedFd&& zip_fd,
+                const char* location,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
+      return false;
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool DoWriteElf(File* file,
+                  OatWriter& oat_writer,
+                  SafeMap<std::string, std::string>& key_value_store) {
     std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
         compiler_driver_->GetInstructionSet(),
         &compiler_driver_->GetCompilerOptions(),
         file);
-
     elf_writer->Start();
-
     OutputStream* rodata = elf_writer->StartRoData();
+    std::unique_ptr<MemMap> opened_dex_files_map;
+    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+    if (!oat_writer.WriteAndOpenDexFiles(rodata,
+                                         file,
+                                         compiler_driver_->GetInstructionSet(),
+                                         compiler_driver_->GetInstructionSetFeatures(),
+                                         &key_value_store,
+                                         &opened_dex_files_map,
+                                         &opened_dex_files)) {
+      return false;
+    }
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    std::vector<const DexFile*> dex_files;
+    for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+      dex_files.push_back(dex_file.get());
+      ScopedObjectAccess soa(Thread::Current());
+      class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+    }
+    oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files);
     if (!oat_writer.WriteRodata(rodata)) {
       return false;
     }
@@ -155,6 +207,10 @@
     }
     elf_writer->EndText(text);
 
+    if (!oat_writer.WriteHeader(elf_writer->GetStream(), 42U, 4096U, 0)) {
+      return false;
+    }
+
     elf_writer->SetBssSize(oat_writer.GetBssSize());
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
@@ -167,6 +223,117 @@
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
 
+class ZipBuilder {
+ public:
+  explicit ZipBuilder(File* zip_file) : zip_file_(zip_file) { }
+
+  bool AddFile(const char* location, const void* data, size_t size) {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    ZipFileHeader file_header;
+    file_header.crc32 = crc32(0u, reinterpret_cast<const Bytef*>(data), size);
+    file_header.compressed_size = size;
+    file_header.uncompressed_size = size;
+    file_header.filename_length = strlen(location);
+
+    if (!zip_file_->WriteFully(&file_header, sizeof(file_header)) ||
+        !zip_file_->WriteFully(location, file_header.filename_length) ||
+        !zip_file_->WriteFully(data, size)) {
+      return false;
+    }
+
+    CentralDirectoryFileHeader cdfh;
+    cdfh.crc32 = file_header.crc32;
+    cdfh.compressed_size = size;
+    cdfh.uncompressed_size = size;
+    cdfh.filename_length = file_header.filename_length;
+    cdfh.relative_offset_of_local_file_header = offset;
+    file_data_.push_back(FileData { cdfh, location });
+    return true;
+  }
+
+  bool Finish() {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    size_t central_directory_size = 0u;
+    for (const FileData& file_data : file_data_) {
+      if (!zip_file_->WriteFully(&file_data.cdfh, sizeof(file_data.cdfh)) ||
+          !zip_file_->WriteFully(file_data.location, file_data.cdfh.filename_length)) {
+        return false;
+      }
+      central_directory_size += sizeof(file_data.cdfh) + file_data.cdfh.filename_length;
+    }
+    EndOfCentralDirectoryRecord eocd_record;
+    eocd_record.number_of_central_directory_records_on_this_disk = file_data_.size();
+    eocd_record.total_number_of_central_directory_records = file_data_.size();
+    eocd_record.size_of_central_directory = central_directory_size;
+    eocd_record.offset_of_start_of_central_directory = offset;
+    return
+        zip_file_->WriteFully(&eocd_record, sizeof(eocd_record)) &&
+        zip_file_->Flush() == 0;
+  }
+
+ private:
+  struct PACKED(1) ZipFileHeader {
+    uint32_t signature = 0x04034b50;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+  };
+
+  struct PACKED(1) CentralDirectoryFileHeader {
+    uint32_t signature = 0x02014b50;
+    uint16_t version_made_by = 10;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+    uint16_t file_comment_length = 0u;          // No file comment.
+    uint16_t disk_number_where_file_starts = 0u;
+    uint16_t internal_file_attributes = 0u;
+    uint32_t external_file_attributes = 0u;
+    uint32_t relative_offset_of_local_file_header;
+  };
+
+  struct PACKED(1) EndOfCentralDirectoryRecord {
+    uint32_t signature = 0x06054b50;
+    uint16_t number_of_this_disk = 0u;
+    uint16_t disk_where_central_directory_starts = 0u;
+    uint16_t number_of_central_directory_records_on_this_disk;
+    uint16_t total_number_of_central_directory_records;
+    uint32_t size_of_central_directory;
+    uint32_t offset_of_start_of_central_directory;
+    uint16_t comment_length = 0u;               // No file comment.
+  };
+
+  struct FileData {
+    CentralDirectoryFileHeader cdfh;
+    const char* location;
+  };
+
+  File* zip_file_;
+  std::vector<FileData> file_data_;
+};
+
 TEST_F(OatTest, WriteRead) {
   TimingLogger timings("OatTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -327,4 +494,189 @@
   EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
 }
 
+TEST_F(OatTest, DexFileInput) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  std::vector<const char*> input_filenames;
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "int", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file1.GetFilename().c_str());
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file2.GetFilename().c_str());
+
+  ScratchFile oat_file;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+  ASSERT_TRUE(success);
+
+  std::string error_msg;
+  std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                         oat_file.GetFilename(),
+                                                         nullptr,
+                                                         nullptr,
+                                                         false,
+                                                         nullptr,
+                                                         &error_msg));
+  ASSERT_TRUE(opened_oat_file != nullptr);
+  ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+  std::unique_ptr<const DexFile> opened_dex_file1 =
+      opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+  std::unique_ptr<const DexFile> opened_dex_file2 =
+      opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+  ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                      &opened_dex_file1->GetHeader(),
+                      dex_file1_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file1_data->GetLocation(), opened_dex_file1->GetLocation());
+
+  ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                      &opened_dex_file2->GetHeader(),
+                      dex_file2_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+}
+
+TEST_F(OatTest, ZipFileInput) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "long", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes.dex",
+                                &dex_file1_data->GetHeader(),
+                                dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes2.dex",
+                                &dex_file2_data->GetHeader(),
+                                dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  {
+    // Test using the AddDexFileSource() interface with the zip file.
+    std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr);
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+
+  {
+    // Test using the AddZipDexFileSource() interface with the zip file handle.
+    ScopedFd zip_fd(dup(zip_file.GetFd()));
+    ASSERT_NE(-1, zip_fd.get());
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(),
+                       std::move(zip_fd),
+                       zip_file.GetFilename().c_str(),
+                       key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr);
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 025e35e..c74c41f 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -16,12 +16,14 @@
 
 #include "oat_writer.h"
 
+#include <unistd.h>
 #include <zlib.h>
 
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method-inl.h"
 #include "base/allocator.h"
 #include "base/bit_vector.h"
+#include "base/file_magic.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -49,9 +51,77 @@
 #include "type_lookup_table.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
+#include "zip_archive.h"
 
 namespace art {
 
+namespace {  // anonymous namespace
+
+typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
+
+const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
+    return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);
+}
+
+}  // anonymous namespace
+
+// Defines the location of the raw dex file to write.
+class OatWriter::DexFileSource {
+ public:
+  explicit DexFileSource(ZipEntry* zip_entry)
+      : type_(kZipEntry), source_(zip_entry) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(File* raw_file)
+      : type_(kRawFile), source_(raw_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  explicit DexFileSource(const uint8_t* dex_file)
+      : type_(kRawData), source_(dex_file) {
+    DCHECK(source_ != nullptr);
+  }
+
+  bool IsZipEntry() const { return type_ == kZipEntry; }
+  bool IsRawFile() const { return type_ == kRawFile; }
+  bool IsRawData() const { return type_ == kRawData; }
+
+  ZipEntry* GetZipEntry() const {
+    DCHECK(IsZipEntry());
+    DCHECK(source_ != nullptr);
+    return static_cast<ZipEntry*>(const_cast<void*>(source_));
+  }
+
+  File* GetRawFile() const {
+    DCHECK(IsRawFile());
+    DCHECK(source_ != nullptr);
+    return static_cast<File*>(const_cast<void*>(source_));
+  }
+
+  const uint8_t* GetRawData() const {
+    DCHECK(IsRawData());
+    DCHECK(source_ != nullptr);
+    return static_cast<const uint8_t*>(source_);
+  }
+
+  void Clear() {
+    type_ = kNone;
+    source_ = nullptr;
+  }
+
+ private:
+  enum Type {
+    kNone,
+    kZipEntry,
+    kRawFile,
+    kRawData,
+  };
+
+  Type type_;
+  const void* source_;
+};
+
 class OatWriter::OatClass {
  public:
   OatClass(size_t offset,
@@ -116,11 +186,30 @@
 
 class OatWriter::OatDexFile {
  public:
-  OatDexFile(size_t offset, const DexFile& dex_file);
+  OatDexFile(const char* dex_file_location,
+             DexFileSource source,
+             CreateTypeLookupTable create_type_lookup_table);
   OatDexFile(OatDexFile&& src) = default;
 
+  const char* GetLocation() const {
+    return dex_file_location_data_;
+  }
+
+  void ReserveTypeLookupTable(OatWriter* oat_writer);
+  void ReserveClassOffsets(OatWriter* oat_writer);
+
   size_t SizeOf() const;
-  bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+  bool Write(OatWriter* oat_writer, OutputStream* out) const;
+  bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out);
+
+  // The source of the dex file.
+  DexFileSource source_;
+
+  // Whether to create the type lookup table.
+  CreateTypeLookupTable create_type_lookup_table_;
+
+  // Dex file size. Initialized when writing the dex file.
+  size_t dex_file_size_;
 
   // Offset of start of OatDexFile from beginning of OatHeader. It is
   // used to validate file position when writing.
@@ -128,11 +217,13 @@
 
   // Data to write.
   uint32_t dex_file_location_size_;
-  const uint8_t* dex_file_location_data_;
+  const char* dex_file_location_data_;
   uint32_t dex_file_location_checksum_;
   uint32_t dex_file_offset_;
+  uint32_t class_offsets_offset_;
   uint32_t lookup_table_offset_;
-  TypeLookupTable* lookup_table_;  // Owned by the dex file.
+
+  // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
  private:
@@ -151,26 +242,20 @@
   DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " offset_=" << offset_
 
-OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uintptr_t image_file_location_oat_begin,
-                     int32_t image_patch_delta,
-                     const CompilerDriver* compiler,
-                     ImageWriter* image_writer,
-                     bool compiling_boot_image,
-                     TimingLogger* timings,
-                     SafeMap<std::string, std::string>* key_value_store)
-  : compiler_driver_(compiler),
-    image_writer_(image_writer),
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+  : write_state_(WriteState::kAddingDexFileSources),
+    timings_(timings),
+    raw_dex_files_(),
+    zip_archives_(),
+    zipped_dex_files_(),
+    zipped_dex_file_locations_(),
+    compiler_driver_(nullptr),
+    image_writer_(nullptr),
     compiling_boot_image_(compiling_boot_image),
-    dex_files_(&dex_files),
+    dex_files_(nullptr),
     size_(0u),
     bss_size_(0u),
     oat_data_offset_(0u),
-    image_file_location_oat_checksum_(image_file_location_oat_checksum),
-    image_file_location_oat_begin_(image_file_location_oat_begin),
-    image_patch_delta_(image_patch_delta),
-    key_value_store_(key_value_store),
     oat_header_(nullptr),
     size_dex_file_alignment_(0),
     size_executable_offset_alignment_(0),
@@ -197,55 +282,192 @@
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
     size_oat_dex_file_offset_(0),
+    size_oat_dex_file_class_offsets_offset_(0),
     size_oat_dex_file_lookup_table_offset_(0),
-    size_oat_dex_file_class_offsets_(0),
     size_oat_lookup_table_alignment_(0),
     size_oat_lookup_table_(0),
+    size_oat_class_offsets_alignment_(0),
+    size_oat_class_offsets_(0),
     size_oat_class_type_(0),
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
     method_offset_map_() {
-  CHECK(key_value_store != nullptr);
-  if (compiling_boot_image) {
-    CHECK(image_writer != nullptr);
+}
+
+bool OatWriter::AddDexFileSource(const char* filename,
+                                 const char* location,
+                                 CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  uint32_t magic;
+  std::string error_msg;
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+  if (fd.get() == -1) {
+    PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
+    return false;
+  } else if (IsDexMagic(magic)) {
+    // The file is open for reading, not writing, so it's OK to let the File destructor
+    // close it without checking for explicit Close(), so pass checkUsage = false.
+    raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+    oat_dex_files_.emplace_back(location,
+                                DexFileSource(raw_dex_files_.back().get()),
+                                create_type_lookup_table);
+  } else if (IsZipMagic(magic)) {
+    if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
+      return false;
+    }
+  } else {
+    LOG(ERROR) << "Expected valid zip or dex file: '" << filename << "'";
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source(s) from a zip file specified by a file handle.
+bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+                                        const char* location,
+                                        CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  std::string error_msg;
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+  ZipArchive* zip_archive = zip_archives_.back().get();
+  if (zip_archive == nullptr) {
+    LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
+        << error_msg;
+    return false;
+  }
+  for (size_t i = 0; ; ++i) {
+    std::string entry_name = DexFile::GetMultiDexClassesDexName(i);
+    std::unique_ptr<ZipEntry> entry(zip_archive->Find(entry_name.c_str(), &error_msg));
+    if (entry == nullptr) {
+      break;
+    }
+    zipped_dex_files_.push_back(std::move(entry));
+    zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(i, location));
+    const char* full_location = zipped_dex_file_locations_.back().c_str();
+    oat_dex_files_.emplace_back(full_location,
+                                DexFileSource(zipped_dex_files_.back().get()),
+                                create_type_lookup_table);
+  }
+  if (zipped_dex_file_locations_.empty()) {
+    LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
+    return false;
+  }
+  return true;
+}
+
+// Add dex file source from raw memory.
+bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
+                                    const char* location,
+                                    uint32_t location_checksum,
+                                    CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  if (data.size() < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Provided data is shorter than dex file header. size: "
+               << data.size() << " File: " << location;
+    return false;
+  }
+  if (!ValidateDexFileHeader(data.data(), location)) {
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(data.data());
+  if (data.size() < header->file_size_) {
+    LOG(ERROR) << "Truncated dex file data. Data size: " << data.size()
+               << " file size from header: " << header->file_size_ << " File: " << location;
+    return false;
+  }
+
+  oat_dex_files_.emplace_back(location, DexFileSource(data.data()), create_type_lookup_table);
+  oat_dex_files_.back().dex_file_location_checksum_ = location_checksum;
+  return true;
+}
+
+dchecked_vector<const char*> OatWriter::GetSourceLocations() const {
+  dchecked_vector<const char*> locations;
+  locations.reserve(oat_dex_files_.size());
+  for (const OatDexFile& oat_dex_file : oat_dex_files_) {
+    locations.push_back(oat_dex_file.GetLocation());
+  }
+  return locations;
+}
+
+bool OatWriter::WriteAndOpenDexFiles(
+    OutputStream* rodata,
+    File* file,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    SafeMap<std::string, std::string>* key_value_store,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  CHECK(write_state_ == WriteState::kAddingDexFileSources);
+
+  size_t offset = InitOatHeader(instruction_set,
+                                instruction_set_features,
+                                dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
+                                key_value_store);
+  offset = InitOatDexFiles(offset);
+  size_ = offset;
+
+  std::unique_ptr<MemMap> dex_files_map;
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  if (!WriteDexFiles(rodata, file)) {
+    return false;
+  }
+  // Reserve space for type lookup tables and update type_lookup_table_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveTypeLookupTable(this);
+  }
+  size_t size_after_type_lookup_tables = size_;
+  // Reserve space for class offsets and update class_offsets_offset_.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.ReserveClassOffsets(this);
+  }
+  if (!WriteOatDexFiles(rodata) ||
+      !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
+      !OpenDexFiles(file, &dex_files_map, &dex_files) ||
+      !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
+    return false;
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  write_state_ = WriteState::kPrepareLayout;
+  return true;
+}
+
+void OatWriter::PrepareLayout(const CompilerDriver* compiler,
+                              ImageWriter* image_writer,
+                              const std::vector<const DexFile*>& dex_files) {
+  CHECK(write_state_ == WriteState::kPrepareLayout);
+
+  dex_files_ = &dex_files;
+
+  compiler_driver_ = compiler;
+  image_writer_ = image_writer;
+  if (compiling_boot_image_) {
+    CHECK(image_writer_ != nullptr);
   }
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
+  CHECK_EQ(instruction_set, oat_header_->GetInstructionSet());
   const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
   relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
                                                       &method_offset_map_);
 
-  size_t offset;
+  uint32_t offset = size_;
   {
-    TimingLogger::ScopedTiming split("InitOatHeader", timings);
-    offset = InitOatHeader();
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
-    offset = InitOatDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitDexFiles", timings);
-    offset = InitDexFiles(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitLookupTables", timings);
-    offset = InitLookupTables(offset);
-  }
-  {
-    TimingLogger::ScopedTiming split("InitOatClasses", timings);
+    TimingLogger::ScopedTiming split("InitOatClasses", timings_);
     offset = InitOatClasses(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatMaps", timings);
+    TimingLogger::ScopedTiming split("InitOatMaps", timings_);
     offset = InitOatMaps(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCode", timings);
+    TimingLogger::ScopedTiming split("InitOatCode", timings_);
     offset = InitOatCode(offset);
   }
   {
-    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings_);
     offset = InitOatCodeDexFiles(offset);
   }
   size_ = offset;
@@ -255,7 +477,7 @@
     size_t bss_start = RoundUp(size_, kPageSize);
     size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
     bss_size_ = 0u;
-    for (const DexFile* dex_file : dex_files) {
+    for (const DexFile* dex_file : *dex_files_) {
       dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
       DexCacheArraysLayout layout(pointer_size, dex_file);
       bss_size_ += layout.Size();
@@ -265,9 +487,10 @@
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   if (compiling_boot_image_) {
     CHECK_EQ(image_writer_ != nullptr,
-             key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+             oat_header_->GetStoreValueByKey(OatHeader::kImageLocationKey) == nullptr);
   }
-  CHECK_ALIGNED(image_patch_delta_, kPageSize);
+
+  write_state_ = WriteState::kWriteRoData;
 }
 
 OatWriter::~OatWriter() {
@@ -1134,59 +1357,26 @@
   return true;
 }
 
-size_t OatWriter::InitOatHeader() {
-  oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                      compiler_driver_->GetInstructionSetFeatures(),
-                                      dchecked_integral_cast<uint32_t>(dex_files_->size()),
-                                      key_value_store_));
-  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
-  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
-
+size_t OatWriter::InitOatHeader(InstructionSet instruction_set,
+                                const InstructionSetFeatures* instruction_set_features,
+                                uint32_t num_dex_files,
+                                SafeMap<std::string, std::string>* key_value_store) {
+  TimingLogger::ScopedTiming split("InitOatHeader", timings_);
+  oat_header_.reset(OatHeader::Create(instruction_set,
+                                      instruction_set_features,
+                                      num_dex_files,
+                                      key_value_store));
+  size_oat_header_ += sizeof(OatHeader);
+  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
   return oat_header_->GetHeaderSize();
 }
 
 size_t OatWriter::InitOatDexFiles(size_t offset) {
-  // create the OatDexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    const DexFile* dex_file = (*dex_files_)[i];
-    CHECK(dex_file != nullptr);
-    oat_dex_files_.emplace_back(offset, *dex_file);
-    offset += oat_dex_files_.back().SizeOf();
-  }
-  return offset;
-}
-
-size_t OatWriter::InitDexFiles(size_t offset) {
-  // calculate the offsets within OatDexFiles to the DexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    // dex files are required to be 4 byte aligned
-    size_t original_offset = offset;
-    offset = RoundUp(offset, 4);
-    size_dex_file_alignment_ += offset - original_offset;
-
-    // set offset in OatDexFile to DexFile
-    oat_dex_files_[i].dex_file_offset_ = offset;
-
-    const DexFile* dex_file = (*dex_files_)[i];
-
-    // Initialize type lookup table
-    oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
-
-    offset += dex_file->GetHeader().file_size_;
-  }
-  return offset;
-}
-
-size_t OatWriter::InitLookupTables(size_t offset) {
+  TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+  // Initialize offsets of dex files.
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
-    if (oat_dex_file.lookup_table_ != nullptr) {
-      uint32_t aligned_offset = RoundUp(offset, 4);
-      oat_dex_file.lookup_table_offset_ = aligned_offset;
-      size_oat_lookup_table_alignment_ += aligned_offset - offset;
-      offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength();
-    } else {
-      oat_dex_file.lookup_table_offset_ = 0;
-    }
+    oat_dex_file.offset_ = offset;
+    offset += oat_dex_file.SizeOf();
   }
   return offset;
 }
@@ -1239,7 +1429,6 @@
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
   if (compiler_driver_->IsBootImage()) {
-    CHECK_EQ(image_patch_delta_, 0);
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field, fn_name) \
@@ -1264,7 +1453,6 @@
     oat_header_->SetQuickImtConflictTrampolineOffset(0);
     oat_header_->SetQuickResolutionTrampolineOffset(0);
     oat_header_->SetQuickToInterpreterBridgeOffset(0);
-    oat_header_->SetImagePatchDelta(image_patch_delta_);
   }
   return offset;
 }
@@ -1289,22 +1477,15 @@
 }
 
 bool OatWriter::WriteRodata(OutputStream* out) {
-  if (!GetOatDataOffset(out)) {
+  CHECK(write_state_ == WriteState::kWriteRoData);
+
+  if (!WriteClassOffsets(out)) {
+    LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
   }
-  const size_t file_offset = oat_data_offset_;
 
-  // Reserve space for header. It will be written last - after updating the checksum.
-  size_t header_size = oat_header_->GetHeaderSize();
-  if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
-    return false;
-  }
-  size_oat_header_ += sizeof(OatHeader);
-  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
-
-  if (!WriteTables(out, file_offset)) {
-    LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation();
+  if (!WriteClasses(out)) {
+    LOG(ERROR) << "Failed to write classes to " << out->GetLocation();
     return false;
   }
 
@@ -1313,6 +1494,7 @@
     LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
     return false;
   }
+  size_t file_offset = oat_data_offset_;
   size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
   relative_offset = WriteMaps(out, file_offset, relative_offset);
   if (relative_offset == 0) {
@@ -1332,11 +1514,13 @@
   }
   DCHECK_OFFSET();
 
+  write_state_ = WriteState::kWriteText;
   return true;
 }
 
 bool OatWriter::WriteCode(OutputStream* out) {
-  size_t header_size = oat_header_->GetHeaderSize();
+  CHECK(write_state_ == WriteState::kWriteText);
+
   const size_t file_offset = oat_data_offset_;
   size_t relative_offset = oat_header_->GetExecutableOffset();
   DCHECK_OFFSET();
@@ -1390,10 +1574,12 @@
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
     DO_STAT(size_oat_dex_file_offset_);
+    DO_STAT(size_oat_dex_file_class_offsets_offset_);
     DO_STAT(size_oat_dex_file_lookup_table_offset_);
-    DO_STAT(size_oat_dex_file_class_offsets_);
     DO_STAT(size_oat_lookup_table_alignment_);
     DO_STAT(size_oat_lookup_table_);
+    DO_STAT(size_oat_class_offsets_alignment_);
+    DO_STAT(size_oat_class_offsets_);
     DO_STAT(size_oat_class_type_);
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
@@ -1408,88 +1594,90 @@
   CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
 
-  // Finalize the header checksum.
+  write_state_ = WriteState::kWriteHeader;
+  return true;
+}
+
+bool OatWriter::WriteHeader(OutputStream* out,
+                            uint32_t image_file_location_oat_checksum,
+                            uintptr_t image_file_location_oat_begin,
+                            int32_t image_patch_delta) {
+  CHECK(write_state_ == WriteState::kWriteHeader);
+
+  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum);
+  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin);
+  if (compiler_driver_->IsBootImage()) {
+    CHECK_EQ(image_patch_delta, 0);
+    CHECK_EQ(oat_header_->GetImagePatchDelta(), 0);
+  } else {
+    CHECK_ALIGNED(image_patch_delta, kPageSize);
+    oat_header_->SetImagePatchDelta(image_patch_delta);
+  }
   oat_header_->UpdateChecksumWithHeaderData();
 
-  // Write the header now that the checksum is final.
+  const size_t file_offset = oat_data_offset_;
+
+  off_t current_offset = out->Seek(0, kSeekCurrent);
+  if (current_offset == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to get current offset from " << out->GetLocation();
+    return false;
+  }
   if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
   DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
+
+  // Flush all other data before writing the header.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing oat header to " << out->GetLocation();
+    return false;
+  }
+  // Write the header.
+  size_t header_size = oat_header_->GetHeaderSize();
   if (!out->WriteFully(oat_header_.get(), header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
   }
-  if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) {
-    PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
+  // Flush the header data.
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush after writing oat header to " << out->GetLocation();
     return false;
   }
-  DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent));
 
+  if (out->Seek(current_offset, kSeekSet) == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to seek back after writing oat header to " << out->GetLocation();
+    return false;
+  }
+  DCHECK_EQ(current_offset, out->Seek(0, kSeekCurrent));
+
+  write_state_ = WriteState::kDone;
   return true;
 }
 
-bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    if (!oat_dex_files_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
-      return false;
-    }
-  }
-  for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
-    uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    const DexFile* dex_file = (*dex_files_)[i];
-    if (!out->WriteFully(&dex_file->GetHeader(), dex_file->GetHeader().file_size_)) {
-      PLOG(ERROR) << "Failed to write dex file " << dex_file->GetLocation()
-                  << " to " << out->GetLocation();
-      return false;
-    }
-    size_dex_file_ += dex_file->GetHeader().file_size_;
-  }
-  if (!WriteLookupTables(out, file_offset)) {
-    return false;
-  }
-  for (size_t i = 0; i != oat_classes_.size(); ++i) {
-    if (!oat_classes_[i].Write(this, out, file_offset)) {
-      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
-      return false;
-    }
-  }
-  return true;
-}
-
-bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
-  for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
-    const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_;
-    const TypeLookupTable* table = oat_dex_files_[i].lookup_table_;
-    DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
-    if (lookup_table_offset == 0) {
-      continue;
-    }
-    const uint32_t expected_offset = file_offset + lookup_table_offset;
-    off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-    if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-      const DexFile* dex_file = (*dex_files_)[i];
-      PLOG(ERROR) << "Failed to seek to lookup table section. Actual: " << actual_offset
-                  << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
-      return false;
-    }
-    if (table != nullptr) {
-      if (!WriteData(out, table->RawData(), table->RawDataLength())) {
-        const DexFile* dex_file = (*dex_files_)[i];
-        PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
-                    << " to " << out->GetLocation();
+bool OatWriter::WriteClassOffsets(OutputStream* out) {
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (oat_dex_file.class_offsets_offset_ != 0u) {
+      uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_;
+      off_t actual_offset = out->Seek(expected_offset, kSeekSet);
+      if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+        PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset
+                    << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation();
         return false;
       }
-      size_oat_lookup_table_ += table->RawDataLength();
+      if (!oat_dex_file.WriteClassOffsets(this, out)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool OatWriter::WriteClasses(OutputStream* out) {
+  for (OatClass& oat_class : oat_classes_) {
+    if (!oat_class.Write(this, out, oat_data_offset_)) {
+      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
+      return false;
     }
   }
   return true;
@@ -1585,6 +1773,455 @@
   return true;
 }
 
+bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) {
+  // Read the dex file header and perform minimal verification.
+  uint8_t raw_header[sizeof(DexFile::Header)];
+  if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) {
+    PLOG(ERROR) << "Failed to read dex file header. Actual: "
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) {
+    return false;
+  }
+
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->dex_file_location_checksum_ = header->checksum_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
+  if (!DexFile::IsMagicValid(raw_header)) {
+    LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
+    return false;
+  }
+  if (!DexFile::IsVersionValid(raw_header)) {
+    LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+  if (header->file_size_ < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
+               << " File: " << location;
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) {
+  TimingLogger::ScopedTiming split("WriteDexFiles", timings_);
+
+  // Get the elf file offset of the oat file.
+  if (!GetOatDataOffset(rodata)) {
+    return false;
+  }
+
+  // Write dex files.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    if (!WriteDexFile(rodata, file, &oat_dex_file)) {
+      return false;
+    }
+  }
+
+  // Close sources.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.source_.Clear();  // Get rid of the reference, it's about to be invalidated.
+  }
+  zipped_dex_files_.clear();
+  zip_archives_.clear();
+  raw_dex_files_.clear();
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file) {
+  if (!SeekToDexFile(rodata, file, oat_dex_file)) {
+    return false;
+  }
+  if (oat_dex_file->source_.IsZipEntry()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetZipEntry())) {
+      return false;
+    }
+  } else if (oat_dex_file->source_.IsRawFile()) {
+    if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetRawFile())) {
+      return false;
+    }
+  } else {
+    DCHECK(oat_dex_file->source_.IsRawData());
+    if (!WriteDexFile(rodata, oat_dex_file, oat_dex_file->source_.GetRawData())) {
+      return false;
+    }
+  }
+
+  // Update current size and account for the written data.
+  DCHECK_EQ(size_, oat_dex_file->dex_file_offset_);
+  size_ += oat_dex_file->dex_file_size_;
+  size_dex_file_ += oat_dex_file->dex_file_size_;
+  return true;
+}
+
+bool OatWriter::SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file) {
+  // Dex files are required to be 4 byte aligned.
+  size_t original_offset = size_;
+  size_t offset = RoundUp(original_offset, 4);
+  size_dex_file_alignment_ += offset - original_offset;
+
+  // Seek to the start of the dex file and flush any pending operations in the stream.
+  // Verify that, after flushing the stream, the file is at the same offset as the stream.
+  uint32_t start_offset = oat_data_offset_ + offset;
+  off_t actual_offset = out->Seek(start_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!out->Flush()) {
+    PLOG(ERROR) << "Failed to flush before writing dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Stream/file position mismatch! Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  size_ = offset;
+  oat_dex_file->dex_file_offset_ = offset;
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             ZipEntry* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  // Extract the dex file and get the extracted size.
+  std::string error_msg;
+  if (!dex_file->ExtractToFile(*file, &error_msg)) {
+    LOG(ERROR) << "Failed to extract dex file from ZIP entry: " << error_msg
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  off_t extracted_end = lseek(file->Fd(), 0, SEEK_CUR);
+  if (extracted_end == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed get end offset after writing dex file from ZIP entry."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (extracted_end < static_cast<off_t>(start_offset)) {
+    LOG(ERROR) << "Dex file end position is before start position! End: " << extracted_end
+               << " Start: " << start_offset
+               << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  uint64_t extracted_size = static_cast<uint64_t>(extracted_end - start_offset);
+  if (extracted_size < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Extracted dex file is shorter than dex file header. size: "
+               << extracted_size << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Read the dex file header and extract required data to OatDexFile.
+  off_t actual_offset = lseek(file->Fd(), start_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek back to dex file header. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(file, oat_dex_file)) {
+    return false;
+  }
+  if (extracted_size < oat_dex_file->dex_file_size_) {
+    LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size
+               << " file size from header: " << oat_dex_file->dex_file_size_
+               << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Override the checksum from header with the CRC from ZIP entry.
+  oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32();
+
+  // Seek both file and stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  actual_offset = lseek(file->Fd(), end_offset, SEEK_SET);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // If we extracted more than the size specified in the header, truncate the file.
+  if (extracted_size > oat_dex_file->dex_file_size_) {
+    if (file->SetLength(end_offset) != 0) {
+      PLOG(ERROR) << "Failed to truncate excessive dex file length."
+                  << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             File* file,
+                             OatDexFile* oat_dex_file,
+                             File* dex_file) {
+  size_t start_offset = oat_data_offset_ + size_;
+  DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+  off_t input_offset = lseek(dex_file->Fd(), 0, SEEK_SET);
+  if (input_offset != static_cast<off_t>(0)) {
+    PLOG(ERROR) << "Failed to seek to dex file header. Actual: " << input_offset
+                << " Expected: 0"
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (!ReadDexFileHeader(dex_file, oat_dex_file)) {
+    return false;
+  }
+
+  // Copy the input dex file using sendfile().
+  if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) {
+    PLOG(ERROR) << "Failed to copy dex file to oat file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  if (file->Flush() != 0) {
+    PLOG(ERROR) << "Failed to flush dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  // Check file position and seek the stream to the end offset.
+  size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+  off_t actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Unexpected file position after copying dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+  actual_offset = rodata->Seek(end_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(end_offset)) {
+    PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+                << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+    return false;
+  }
+
+  return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+                             OatDexFile* oat_dex_file,
+                             const uint8_t* dex_file) {
+  // Note: The raw data has already been checked to contain the header
+  // and all the data that the header specifies as the file size.
+  DCHECK(dex_file != nullptr);
+  DCHECK(ValidateDexFileHeader(dex_file, oat_dex_file->GetLocation()));
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(dex_file);
+
+  if (!rodata->WriteFully(dex_file, header->file_size_)) {
+    PLOG(ERROR) << "Failed to write dex file " << oat_dex_file->GetLocation()
+                << " to " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing dex file."
+                << " File: " << oat_dex_file->GetLocation();
+    return false;
+  }
+
+  // Update dex file size and resize class offsets in the OatDexFile.
+  // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
+  oat_dex_file->dex_file_size_ = header->file_size_;
+  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+  return true;
+}
+
+bool OatWriter::WriteOatDexFiles(OutputStream* rodata) {
+  TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
+
+  // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader.  If there are
+  // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and
+  // this Seek() ensures that we reserve the space for OatHeader in .rodata.
+  DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize());
+  uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize();
+  off_t actual_offset = rodata->Seek(expected_offset, kSeekSet);
+  if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+    PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset
+                << " Expected: " << expected_offset << " File: " << rodata->GetLocation();
+    return false;
+  }
+
+  for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+
+    DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_,
+              static_cast<size_t>(rodata->Seek(0, kSeekCurrent)));
+
+    // Write OatDexFile.
+    if (!oat_dex_file->Write(this, rodata)) {
+      PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation();
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OatWriter::ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset) {
+  TimingLogger::ScopedTiming split("ExtendForTypeLookupTables", timings_);
+
+  int64_t new_length = oat_data_offset_ + dchecked_integral_cast<int64_t>(offset);
+  if (file->SetLength(new_length) != 0) {
+    PLOG(ERROR) << "Failed to extend file for type lookup tables. new_length: " << new_length
+        << "File: " << file->GetPath();
+    return false;
+  }
+  off_t actual_offset = rodata->Seek(new_length, kSeekSet);
+  if (actual_offset != static_cast<off_t>(new_length)) {
+    PLOG(ERROR) << "Failed to seek stream after extending file for type lookup tables."
+                << " Actual: " << actual_offset << " Expected: " << new_length
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  if (!rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after extending for type lookup tables."
+                << " File: " << rodata->GetLocation();
+    return false;
+  }
+  return true;
+}
+
+bool OatWriter::OpenDexFiles(
+    File* file,
+    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+  TimingLogger::ScopedTiming split("OpenDexFiles", timings_);
+
+  if (oat_dex_files_.empty()) {
+    // Nothing to do.
+    return true;
+  }
+
+  size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+  size_t length = size_ - map_offset;
+  std::string error_msg;
+  std::unique_ptr<MemMap> dex_files_map(MemMap::MapFile(length,
+                                                        PROT_READ | PROT_WRITE,
+                                                        MAP_SHARED,
+                                                        file->Fd(),
+                                                        oat_data_offset_ + map_offset,
+                                                        /* low_4gb */ false,
+                                                        file->GetPath().c_str(),
+                                                        &error_msg));
+  if (dex_files_map == nullptr) {
+    LOG(ERROR) << "Failed to mmap() dex files from oat file. File: " << file->GetPath()
+               << " error: " << error_msg;
+    return false;
+  }
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    // Make sure no one messed with input files while we were copying data.
+    // At the very least we need consistent file size and number of class definitions.
+    const uint8_t* raw_dex_file =
+        dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset;
+    if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) {
+      // Note: ValidateDexFileHeader() already logged an error message.
+      LOG(ERROR) << "Failed to verify written dex file header!"
+          << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset
+          << " ~ " << static_cast<const void*>(raw_dex_file);
+      return false;
+    }
+    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+    if (header->file_size_ != oat_dex_file.dex_file_size_) {
+      LOG(ERROR) << "File size mismatch in written dex file header! Expected: "
+          << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+    if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) {
+      LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: "
+          << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_
+          << " Output: " << file->GetPath();
+      return false;
+    }
+
+    // Now, open the dex file.
+    dex_files.emplace_back(DexFile::Open(raw_dex_file,
+                                         oat_dex_file.dex_file_size_,
+                                         oat_dex_file.GetLocation(),
+                                         oat_dex_file.dex_file_location_checksum_,
+                                         /* oat_dex_file */ nullptr,
+                                         &error_msg));
+    if (dex_files.back() == nullptr) {
+      LOG(ERROR) << "Failed to open dex file from oat file. File:" << oat_dex_file.GetLocation();
+      return false;
+    }
+  }
+
+  *opened_dex_files_map = std::move(dex_files_map);
+  *opened_dex_files = std::move(dex_files);
+  return true;
+}
+
+bool OatWriter::WriteTypeLookupTables(
+    MemMap* opened_dex_files_map,
+    const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+  TimingLogger::ScopedTiming split("WriteTypeLookupTables", timings_);
+
+  DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+  for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    if (oat_dex_file->lookup_table_offset_ != 0u) {
+      DCHECK(oat_dex_file->create_type_lookup_table_ == CreateTypeLookupTable::kCreate);
+      DCHECK_NE(oat_dex_file->class_offsets_.size(), 0u);
+      size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+      size_t lookup_table_offset = oat_dex_file->lookup_table_offset_;
+      uint8_t* lookup_table = opened_dex_files_map->Begin() + (lookup_table_offset - map_offset);
+      opened_dex_files[i]->CreateTypeLookupTable(lookup_table);
+    }
+  }
+
+  DCHECK_EQ(opened_dex_files_map == nullptr, opened_dex_files.empty());
+  if (opened_dex_files_map != nullptr && !opened_dex_files_map->Sync()) {
+    PLOG(ERROR) << "Failed to Sync() type lookup tables. Map: " << opened_dex_files_map->GetName();
+    return false;
+  }
+
+  return true;
+}
+
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
@@ -1611,15 +2248,20 @@
   }
 }
 
-OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
-  offset_ = offset;
-  const std::string& location(dex_file.GetLocation());
-  dex_file_location_size_ = location.size();
-  dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data());
-  dex_file_location_checksum_ = dex_file.GetLocationChecksum();
-  dex_file_offset_ = 0;
-  lookup_table_offset_ = 0;
-  class_offsets_.resize(dex_file.NumClassDefs());
+OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
+                                  DexFileSource source,
+                                  CreateTypeLookupTable create_type_lookup_table)
+    : source_(source),
+      create_type_lookup_table_(create_type_lookup_table),
+      dex_file_size_(0),
+      offset_(0),
+      dex_file_location_size_(strlen(dex_file_location)),
+      dex_file_location_data_(dex_file_location),
+      dex_file_location_checksum_(0u),
+      dex_file_offset_(0u),
+      class_offsets_offset_(0u),
+      lookup_table_offset_(0u),
+      class_offsets_() {
 }
 
 size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1627,24 +2269,54 @@
           + dex_file_location_size_
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
-          + sizeof(lookup_table_offset_)
-          + (sizeof(class_offsets_[0]) * class_offsets_.size());
+          + sizeof(class_offsets_offset_)
+          + sizeof(lookup_table_offset_);
 }
 
-bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
-                                  OutputStream* out,
-                                  const size_t file_offset) const {
+void OatWriter::OatDexFile::ReserveTypeLookupTable(OatWriter* oat_writer) {
+  DCHECK_EQ(lookup_table_offset_, 0u);
+  if (create_type_lookup_table_ == CreateTypeLookupTable::kCreate && !class_offsets_.empty()) {
+    size_t table_size = TypeLookupTable::RawDataLength(class_offsets_.size());
+    if (table_size != 0u) {
+      // Type tables are required to be 4 byte aligned.
+      size_t original_offset = oat_writer->size_;
+      size_t offset = RoundUp(original_offset, 4);
+      oat_writer->size_oat_lookup_table_alignment_ += offset - original_offset;
+      lookup_table_offset_ = offset;
+      oat_writer->size_ = offset + table_size;
+      oat_writer->size_oat_lookup_table_ += table_size;
+    }
+  }
+}
+
+void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) {
+  DCHECK_EQ(class_offsets_offset_, 0u);
+  if (!class_offsets_.empty()) {
+    // Class offsets are required to be 4 byte aligned.
+    size_t original_offset = oat_writer->size_;
+    size_t offset = RoundUp(original_offset, 4);
+    oat_writer->size_oat_class_offsets_alignment_ += offset - original_offset;
+    class_offsets_offset_ = offset;
+    oat_writer->size_ = offset + GetClassOffsetsRawSize();
+  }
+}
+
+bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
+  const size_t file_offset = oat_writer->oat_data_offset_;
   DCHECK_OFFSET_();
+
   if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
+
   if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
+
   if (!oat_writer->WriteData(out,
                              &dex_file_location_checksum_,
                              sizeof(dex_file_location_checksum_))) {
@@ -1652,21 +2324,35 @@
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
+
   if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
+
+  if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) {
+    PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
+
   if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+
+  return true;
+}
+
+bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
   if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
-    PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
+    PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
+                << " to " << out->GetLocation();
     return false;
   }
-  oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize();
+  oat_writer->size_oat_class_offsets_ += GetClassOffsetsRawSize();
   return true;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5feb5fc..d681998 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -27,7 +27,9 @@
 #include "method_reference.h"
 #include "mirror/class.h"
 #include "oat.h"
+#include "os.h"
 #include "safe_map.h"
+#include "ScopedFd.h"
 #include "utils/array_ref.h"
 
 namespace art {
@@ -39,6 +41,7 @@
 class OutputStream;
 class TimingLogger;
 class TypeLookupTable;
+class ZipEntry;
 
 namespace dwarf {
 struct MethodDebugInfo;
@@ -61,6 +64,11 @@
 // ...
 // TypeLookupTable[D]
 //
+// ClassOffsets[0]   one table of OatClass offsets for each class def for each OatDexFile.
+// ClassOffsets[1]
+// ...
+// ClassOffsets[D]
+//
 // OatClass[0]       one variable sized OatClass for each of C DexFile::ClassDefs
 // OatClass[1]       contains OatClass entries with class status, offsets to code, etc.
 // ...
@@ -93,15 +101,65 @@
 //
 class OatWriter {
  public:
-  OatWriter(const std::vector<const DexFile*>& dex_files,
-            uint32_t image_file_location_oat_checksum,
-            uintptr_t image_file_location_oat_begin,
-            int32_t image_patch_delta,
-            const CompilerDriver* compiler,
-            ImageWriter* image_writer,
-            bool compiling_boot_image,
-            TimingLogger* timings,
-            SafeMap<std::string, std::string>* key_value_store);
+  enum class CreateTypeLookupTable {
+    kCreate,
+    kDontCreate,
+    kDefault = kCreate
+  };
+
+  OatWriter(bool compiling_boot_image, TimingLogger* timings);
+
+  // To produce a valid oat file, the user must first add sources with any combination of
+  //   - AddDexFileSource(),
+  //   - AddZippedDexFilesSource(),
+  //   - AddRawDexFileSource().
+  // Then the user must call in order
+  //   - WriteAndOpenDexFiles()
+  //   - PrepareLayout(),
+  //   - WriteRodata(),
+  //   - WriteCode(),
+  //   - WriteHeader().
+
+  // Add dex file source(s) from a file, either a plain dex file or
+  // a zip file with one or more dex files.
+  bool AddDexFileSource(
+      const char* filename,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source(s) from a zip file specified by a file handle.
+  bool AddZippedDexFilesSource(
+      ScopedFd&& zip_fd,
+      const char* location,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  // Add dex file source from raw memory.
+  bool AddRawDexFileSource(
+      const ArrayRef<const uint8_t>& data,
+      const char* location,
+      uint32_t location_checksum,
+      CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+  dchecked_vector<const char*> GetSourceLocations() const;
+
+  // Write raw dex files to the .rodata section and open them from the oat file.
+  bool WriteAndOpenDexFiles(OutputStream* rodata,
+                            File* file,
+                            InstructionSet instruction_set,
+                            const InstructionSetFeatures* instruction_set_features,
+                            SafeMap<std::string, std::string>* key_value_store,
+                            /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                            /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  // Prepare layout of remaining data.
+  void PrepareLayout(const CompilerDriver* compiler,
+                     ImageWriter* image_writer,
+                     const std::vector<const DexFile*>& dex_files);
+  // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps).
+  bool WriteRodata(OutputStream* out);
+  // Write the code to the .text section.
+  bool WriteCode(OutputStream* out);
+  // Write the oat header. This finalizes the oat file.
+  bool WriteHeader(OutputStream* out,
+                   uint32_t image_file_location_oat_checksum,
+                   uintptr_t image_file_location_oat_begin,
+                   int32_t image_patch_delta);
 
   // Returns whether the oat file has an associated image.
   bool HasImage() const {
@@ -130,9 +188,6 @@
     return ArrayRef<const uintptr_t>(absolute_patch_locations_);
   }
 
-  bool WriteRodata(OutputStream* out);
-  bool WriteCode(OutputStream* out);
-
   ~OatWriter();
 
   ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const {
@@ -144,6 +199,7 @@
   }
 
  private:
+  class DexFileSource;
   class OatClass;
   class OatDexFile;
 
@@ -174,29 +230,65 @@
   // with a given DexMethodVisitor.
   bool VisitDexMethods(DexMethodVisitor* visitor);
 
-  size_t InitOatHeader();
+  size_t InitOatHeader(InstructionSet instruction_set,
+                       const InstructionSetFeatures* instruction_set_features,
+                       uint32_t num_dex_files,
+                       SafeMap<std::string, std::string>* key_value_store);
   size_t InitOatDexFiles(size_t offset);
-  size_t InitLookupTables(size_t offset);
-  size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
   size_t InitOatCode(size_t offset);
   size_t InitOatCodeDexFiles(size_t offset);
 
-  bool WriteTables(OutputStream* out, const size_t file_offset);
-  bool WriteLookupTables(OutputStream* out, const size_t file_offset);
+  bool WriteClassOffsets(OutputStream* out);
+  bool WriteClasses(OutputStream* out);
   size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
   size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
 
   bool GetOatDataOffset(OutputStream* out);
+  bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file);
+  bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
+  bool WriteDexFiles(OutputStream* rodata, File* file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool SeekToDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, ZipEntry* dex_file);
+  bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, File* dex_file);
+  bool WriteDexFile(OutputStream* rodata, OatDexFile* oat_dex_file, const uint8_t* dex_file);
+  bool WriteOatDexFiles(OutputStream* rodata);
+  bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset);
+  bool OpenDexFiles(File* file,
+                    /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+                    /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
+                             const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
   bool WriteData(OutputStream* out, const void* data, size_t size);
 
+  enum class WriteState {
+    kAddingDexFileSources,
+    kPrepareLayout,
+    kWriteRoData,
+    kWriteText,
+    kWriteHeader,
+    kDone
+  };
+
+  WriteState write_state_;
+  TimingLogger* timings_;
+
+  std::vector<std::unique_ptr<File>> raw_dex_files_;
+  std::vector<std::unique_ptr<ZipArchive>> zip_archives_;
+  std::vector<std::unique_ptr<ZipEntry>> zipped_dex_files_;
+
+  // Using std::list<> which doesn't move elements around on push/emplace_back().
+  // We need this because we keep plain pointers to the strings' c_str().
+  std::list<std::string> zipped_dex_file_locations_;
+
   dchecked_vector<dwarf::MethodDebugInfo> method_info_;
 
-  const CompilerDriver* const compiler_driver_;
-  ImageWriter* const image_writer_;
+  const CompilerDriver* compiler_driver_;
+  ImageWriter* image_writer_;
   const bool compiling_boot_image_;
 
   // note OatFile does not take ownership of the DexFiles
@@ -215,13 +307,7 @@
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
-  // dependencies on the image.
-  uint32_t image_file_location_oat_checksum_;
-  uintptr_t image_file_location_oat_begin_;
-  int32_t image_patch_delta_;
-
   // data to write
-  SafeMap<std::string, std::string>* key_value_store_;
   std::unique_ptr<OatHeader> oat_header_;
   dchecked_vector<OatDexFile> oat_dex_files_;
   dchecked_vector<OatClass> oat_classes_;
@@ -257,10 +343,12 @@
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
   uint32_t size_oat_dex_file_offset_;
+  uint32_t size_oat_dex_file_class_offsets_offset_;
   uint32_t size_oat_dex_file_lookup_table_offset_;
-  uint32_t size_oat_dex_file_class_offsets_;
   uint32_t size_oat_lookup_table_alignment_;
   uint32_t size_oat_lookup_table_;
+  uint32_t size_oat_class_offsets_alignment_;
+  uint32_t size_oat_class_offsets_;
   uint32_t size_oat_class_type_;
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
@@ -269,7 +357,7 @@
   std::unique_ptr<linker::RelativePatcher> relative_patcher_;
 
   // The locations of absolute patches relative to the start of the executable section.
-  std::vector<uintptr_t> absolute_patch_locations_;
+  dchecked_vector<uintptr_t> absolute_patch_locations_;
 
   // Map method reference to assigned offset.
   // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2bf8404..f265a0c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -5483,7 +5483,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5693,8 +5693,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -5980,6 +5980,7 @@
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
       codegen_->AddSlowPath(slow_path);
 
+      // IP = Thread::Current()->GetIsGcMarking()
       __ LoadFromOffset(
           kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
       __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
@@ -6058,11 +6059,8 @@
   //   }
   //
   // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as:
-  // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
-  // - it performs additional checks that we do not do here for
-  //   performance reasons.
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
   Register temp_reg = temp.AsRegister<Register>();
@@ -6451,6 +6449,33 @@
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
+void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations = instr->GetLocations();
+  Register res = locations->Out().AsRegister<Register>();
+  Register accumulator = locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex)
+                                      .AsRegister<Register>();
+  Register mul_left = locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex)
+                                  .AsRegister<Register>();
+  Register mul_right = locations->InAt(HMultiplyAccumulate::kInputMulRightIndex)
+                                    .AsRegister<Register>();
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ mla(res, mul_left, mul_right, accumulator);
+  } else {
+    __ mls(res, mul_left, mul_right, accumulator);
+  }
+}
+
 void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1183dda..df2126c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -159,6 +159,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -197,6 +198,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -442,7 +444,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -450,7 +452,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2cb2741..9449007 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -584,6 +584,56 @@
   }
 }
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
  public:
@@ -605,7 +655,7 @@
     // to be instrumented, e.g.:
     //
     //   __ Ldr(out, HeapOperand(out, class_offset);
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -621,7 +671,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
     // The read barrier instrumentation does not support the
     // HArm64IntermediateAddress instruction yet.
     DCHECK(!(instruction_->IsArrayGet() &&
@@ -769,14 +821,18 @@
 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1338,7 +1394,8 @@
 
 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
                                      CPURegister dst,
-                                     const MemOperand& src) {
+                                     const MemOperand& src,
+                                     bool needs_null_check) {
   MacroAssembler* masm = GetVIXLAssembler();
   BlockPoolsScope block_pools(masm);
   UseScratchRegisterScope temps(masm);
@@ -1354,20 +1411,28 @@
   switch (type) {
     case Primitive::kPrimBoolean:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimByte:
       __ Ldarb(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimChar:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimShort:
       __ Ldarh(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimInt:
@@ -1375,7 +1440,9 @@
     case Primitive::kPrimLong:
       DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
       __ Ldar(Register(dst), base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
@@ -1384,7 +1451,9 @@
 
       Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
       __ Ldar(temp, base);
-      MaybeRecordImplicitNullCheck(instruction);
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
+      }
       __ Fmov(FPRegister(dst), temp);
       break;
     }
@@ -1505,7 +1574,7 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
   BarrierType type = BarrierAll;
 
   switch (kind) {
@@ -1641,33 +1710,62 @@
 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
                                                    const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+  LocationSummary* locations = instruction->GetLocations();
+  Location base_loc = locations->InAt(0);
+  Location out = locations->Out();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
   bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  if (field_info.IsVolatile()) {
-    if (use_acquire_release) {
-      // NB: LoadAcquire will record the pc info if needed.
-      codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+  if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object FieldGet with Baker's read barrier case.
+    MacroAssembler* masm = GetVIXLAssembler();
+    UseScratchRegisterScope temps(masm);
+    // /* HeapReference<Object> */ out = *(base + offset)
+    Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
+    Register temp = temps.AcquireW();
+    // Note that potential implicit null checks are handled in this
+    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
+    codegen_->GenerateFieldLoadWithBakerReadBarrier(
+        instruction,
+        out,
+        base,
+        offset,
+        temp,
+        /* needs_null_check */ true,
+        field_info.IsVolatile() && use_acquire_release);
+    if (field_info.IsVolatile() && !use_acquire_release) {
+      // For IRIW sequential consistency kLoadAny is not sufficient.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    // General case.
+    if (field_info.IsVolatile()) {
+      if (use_acquire_release) {
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorARM64::LoadAcquire call.
+        // NB: LoadAcquire will record the pc info if needed.
+        codegen_->LoadAcquire(
+            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
+      } else {
+        codegen_->Load(field_type, OutputCPURegister(instruction), field);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        // For IRIW sequential consistency kLoadAny is not sufficient.
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+      }
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
     }
-  } else {
-    codegen_->Load(field_type, OutputCPURegister(instruction), field);
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    LocationSummary* locations = instruction->GetLocations();
-    Location base = locations->InAt(0);
-    Location out = locations->Out();
-    uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // If read barriers are enabled, emit read barriers other than
+      // Baker's using a slow path (and also unpoison the loaded
+      // reference, if heap poisoning is enabled).
+      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+    }
   }
 }
 
@@ -1713,10 +1811,10 @@
         codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       } else {
-        GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
         codegen_->Store(field_type, source, HeapOperand(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
       }
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
@@ -1952,21 +2050,27 @@
          Operand(InputOperandAt(instruction, 1)));
 }
 
-void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
-  locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
-                     Location::RequiresRegister());
-  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
-  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+  if (instr->GetOpKind() == HInstruction::kSub &&
+      accumulator->IsConstant() &&
+      accumulator->AsConstant()->IsZero()) {
+    // Don't allocate register for Mneg instruction.
+  } else {
+    locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                       Location::RequiresRegister());
+  }
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
   Register res = OutputRegister(instr);
-  Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
-  Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
-  Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
+  Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
+  Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
 
   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
   // This fixup should be carried out for all multiply-accumulate instructions:
@@ -1986,10 +2090,18 @@
   }
 
   if (instr->GetOpKind() == HInstruction::kAdd) {
+    Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
     __ Madd(res, mul_left, mul_right, accumulator);
   } else {
     DCHECK(instr->GetOpKind() == HInstruction::kSub);
-    __ Msub(res, mul_left, mul_right, accumulator);
+    HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
+    if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsZero()) {
+      __ Mneg(res, mul_left, mul_right);
+    } else {
+      Register accumulator = InputRegisterAt(instr,
+                                             HMultiplyAccumulate::kInputAccumulatorIndex);
+      __ Msub(res, mul_left, mul_right, accumulator);
+    }
   }
 }
 
@@ -2021,50 +2133,62 @@
   LocationSummary* locations = instruction->GetLocations();
   Location index = locations->InAt(1);
   uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
-  MemOperand source = HeapOperand(obj);
-  CPURegister dest = OutputCPURegister(instruction);
+  Location out = locations->Out();
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
-  if (index.IsConstant()) {
-    offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-    source = HeapOperand(obj, offset);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Object ArrayGet with Baker's read barrier case.
+    Register temp = temps.AcquireW();
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+    // Note that a potential implicit null check is handled in the
+    // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+    codegen_->GenerateArrayLoadWithBakerReadBarrier(
+        instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
   } else {
-    Register temp = temps.AcquireSameSizeAs(obj);
-    if (instruction->GetArray()->IsArm64IntermediateAddress()) {
-      // The read barrier instrumentation does not support the
-      // HArm64IntermediateAddress instruction yet.
-      DCHECK(!kEmitCompilerReadBarrier);
-      // We do not need to compute the intermediate address from the array: the
-      // input instruction has done it already. See the comment in
-      // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
-        DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
-      }
-      temp = obj;
-    } else {
-      __ Add(temp, obj, offset);
-    }
-    source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
-  }
-
-  codegen_->Load(type, dest, source);
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-  if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    Location obj_loc = locations->InAt(0);
-    Location out = locations->Out();
+    // General case.
+    MemOperand source = HeapOperand(obj);
     if (index.IsConstant()) {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+      source = HeapOperand(obj, offset);
     } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+      Register temp = temps.AcquireSameSizeAs(obj);
+      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HArm64IntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
+        // We do not need to compute the intermediate address from the array: the
+        // input instruction has done it already. See the comment in
+        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+        if (kIsDebugBuild) {
+          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
+        }
+        temp = obj;
+      } else {
+        __ Add(temp, obj, offset);
+      }
+      source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+    }
+
+    codegen_->Load(type, OutputCPURegister(instruction), source);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+    if (type == Primitive::kPrimNot) {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      Location obj_loc = locations->InAt(0);
+      if (index.IsConstant()) {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
+      } else {
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
+      }
     }
   }
 }
@@ -2194,12 +2318,12 @@
           //   __ Mov(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ Ldr(temp, HeapOperand(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = value->klass_
           //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
           //
           //   __ Cmp(temp, temp2);
@@ -2820,7 +2944,8 @@
         non_fallthrough_target = true_target;
       }
 
-      if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+      if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
+          rhs.IsImmediate() && (rhs.immediate() == 0)) {
         switch (arm64_cond) {
           case eq:
             __ Cbz(lhs, non_fallthrough_target);
@@ -2921,6 +3046,14 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -2947,21 +3080,22 @@
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -2977,10 +3111,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ Ldr(out, HeapOperand(obj.W(), class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       __ Cmp(out, cls);
       __ Cset(out, eq);
@@ -2995,17 +3128,8 @@
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, success;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ Ldr(out, HeapOperand(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -3023,17 +3147,8 @@
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ Ldr(out, HeapOperand(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -3051,17 +3166,8 @@
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = WRegisterFrom(temp_loc);
-        __ Mov(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ Ldr(out, HeapOperand(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -3100,6 +3206,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
                                                                       /* is_fatal */ false);
@@ -3152,30 +3265,29 @@
   locations->SetInAt(1, Location::RequiresRegister());
   // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
-     locations->AddTemp(Location::RequiresRegister());
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
   Location temp_loc = locations->GetTemp(0);
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   Register temp = WRegisterFrom(temp_loc);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -3194,8 +3306,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ Ldr(temp, HeapOperand(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -3212,18 +3323,8 @@
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -3235,8 +3336,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -3252,18 +3352,8 @@
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ Ldr(temp, HeapOperand(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -3274,8 +3364,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -3287,19 +3376,8 @@
       __ B(eq, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = WRegisterFrom(temp2_loc);
-        __ Mov(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ Ldr(temp, HeapOperand(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -3312,8 +3390,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -3322,8 +3399,7 @@
       __ Cbz(temp, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -3340,6 +3416,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -3465,7 +3548,7 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  // On arm64 we support all dispatch types.
+  // On ARM64 we support all dispatch types.
   return desired_dispatch_info;
 }
 
@@ -3742,32 +3825,17 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ Add(out.X(), current_method.X(), declaring_class_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ Ldr(out, MemOperand(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ Add(out.X(), out.X(), cache_offset);
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ Ldr(out, MemOperand(out.X(), cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(
+        cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -3830,30 +3898,14 @@
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ Add(out.X(), current_method.X(), declaring_class_offset);
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ Ldr(out, MemOperand(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ Add(out.X(), out.X(), cache_offset);
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ Ldr(out, MemOperand(out.X(), cache_offset));
-  }
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
   if (!load->IsInDexCache()) {
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
@@ -4222,7 +4274,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
@@ -4607,14 +4659,288 @@
   }
 }
 
-void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
-                                             Location out,
-                                             Location ref,
-                                             Location obj,
-                                             uint32_t offset,
-                                             Location index) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                     Location out,
+                                                                     uint32_t offset,
+                                                                     Location temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  Register temp_reg = RegisterFrom(temp, type);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Mov(temp_reg, out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ Ldr(out_reg, HeapOperand(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ Ldr(out_reg, HeapOperand(out_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                      Location out,
+                                                                      Location obj,
+                                                                      uint32_t offset,
+                                                                      Location temp) {
+  Primitive::Type type = Primitive::kPrimNot;
+  Register out_reg = RegisterFrom(out, type);
+  Register obj_reg = RegisterFrom(obj, type);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      Register temp_reg = RegisterFrom(temp, type);
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      temp_reg,
+                                                      /* needs_null_check */ false,
+                                                      /* use_load_acquire */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                            Location root,
+                                                            vixl::Register obj,
+                                                            uint32_t offset) {
+  Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ Ldr(root_reg, MemOperand(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      MacroAssembler* masm = GetVIXLAssembler();
+      UseScratchRegisterScope temps(masm);
+      Register temp = temps.AcquireW();
+      // temp = Thread::Current()->GetIsGcMarking()
+      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value()));
+      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Add(root_reg.X(), obj.X(), offset);
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ Ldr(root_reg, MemOperand(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
+void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               vixl::Register obj,
+                                                               uint32_t offset,
+                                                               Register temp,
+                                                               bool needs_null_check,
+                                                               bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                               Location ref,
+                                                               vixl::Register obj,
+                                                               uint32_t data_offset,
+                                                               Location index,
+                                                               Register temp,
+                                                               bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Array cells are never volatile variables, therefore array loads
+  // never use Load-Acquire instructions on ARM64.
+  const bool use_load_acquire = false;
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  GenerateReferenceLoadWithBakerReadBarrier(
+      instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                   Location ref,
+                                                                   vixl::Register obj,
+                                                                   uint32_t offset,
+                                                                   Location index,
+                                                                   Register temp,
+                                                                   bool needs_null_check,
+                                                                   bool use_load_acquire) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+  // If `index` is a valid location, then we are emitting an array
+  // load, so we shouldn't be using a Load Acquire instruction.
+  // In other words: `index.IsValid()` => `!use_load_acquire`.
+  DCHECK(!index.IsValid() || !use_load_acquire);
+
+  MacroAssembler* masm = GetVIXLAssembler();
+  UseScratchRegisterScope temps(masm);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  Primitive::Type type = Primitive::kPrimNot;
+  Register ref_reg = RegisterFrom(ref, type);
+  DCHECK(obj.IsW());
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ Ldr(temp, HeapOperand(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
+  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Introduce a dependency on the high bits of rb_state, which shall
+  // be all zeroes, to prevent load-load reordering, and without using
+  // a memory barrier (which would be more expensive).
+  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
+  Register temp2 = temps.AcquireW();
+  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
+  // obj is unchanged by this operation, but its value now depends on
+  // temp2, which depends on temp.
+  __ Add(obj, obj, Operand(temp2));
+  temps.Release(temp2);
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    temp2 = temps.AcquireW();
+    // /* HeapReference<Object> */ ref =
+    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    MemOperand source = HeapOperand(obj);
+    if (index.IsConstant()) {
+      uint32_t computed_offset =
+          offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type));
+      source = HeapOperand(obj, computed_offset);
+    } else {
+      __ Add(temp2, obj, offset);
+      source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+    }
+    Load(type, ref_reg, source);
+    temps.Release(temp2);
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    MemOperand field = HeapOperand(obj, offset);
+    if (use_load_acquire) {
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+    } else {
+      Load(type, ref_reg, field);
+    }
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ Cmp(temp, ReadBarrier::gray_ptr_);
+  __ B(eq, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                 Location out,
+                                                 Location ref,
+                                                 Location obj,
+                                                 uint32_t offset,
+                                                 Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -4628,57 +4954,41 @@
       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                  Location out,
-                                                  Location ref,
-                                                  Location obj,
-                                                  uint32_t offset,
-                                                  Location index) {
+void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location ref,
+                                                      Location obj,
+                                                      uint32_t offset,
+                                                      Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
   }
 }
 
-void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                    Location out,
-                                                    Location root) {
+void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                        Location out,
+                                                        Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCodeARM64* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ B(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 8eb9fcc..f860082 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -195,6 +195,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -208,14 +209,47 @@
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleCondition(HCondition* instruction);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               vixl::Register obj,
+                               uint32_t offset);
+
   void HandleShift(HBinaryOperation* instr);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -245,6 +279,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -337,6 +372,8 @@
   // Emit a write barrier.
   void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   // Register allocation.
 
   void SetupBlockedRegisters() const OVERRIDE;
@@ -386,9 +423,12 @@
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
 
   void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
-  void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src);
-  void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
+  void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+  void LoadAcquire(HInstruction* instruction,
+                   vixl::CPURegister dst,
+                   const vixl::MemOperand& src,
+                   bool needs_null_check);
+  void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -423,7 +463,27 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::Register obj,
+                                             uint32_t offset,
+                                             vixl::Register temp,
+                                             bool needs_null_check,
+                                             bool use_load_acquire);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             vixl::Register temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -440,23 +500,25 @@
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -466,9 +528,20 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 vixl::Register temp,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire);
+
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
                                           vixl::Literal<uint64_t>*,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f95cb30..f7ccdd8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6009,7 +6009,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -6240,8 +6240,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6520,6 +6520,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6582,7 +6584,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+  //   which is a no-op thanks to the x86 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 34f983f..43e9543 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -451,7 +451,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -459,7 +459,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 31f3660..2ce2d91 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4663,13 +4663,13 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool object_array_set_with_read_barrier =
       kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      (may_need_runtime_call || object_array_set_with_read_barrier) ?
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
           LocationSummary::kCallOnSlowPath :
           LocationSummary::kNoCall);
 
@@ -4698,7 +4698,7 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4750,7 +4750,7 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4759,7 +4759,7 @@
       NearLabel done, not_null, do_put;
       SlowPathCode* slow_path = nullptr;
       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4837,7 +4837,7 @@
       } else {
         __ movl(address, register_value);
       }
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -5626,7 +5626,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5857,8 +5857,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6120,6 +6120,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6182,7 +6184,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
+  //   here, which is a no-op thanks to the x86-64 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 9de9d9e..82aabb0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -400,7 +400,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t offset,
                                              Location temp,
@@ -408,7 +408,7 @@
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              CpuRegister obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 86a695b..e170e37 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -89,15 +89,18 @@
 }
 
 void HDeadCodeElimination::RemoveDeadBlocks() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not eliminate dead blocks if the graph has irreducible loops. We could
+    // support it, but that would require changes in our loop representation to handle
+    // multiple entry points. We decided it was not worth the complexity.
+    return;
+  }
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
 
   MarkReachableBlocks(graph_, &live_blocks);
   bool removed_one_or_more_blocks = false;
-  // If the graph has irreducible loops we need to reset all graph analysis we have done
-  // before: the irreducible loop can be turned into a reducible one.
-  // For simplicity, we do the full computation regardless of the type of the loops.
   bool rerun_dominance_and_loop_analysis = false;
 
   // Remove all dead blocks. Iterate in post order because removal needs the
@@ -105,9 +108,6 @@
   // inside out.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block  = it.Current();
-    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
-      rerun_dominance_and_loop_analysis = true;
-    }
     int id = block->GetBlockId();
     if (!live_blocks.IsBitSet(id)) {
       MaybeRecordDeadBlock(block);
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 9439ba0..3113677 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -484,6 +484,18 @@
         loop_information->GetPreHeader()->GetSuccessors().size()));
   }
 
+  if (loop_information->GetSuspendCheck() == nullptr) {
+    AddError(StringPrintf(
+        "Loop with header %d does not have a suspend check.",
+        loop_header->GetBlockId()));
+  }
+
+  if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+    AddError(StringPrintf(
+        "Loop header %d does not have the loop suspend check as the first instruction.",
+        loop_header->GetBlockId()));
+  }
+
   // Ensure the loop header has only one incoming branch and the remaining
   // predecessors are back edges.
   size_t num_preds = loop_header->GetPredecessors().size();
@@ -589,6 +601,14 @@
     }
   }
 
+  if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+    AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+                          "but does not have one.",
+                          instruction->DebugName(),
+                          instruction->GetId(),
+                          current_block_->GetBlockId()));
+  }
+
   // Ensure an instruction having an environment is dominated by the
   // instructions contained in the environment.
   for (HEnvironment* environment = instruction->GetEnvironment();
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 32c3a92..3c88ba7 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -426,6 +426,12 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+  void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
   void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
@@ -433,10 +439,6 @@
       StartAttributeStream("shift") << instruction->GetShiftAmount();
     }
   }
-
-  void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
-    StartAttributeStream("kind") << instruction->GetOpKind();
-  }
 #endif
 
   bool IsPass(const char* name) {
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
new file mode 100644
index 0000000..db1f9a7
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_arm.h"
+#include "instruction_simplifier_shared.h"
+
+namespace art {
+namespace arm {
+
+void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
+  if (TryCombineMultiplyAccumulate(instruction, kArm)) {
+    RecordSimplification();
+  }
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
new file mode 100644
index 0000000..379b95d
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class InstructionSimplifierArmVisitor : public HGraphVisitor {
+ public:
+  InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph), stats_(stats) {}
+
+ private:
+  void RecordSimplification() {
+    if (stats_ != nullptr) {
+      stats_->RecordStat(kInstructionSimplificationsArch);
+    }
+  }
+
+  void VisitMul(HMul* instruction) OVERRIDE;
+
+  OptimizingCompilerStats* stats_;
+};
+
+
+class InstructionSimplifierArm : public HOptimization {
+ public:
+  InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
+    : HOptimization(graph, "instruction_simplifier_arm", stats) {}
+
+  void Run() OVERRIDE {
+    InstructionSimplifierArmVisitor visitor(graph_, stats_);
+    visitor.VisitReversePostOrder();
+  }
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 4bcfc54..83126a5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -17,6 +17,7 @@
 #include "instruction_simplifier_arm64.h"
 
 #include "common_arm64.h"
+#include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
 
 namespace art {
@@ -179,67 +180,6 @@
   return true;
 }
 
-bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
-    HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
-  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
-  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
-  DCHECK_NE(input_binop, input_other);
-  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
-    return false;
-  }
-
-  // Try to interpret patterns like
-  //    a * (b <+/-> 1)
-  // as
-  //    (a * b) <+/-> a
-  HInstruction* input_a = input_other;
-  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
-  HInstruction::InstructionKind op_kind;
-
-  if (input_binop->IsAdd()) {
-    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
-      // Interpret
-      //    a * (b + 1)
-      // as
-      //    (a * b) + a
-      input_b = input_binop->GetLeastConstantLeft();
-      op_kind = HInstruction::kAdd;
-    }
-  } else {
-    DCHECK(input_binop->IsSub());
-    if (input_binop->GetRight()->IsConstant() &&
-        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
-      // Interpret
-      //    a * (b - (-1))
-      // as
-      //    a + (a * b)
-      input_b = input_binop->GetLeft();
-      op_kind = HInstruction::kAdd;
-    } else if (input_binop->GetLeft()->IsConstant() &&
-               input_binop->GetLeft()->AsConstant()->IsOne()) {
-      // Interpret
-      //    a * (1 - b)
-      // as
-      //    a - (a * b)
-      input_b = input_binop->GetRight();
-      op_kind = HInstruction::kSub;
-    }
-  }
-
-  if (input_b == nullptr) {
-    // We did not find a pattern we can optimize.
-    return false;
-  }
-
-  HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
-      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
-
-  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
-  input_binop->GetBlock()->RemoveInstruction(input_binop);
-
-  return false;
-}
-
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -255,75 +195,8 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
-  Primitive::Type type = instruction->GetType();
-  if (!Primitive::IsIntOrLongType(type)) {
-    return;
-  }
-
-  HInstruction* use = instruction->HasNonEnvironmentUses()
-      ? instruction->GetUses().GetFirst()->GetUser()
-      : nullptr;
-
-  if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
-    // Replace code looking like
-    //    MUL tmp, x, y
-    //    SUB dst, acc, tmp
-    // with
-    //    MULSUB dst, acc, x, y
-    // Note that we do not want to (unconditionally) perform the merge when the
-    // multiplication has multiple uses and it can be merged in all of them.
-    // Multiple uses could happen on the same control-flow path, and we would
-    // then increase the amount of work. In the future we could try to evaluate
-    // whether all uses are on different control-flow paths (using dominance and
-    // reverse-dominance information) and only perform the merge when they are.
-    HInstruction* accumulator = nullptr;
-    HBinaryOperation* binop = use->AsBinaryOperation();
-    HInstruction* binop_left = binop->GetLeft();
-    HInstruction* binop_right = binop->GetRight();
-    // Be careful after GVN. This should not happen since the `HMul` has only
-    // one use.
-    DCHECK_NE(binop_left, binop_right);
-    if (binop_right == instruction) {
-      accumulator = binop_left;
-    } else if (use->IsAdd()) {
-      DCHECK_EQ(binop_left, instruction);
-      accumulator = binop_right;
-    }
-
-    if (accumulator != nullptr) {
-      HArm64MultiplyAccumulate* mulacc =
-          new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
-                                                                binop->GetKind(),
-                                                                accumulator,
-                                                                instruction->GetLeft(),
-                                                                instruction->GetRight());
-
-      binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
-      DCHECK(!instruction->HasUses());
-      instruction->GetBlock()->RemoveInstruction(instruction);
-      RecordSimplification();
-      return;
-    }
-  }
-
-  // Use multiply accumulate instruction for a few simple patterns.
-  // We prefer not applying the following transformations if the left and
-  // right inputs perform the same operation.
-  // We rely on GVN having squashed the inputs if appropriate. However the
-  // results are still correct even if that did not happen.
-  if (instruction->GetLeft() == instruction->GetRight()) {
-    return;
-  }
-
-  HInstruction* left = instruction->GetLeft();
-  HInstruction* right = instruction->GetRight();
-  if ((right->IsAdd() || right->IsSub()) &&
-      TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
-    return;
-  }
-  if ((left->IsAdd() || left->IsSub()) &&
-      TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
-    return;
+  if (TryCombineMultiplyAccumulate(instruction, kArm64)) {
+    RecordSimplification();
   }
 }
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index b7f490b..37a34c0 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,10 +51,6 @@
     return TryMergeIntoShifterOperand(use, bitfield_op, true);
   }
 
-  bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
-                                           HBinaryOperation* input_binop,
-                                           HInstruction* input_other);
-
   // HInstruction visitors, sorted alphabetically.
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
new file mode 100644
index 0000000..45d196f
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_shared.h"
+
+namespace art {
+
+namespace {
+
+bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+                                         HBinaryOperation* input_binop,
+                                         HInstruction* input_other) {
+  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+  DCHECK_NE(input_binop, input_other);
+  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // Try to interpret patterns like
+  //    a * (b <+/-> 1)
+  // as
+  //    (a * b) <+/-> a
+  HInstruction* input_a = input_other;
+  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
+  HInstruction::InstructionKind op_kind;
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+  HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;
+}
+
+}  // namespace
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {
+  Primitive::Type type = mul->GetType();
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      if (type != Primitive::kPrimInt) {
+        return false;
+      }
+      break;
+    case kArm64:
+      if (!Primitive::IsIntOrLongType(type)) {
+        return false;
+      }
+      break;
+    default:
+      return false;
+  }
+
+  HInstruction* use = mul->HasNonEnvironmentUses()
+      ? mul->GetUses().GetFirst()->GetUser()
+      : nullptr;
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+  if (mul->HasOnlyOneNonEnvironmentUse()) {
+    if (use->IsAdd() || use->IsSub()) {
+      // Replace code looking like
+      //    MUL tmp, x, y
+      //    SUB dst, acc, tmp
+      // with
+      //    MULSUB dst, acc, x, y
+      // Note that we do not want to (unconditionally) perform the merge when the
+      // multiplication has multiple uses and it can be merged in all of them.
+      // Multiple uses could happen on the same control-flow path, and we would
+      // then increase the amount of work. In the future we could try to evaluate
+      // whether all uses are on different control-flow paths (using dominance and
+      // reverse-dominance information) and only perform the merge when they are.
+      HInstruction* accumulator = nullptr;
+      HBinaryOperation* binop = use->AsBinaryOperation();
+      HInstruction* binop_left = binop->GetLeft();
+      HInstruction* binop_right = binop->GetRight();
+      // Be careful after GVN. This should not happen since the `HMul` has only
+      // one use.
+      DCHECK_NE(binop_left, binop_right);
+      if (binop_right == mul) {
+        accumulator = binop_left;
+      } else if (use->IsAdd()) {
+        DCHECK_EQ(binop_left, mul);
+        accumulator = binop_right;
+      }
+
+      if (accumulator != nullptr) {
+        HMultiplyAccumulate* mulacc =
+            new (arena) HMultiplyAccumulate(type,
+                                            binop->GetKind(),
+                                            accumulator,
+                                            mul->GetLeft(),
+                                            mul->GetRight());
+
+        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+        DCHECK(!mul->HasUses());
+        mul->GetBlock()->RemoveInstruction(mul);
+        return true;
+      }
+    } else if (use->IsNeg() && isa != kArm) {
+      HMultiplyAccumulate* mulacc =
+          new (arena) HMultiplyAccumulate(type,
+                                          HInstruction::kSub,
+                                          mul->GetBlock()->GetGraph()->GetConstant(type, 0),
+                                          mul->GetLeft(),
+                                          mul->GetRight());
+
+      use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
+      DCHECK(!mul->HasUses());
+      mul->GetBlock()->RemoveInstruction(mul);
+      return true;
+    }
+  }
+
+  // Use multiply accumulate instruction for a few simple patterns.
+  // We prefer not applying the following transformations if the left and
+  // right inputs perform the same operation.
+  // We rely on GVN having squashed the inputs if appropriate. However the
+  // results are still correct even if that did not happen.
+  if (mul->GetLeft() == mul->GetRight()) {
+    return false;
+  }
+
+  HInstruction* left = mul->GetLeft();
+  HInstruction* right = mul->GetRight();
+  if ((right->IsAdd() || right->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
+    return true;
+  }
+  if ((left->IsAdd() || left->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
+    return true;
+  }
+  return false;
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
new file mode 100644
index 0000000..9832ecc
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+
+#include "nodes.h"
+
+namespace art {
+
+bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 81cab86..e857f6f 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -752,21 +752,33 @@
   Register trg = RegisterFrom(trg_loc, type);
   bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  MemOperand mem_op(base.X(), offset);
-  if (is_volatile) {
-    if (use_acquire_release) {
-      codegen->LoadAcquire(invoke, trg, mem_op);
-    } else {
-      codegen->Load(type, trg, mem_op);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
+    UseScratchRegisterScope temps(masm);
+    Register temp = temps.AcquireW();
+    codegen->GenerateArrayLoadWithBakerReadBarrier(
+        invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+    if (is_volatile && !use_acquire_release) {
       __ Dmb(InnerShareable, BarrierReads);
     }
   } else {
-    codegen->Load(type, trg, mem_op);
-  }
+    // Other cases.
+    MemOperand mem_op(base.X(), offset);
+    if (is_volatile) {
+      if (use_acquire_release) {
+        codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
+      } else {
+        codegen->Load(type, trg, mem_op);
+        __ Dmb(InnerShareable, BarrierReads);
+      }
+    } else {
+      codegen->Load(type, trg, mem_op);
+    }
 
-  if (type == Primitive::kPrimNot) {
-    DCHECK(trg.IsW());
-    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    if (type == Primitive::kPrimNot) {
+      DCHECK(trg.IsW());
+      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+    }
   }
 }
 
@@ -1026,10 +1038,15 @@
   vixl::Label loop_head, exit_loop;
   if (use_acquire_release) {
     __ Bind(&loop_head);
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+    // the reference stored in the object before attempting the CAS,
+    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+    // implementation.
+    //
     // Note that this code is not (yet) used when read barriers are
     // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 854d92a..2eabadf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -167,11 +167,7 @@
 void HGraph::ClearLoopInformation() {
   SetHasIrreducibleLoops(false);
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    HBasicBlock* current = it.Current();
-    if (current->IsLoopHeader()) {
-      current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck());
-    }
-    current->SetLoopInformation(nullptr);
+    it.Current()->SetLoopInformation(nullptr);
   }
 }
 
@@ -180,6 +176,14 @@
   dominator_ = nullptr;
 }
 
+HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const {
+  HInstruction* instruction = GetFirstInstruction();
+  while (instruction->IsParallelMove()) {
+    instruction = instruction->GetNext();
+  }
+  return instruction;
+}
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
@@ -457,6 +461,10 @@
     }
     if (block->IsLoopHeader()) {
       SimplifyLoop(block);
+    } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimiation pass, and what used to be
+      // a loop got dismantled. Just remove the suspend check.
+      block->RemoveInstruction(block->GetFirstInstruction());
     }
   }
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 859d570..57fa558 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -860,6 +860,8 @@
   HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
   const HInstructionList& GetPhis() const { return phis_; }
 
+  HInstruction* GetFirstInstructionDisregardMoves() const;
+
   void AddSuccessor(HBasicBlock* block) {
     successors_.push_back(block);
     block->predecessors_.push_back(this);
@@ -1224,6 +1226,16 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+/*
+ * Instructions, shared across several (not all) architectures.
+ */
+#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64)
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
+  M(MultiplyAccumulate, Instruction)
+#endif
+
 #ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
 #else
@@ -1236,8 +1248,7 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
   M(Arm64DataProcWithShifterOp, Instruction)                            \
-  M(Arm64IntermediateAddress, Instruction)                              \
-  M(Arm64MultiplyAccumulate, Instruction)
+  M(Arm64IntermediateAddress, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1257,6 +1268,7 @@
 
 #define FOR_EACH_CONCRETE_INSTRUCTION(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                               \
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                               \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                                \
   FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                                 \
@@ -3687,19 +3699,13 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  HNewInstance* GetThisArgumentOfStringInit() const {
+  HInstruction* GetAndRemoveThisArgumentOfStringInit() {
     DCHECK(IsStringInit());
     size_t index = InputCount() - 1;
-    DCHECK(InputAt(index)->IsNewInstance());
-    return InputAt(index)->AsNewInstance();
-  }
-
-  void RemoveThisArgumentOfStringInit() {
-    DCHECK(IsStringInit());
-    size_t index = InputCount() - 1;
-    DCHECK(InputAt(index)->IsNewInstance());
+    HInstruction* input = InputAt(index);
     RemoveAsUserOfInput(index);
     inputs_.pop_back();
+    return input;
   }
 
   // Is this a call to a static method whose declaring class has an
@@ -5722,6 +5728,9 @@
 
 }  // namespace art
 
+#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
+#include "nodes_shared.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm
 #include "nodes_arm.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 445cdab..173852a 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,40 +118,6 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
-class HArm64MultiplyAccumulate : public HExpression<3> {
- public:
-  HArm64MultiplyAccumulate(Primitive::Type type,
-                           InstructionKind op,
-                           HInstruction* accumulator,
-                           HInstruction* mul_left,
-                           HInstruction* mul_right,
-                           uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
-    SetRawInputAt(kInputAccumulatorIndex, accumulator);
-    SetRawInputAt(kInputMulLeftIndex, mul_left);
-    SetRawInputAt(kInputMulRightIndex, mul_right);
-  }
-
-  static constexpr int kInputAccumulatorIndex = 0;
-  static constexpr int kInputMulLeftIndex = 1;
-  static constexpr int kInputMulRightIndex = 2;
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
-  }
-
-  InstructionKind GetOpKind() const { return op_kind_; }
-
-  DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
-
- private:
-  // Indicates if this is a MADD or MSUB.
-  InstructionKind op_kind_;
-
-  DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
new file mode 100644
index 0000000..b04b622
--- /dev/null
+++ b/compiler/optimizing/nodes_shared.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
+
+namespace art {
+
+class HMultiplyAccumulate : public HExpression<3> {
+ public:
+  HMultiplyAccumulate(Primitive::Type type,
+                      InstructionKind op,
+                      HInstruction* accumulator,
+                      HInstruction* mul_left,
+                      HInstruction* mul_right,
+                      uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index fffd005..4da48bd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -62,6 +62,7 @@
 #include "induction_var_analysis.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
+#include "instruction_simplifier_arm.h"
 #include "intrinsics.h"
 #include "jit/debugger_interface.h"
 #include "jit/jit_code_cache.h"
@@ -445,8 +446,11 @@
     case kThumb2:
     case kArm: {
       arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      arm::InstructionSimplifierArm* simplifier =
+          new (arena) arm::InstructionSimplifierArm(graph, stats);
       HOptimization* arm_optimizations[] = {
-        fixups
+        fixups,
+        simplifier
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9d136f3..be470cc 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -504,7 +504,7 @@
 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
   // This function is used to reduce the dependencies in the graph after
   // (from -> to) has been performed. Since we ensure there is no move with the same
-  // destination, (to -> X) can not be blocked while (from -> X) might still be
+  // destination, (to -> X) cannot be blocked while (from -> X) might still be
   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
   // a dependency between the two. If we update the source location from 1 to 2, we
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7494e33..165d09d 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -422,6 +422,34 @@
   return true;
 }
 
+void SsaBuilder::RemoveRedundantUninitializedStrings() {
+  if (GetGraph()->IsDebuggable()) {
+    // Do not perform the optimization for consistency with the interpreter
+    // which always allocates an object for new-instance of String.
+    return;
+  }
+
+  for (HNewInstance* new_instance : uninitialized_strings_) {
+    DCHECK(new_instance->IsStringAlloc());
+
+    // Replace NewInstance of String with NullConstant if not used prior to
+    // calling StringFactory. In case of deoptimization, the interpreter is
+    // expected to skip null check on the `this` argument of the StringFactory call.
+    if (!new_instance->HasNonEnvironmentUses()) {
+      new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+      new_instance->GetBlock()->RemoveInstruction(new_instance);
+
+      // Remove LoadClass if not needed any more.
+      HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass();
+      DCHECK(load_class != nullptr);
+      DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+      if (!load_class->HasUses()) {
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
+    }
+  }
+}
+
 GraphAnalysisResult SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block
   // visited (with the exception of loops) in order to create the right environment
@@ -487,7 +515,15 @@
   // input types.
   dead_phi_elimimation.EliminateDeadPhis();
 
-  // 11) Clear locals.
+  // 11) Step 1) replaced uses of NewInstances of String with the results of
+  // their corresponding StringFactory calls. Unless the String objects are used
+  // before they are initialized, they can be replaced with NullConstant.
+  // Note that this optimization is valid only if unsimplified code does not use
+  // the uninitialized value because we assume execution can be deoptimized at
+  // any safepoint. We must therefore perform it before any other optimizations.
+  RemoveRedundantUninitializedStrings();
+
+  // 12) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
@@ -891,12 +927,21 @@
   if (invoke->IsStringInit()) {
     // This is a StringFactory call which acts as a String constructor. Its
     // result replaces the empty String pre-allocated by NewInstance.
-    HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit();
-    invoke->RemoveThisArgumentOfStringInit();
+    HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
 
-    // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`.
+    // Replacing the NewInstance might render it redundant. Keep a list of these
+    // to be visited once it is clear whether it is has remaining uses.
+    if (arg_this->IsNewInstance()) {
+      uninitialized_strings_.push_back(arg_this->AsNewInstance());
+    } else {
+      DCHECK(arg_this->IsPhi());
+      // NewInstance is not the direct input of the StringFactory call. It might
+      // be redundant but optimizing this case is not worth the effort.
+    }
+
+    // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
     for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-      if ((*current_locals_)[vreg] == new_instance) {
+      if ((*current_locals_)[vreg] == arg_this) {
         (*current_locals_)[vreg] = invoke;
       }
     }
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 28eef6a..ccef8ea 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -57,6 +57,7 @@
         loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
         locals_for_(graph->GetBlocks().size(),
                     ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
                     graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
@@ -105,6 +106,8 @@
   HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
   HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
 
+  void RemoveRedundantUninitializedStrings();
+
   StackHandleScopeCollection* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
@@ -119,6 +122,7 @@
 
   ArenaVector<HArrayGet*> ambiguous_agets_;
   ArenaVector<HArraySet*> ambiguous_asets_;
+  ArenaVector<HNewInstance*> uninitialized_strings_;
 
   // HEnvironment for each block.
   ArenaVector<ArenaVector<HInstruction*>> locals_for_;
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
index 81f2a56..85335ef 100644
--- a/compiler/profile_assistant.cc
+++ b/compiler/profile_assistant.cc
@@ -16,54 +16,154 @@
 
 #include "profile_assistant.h"
 
+#include "base/unix_file/fd_file.h"
+#include "os.h"
+
 namespace art {
 
 // Minimum number of new methods that profiles must contain to enable recompilation.
 static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
 
-bool ProfileAssistant::ProcessProfiles(
-      const std::vector<std::string>& profile_files,
-      const std::vector<std::string>& reference_profile_files,
-      /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+bool ProfileAssistant::ProcessProfilesInternal(
+        const std::vector<ScopedFlock>& profile_files,
+        const std::vector<ScopedFlock>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
   DCHECK(!profile_files.empty());
-  DCHECK(reference_profile_files.empty() ||
+  DCHECK(!reference_profile_files.empty() ||
       (profile_files.size() == reference_profile_files.size()));
 
   std::vector<ProfileCompilationInfo> new_info(profile_files.size());
   bool should_compile = false;
   // Read the main profile files.
-  for (size_t i = 0; i < profile_files.size(); i++) {
-    if (!new_info[i].Load(profile_files[i])) {
-      LOG(WARNING) << "Could not load profile file: " << profile_files[i];
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
       return false;
     }
     // Do we have enough new profiled methods that will make the compilation worthwhile?
     should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
   }
+
   if (!should_compile) {
-    *profile_compilation_info = nullptr;
     return true;
   }
 
   std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
+  // Merge information.
   for (size_t i = 0; i < new_info.size(); i++) {
-    // Merge all data into a single object.
-    result->Load(new_info[i]);
-    // If we have any reference profile information merge their information with
-    // the current profiles and save them back to disk.
     if (!reference_profile_files.empty()) {
-      if (!new_info[i].Load(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i];
+      if (!new_info[i].Load(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not load reference profile file at index " << i;
         return false;
       }
-      if (!new_info[i].Save(reference_profile_files[i])) {
-        LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i];
+    }
+    // Merge all data into a single object.
+    if (!result->Load(new_info[i])) {
+      LOG(WARNING) << "Could not merge profile data at index " << i;
+      return false;
+    }
+  }
+  // We were successful in merging all profile information. Update the files.
+  for (size_t i = 0; i < new_info.size(); i++) {
+    if (!reference_profile_files.empty()) {
+      if (!reference_profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear reference profile file at index " << i;
+        return false;
+      }
+      if (!new_info[i].Save(reference_profile_files[i].GetFile()->Fd())) {
+        LOG(WARNING) << "Could not save reference profile file at index " << i;
+        return false;
+      }
+      if (!profile_files[i].GetFile()->ClearContent()) {
+        PLOG(WARNING) << "Could not clear profile file at index " << i;
         return false;
       }
     }
   }
+
   *profile_compilation_info = result.release();
   return true;
 }
 
+class ScopedCollectionFlock {
+ public:
+  explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (!flocks_[i].Init(filenames[i].c_str(), O_RDWR, /* block */ true, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Will block until all the locks are acquired.
+  bool Init(const std::vector<uint32_t>& fds, /* out */ std::string* error) {
+    for (size_t i = 0; i < fds.size(); i++) {
+      // We do not own the descriptor, so disable auto-close and don't check usage.
+      File file(fds[i], false);
+      file.DisableAutoClose();
+      if (!flocks_[i].Init(&file, error)) {
+        *error += " (index=" + std::to_string(i) + ")";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  const std::vector<ScopedFlock>& Get() const { return flocks_; }
+
+ private:
+  std::vector<ScopedFlock> flocks_;
+};
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<uint32_t>& profile_files_fd,
+        const std::vector<uint32_t>& reference_profile_files_fd,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
+  if (!profile_files_flocks.Init(profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files_fd.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files_fd, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
+bool ProfileAssistant::ProcessProfiles(
+        const std::vector<std::string>& profile_files,
+        const std::vector<std::string>& reference_profile_files,
+        /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+  *profile_compilation_info = nullptr;
+
+  std::string error;
+  ScopedCollectionFlock profile_files_flocks(profile_files.size());
+  if (!profile_files_flocks.Init(profile_files, &error)) {
+    LOG(WARNING) << "Could not lock profile files: " << error;
+    return false;
+  }
+  ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files.size());
+  if (!reference_profile_files_flocks.Init(reference_profile_files, &error)) {
+    LOG(WARNING) << "Could not lock reference profile files: " << error;
+    return false;
+  }
+
+  return ProcessProfilesInternal(profile_files_flocks.Get(),
+                                 reference_profile_files_flocks.Get(),
+                                 profile_compilation_info);
+}
+
 }  // namespace art
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
index 088c8bd..ad5e216 100644
--- a/compiler/profile_assistant.h
+++ b/compiler/profile_assistant.h
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 
+#include "base/scoped_flock.h"
 #include "jit/offline_profiling_info.cc"
 
 namespace art {
@@ -52,7 +53,17 @@
       const std::vector<std::string>& reference_profile_files,
       /*out*/ ProfileCompilationInfo** profile_compilation_info);
 
+  static bool ProcessProfiles(
+      const std::vector<uint32_t>& profile_files_fd_,
+      const std::vector<uint32_t>& reference_profile_files_fd_,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
  private:
+  static bool ProcessProfilesInternal(
+      const std::vector<ScopedFlock>& profile_files,
+      const std::vector<ScopedFlock>& reference_profile_files,
+      /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
 
diff --git a/compiler/profile_assistant_test.cc b/compiler/profile_assistant_test.cc
new file mode 100644
index 0000000..58b7513
--- /dev/null
+++ b/compiler/profile_assistant_test.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "compiler/profile_assistant.h"
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistantTest : public CommonRuntimeTest {
+ protected:
+  void SetupProfile(const std::string& id,
+                    uint32_t checksum,
+                    uint16_t number_of_methods,
+                    const ScratchFile& profile,
+                    ProfileCompilationInfo* info,
+                    uint16_t start_method_index = 0) {
+    std::string dex_location1 = "location1" + id;
+    uint32_t dex_location_checksum1 = checksum;
+    std::string dex_location2 = "location2" + id;
+    uint32_t dex_location_checksum2 = 10 * checksum;
+    for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
+      ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i));
+      ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i));
+    }
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
+  uint32_t GetFd(const ScratchFile& file) const {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transfered to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  // The new profile info will contain the methods with indices 0-100.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+
+  // The reference profile info will contain the methods with indices 50-150.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info1;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile1,
+      &reference_info1, kNumberOfMethodsToEnableCompilation / 2);
+  ProfileCompilationInfo reference_info2;
+  SetupProfile("p2", 2, kNumberOfMethodsAlreadyCompiled, reference_profile2,
+      &reference_info2, kNumberOfMethodsToEnableCompilation / 2);
+
+  // We should advise compilation.
+  ProfileCompilationInfo* result;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result != nullptr);
+
+  // The resulting compilation info must be equal to the merge of the inputs
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.Load(info1));
+  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.Load(reference_info1));
+  ASSERT_TRUE(expected.Load(reference_info2));
+  ASSERT_TRUE(expected.Equals(*result));
+
+  // The information from profiles must be transfered to the reference profiles.
+  ProfileCompilationInfo file_info1;
+  ProfileCompilationInfo merge1;
+  ASSERT_TRUE(merge1.Load(info1));
+  ASSERT_TRUE(merge1.Load(reference_info1));
+  ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+  ASSERT_TRUE(file_info1.Equals(merge1));
+
+  ProfileCompilationInfo file_info2;
+  ProfileCompilationInfo merge2;
+  ASSERT_TRUE(merge2.Load(info2));
+  ASSERT_TRUE(merge2.Load(reference_info2));
+  ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+  ASSERT_TRUE(file_info2.Equals(merge2));
+
+  // Initial profiles must be cleared.
+  ASSERT_EQ(0, profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToSkipCompilation = 1;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+
+  // We should not advise compilation.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile1;
+  ScratchFile reference_profile2;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1),
+      GetFd(profile2)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile1),
+      GetFd(reference_profile2)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of information to fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+  // We should fail processing.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+  ASSERT_TRUE(file_info2.Equals(info2));
+
+  // Reference profile files must still remain empty.
+  ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+  ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<uint32_t> profile_fds({
+      GetFd(profile1)});
+  std::vector<uint32_t> reference_profile_fds({
+      GetFd(reference_profile)});
+
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  // Assign different hashes for the same dex file. This will make merging of information to fail.
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+
+  // We should not advise compilation.
+  ProfileCompilationInfo* result = nullptr;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+  ASSERT_TRUE(result == nullptr);
+
+  // The information from profiles must still remain the same.
+  ProfileCompilationInfo file_info1;
+  ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+  ASSERT_TRUE(file_info1.Equals(info1));
+
+  ProfileCompilationInfo file_info2;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(file_info2.Load(GetFd(reference_profile)));
+  ASSERT_TRUE(file_info2.Equals(reference_info));
+}
+
+}  // namespace art
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index b6a228c..e57a540 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -21,6 +21,7 @@
 #include <set>
 #include <map>
 #include <vector>
+#include <zlib.h>
 
 #include "base/bit_utils.h"
 #include "base/logging.h"
@@ -161,7 +162,6 @@
     uint32_t total_size = data_section_offset + data_section_size;
 
     dex_file_data_.resize(total_size);
-    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     for (const auto& entry : strings_) {
       CHECK_LT(entry.first.size(), 128u);
@@ -210,7 +210,12 @@
       Write32(raw_offset + 4u, GetStringIdx(entry.first.name));
     }
 
-    // Leave checksum and signature as zeros.
+    // Leave signature as zeros.
+
+    header->file_size_ = dex_file_data_.size();
+    size_t skip = sizeof(header->magic_) + sizeof(header->checksum_);
+    header->checksum_ = adler32(0u, dex_file_data_.data() + skip, dex_file_data_.size() - skip);
+    std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index dbb7341..e495a27 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -340,6 +340,12 @@
   UsageError("      --profile-file will be merged into --reference-profile-file. Valid only when");
   UsageError("      specified together with --profile-file.");
   UsageError("");
+  UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
+  UsageError("      Cannot be used together with --profile-file.");
+  UsageError("");
+  UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
+  UsageError("      accepts a file descriptor. Cannot be used together with");
+  UsageError("       --reference-profile-file.");
   UsageError("  --print-pass-names: print a list of pass names");
   UsageError("");
   UsageError("  --disable-passes=<pass-names>:  disable one or more passes separated by comma.");
@@ -497,12 +503,24 @@
   return dex_files_size >= kMinDexFileCumulativeSizeForSwap;
 }
 
+static void CloseAllFds(const std::vector<uint32_t>& fds, const char* descriptor) {
+  for (size_t i = 0; i < fds.size(); i++) {
+    if (close(fds[i]) < 0) {
+      PLOG(WARNING) << "Failed to close descriptor for " << descriptor << " at index " << i;
+    }
+  }
+}
+
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
       compiler_kind_(Compiler::kOptimizing),
       instruction_set_(kRuntimeISA),
       // Take the default set of instruction features from the build.
+      image_file_location_oat_checksum_(0),
+      image_file_location_oat_data_begin_(0),
+      image_patch_delta_(0),
+      key_value_store_(nullptr),
       verification_results_(nullptr),
       method_inliner_map_(),
       runtime_(nullptr),
@@ -522,8 +540,14 @@
       boot_image_(false),
       multi_image_(false),
       is_host_(false),
+      class_loader_(nullptr),
+      elf_writers_(),
+      oat_writers_(),
+      rodata_(),
       image_writer_(nullptr),
       driver_(nullptr),
+      opened_dex_files_maps_(),
+      opened_dex_files_(),
       dump_stats_(false),
       dump_passes_(false),
       dump_timing_(false),
@@ -533,11 +557,6 @@
       timings_(timings) {}
 
   ~Dex2Oat() {
-    // Free opened dex files before deleting the runtime_, because ~DexFile
-    // uses MemMap, which is shut down by ~Runtime.
-    class_path_files_.clear();
-    opened_dex_files_.clear();
-
     // Log completion time before deleting the runtime_, because this accesses
     // the runtime.
     LogCompletionTime();
@@ -550,6 +569,9 @@
       for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files_) {
         dex_file.release();
       }
+      for (std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+        map.release();
+      }
       for (std::unique_ptr<File>& oat_file : oat_files_) {
         oat_file.release();
       }
@@ -575,6 +597,14 @@
     ParseUintOption(option, "--oat-fd", &oat_fd_, Usage);
   }
 
+  void ParseFdForCollection(const StringPiece& option,
+                            const char* arg_name,
+                            std::vector<uint32_t>* fds) {
+    uint32_t fd;
+    ParseUintOption(option, arg_name, &fd, Usage);
+    fds->push_back(fd);
+  }
+
   void ParseJ(const StringPiece& option) {
     ParseUintOption(option, "-j", &thread_count_, Usage, /* is_long_option */ false);
   }
@@ -778,11 +808,25 @@
       }
     }
 
+    if (!profile_files_.empty() && !profile_files_fd_.empty()) {
+      Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
+    }
     if (!profile_files_.empty()) {
       if (!reference_profile_files_.empty() &&
           (reference_profile_files_.size() != profile_files_.size())) {
         Usage("If specified, --reference-profile-file should match the number of --profile-file.");
       }
+    } else if (!reference_profile_files_.empty()) {
+      Usage("--reference-profile-file should only be supplied with --profile-file");
+    }
+    if (!profile_files_fd_.empty()) {
+      if (!reference_profile_files_fd_.empty() &&
+          (reference_profile_files_fd_.size() != profile_files_fd_.size())) {
+        Usage("If specified, --reference-profile-file-fd should match the number",
+              " of --profile-file-fd.");
+      }
+    } else if (!reference_profile_files_fd_.empty()) {
+      Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
     }
 
     if (!parser_options->oat_symbols.empty()) {
@@ -1076,6 +1120,10 @@
       } else if (option.starts_with("--reference-profile-file=")) {
         reference_profile_files_.push_back(
             option.substr(strlen("--reference-profile-file=")).ToString());
+      } else if (option.starts_with("--profile-file-fd=")) {
+        ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
+      } else if (option.starts_with("--reference-profile-file-fd=")) {
+        ParseFdForCollection(option, "--reference_profile-file-fd", &reference_profile_files_fd_);
       } else if (option == "--no-profile-file") {
         // No profile
       } else if (option == "--host") {
@@ -1125,6 +1173,9 @@
   // Check whether the oat output files are writable, and open them for later. Also open a swap
   // file, if a name is given.
   bool OpenFile() {
+    // Prune non-existent dex files now so that we don't create empty oat files for multi-image.
+    PruneNonExistentDexFiles();
+
     // Expand oat and image filenames for multi image.
     if (IsBootImage() && multi_image_) {
       ExpandOatAndImageFilenames();
@@ -1196,9 +1247,6 @@
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
 
-    // Organize inputs, handling multi-dex and multiple oat file outputs.
-    CreateDexOatMappings();
-
     return true;
   }
 
@@ -1241,89 +1289,135 @@
       return false;
     }
 
+    CreateOatWriters();
+    if (!AddDexFileSources()) {
+      return false;
+    }
+
+    if (IsBootImage() && image_filenames_.size() > 1) {
+      // If we're compiling the boot image, store the boot classpath into the Key-Value store.
+      // We need this for the multi-image case.
+      key_value_store_->Put(OatHeader::kBootClassPath, GetMultiImageBootClassPath());
+    }
+
+    if (!IsBootImage()) {
+      // When compiling an app, create the runtime early to retrieve
+      // the image location key needed for the oat header.
+      if (!CreateRuntime(std::move(runtime_options))) {
+        return false;
+      }
+
+      {
+        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
+        std::vector<gc::space::ImageSpace*> image_spaces =
+            Runtime::Current()->GetHeap()->GetBootImageSpaces();
+        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
+        image_file_location_oat_data_begin_ =
+            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
+        image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
+        // Store the boot image filename(s).
+        std::vector<std::string> image_filenames;
+        for (const gc::space::ImageSpace* image_space : image_spaces) {
+          image_filenames.push_back(image_space->GetImageFilename());
+        }
+        std::string image_file_location = Join(image_filenames, ':');
+        if (!image_file_location.empty()) {
+          key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
+        }
+      }
+
+      // Open dex files for class path.
+      const std::vector<std::string> class_path_locations =
+          GetClassPathLocations(runtime_->GetClassPathString());
+      OpenClassPathFiles(class_path_locations, &class_path_files_);
+
+      // Store the classpath we have right now.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+      key_value_store_->Put(OatHeader::kClassPathKey,
+                            OatFile::EncodeDexFileDependencies(class_path_files));
+    }
+
+    // Now that we have finalized key_value_store_, start writing the oat file.
     {
-      TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
+      TimingLogger::ScopedTiming t_dex("Writing and opening dex files", timings_);
+      rodata_.reserve(oat_writers_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        rodata_.push_back(elf_writers_[i]->StartRoData());
+        // Unzip or copy dex files straight to the oat file.
+        std::unique_ptr<MemMap> opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+        if (!oat_writers_[i]->WriteAndOpenDexFiles(rodata_.back(),
+                                                   oat_files_[i].get(),
+                                                   instruction_set_,
+                                                   instruction_set_features_.get(),
+                                                   key_value_store_.get(),
+                                                   &opened_dex_files_map,
+                                                   &opened_dex_files)) {
+          return false;
+        }
+        dex_files_per_oat_file_.push_back(MakeNonOwningPointerVector(opened_dex_files));
+        if (opened_dex_files_map != nullptr) {
+          opened_dex_files_maps_.push_back(std::move(opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+            dex_file_oat_filename_map_.emplace(dex_file.get(), oat_filenames_[i]);
+            opened_dex_files_.push_back(std::move(dex_file));
+          }
+        } else {
+          DCHECK(opened_dex_files.empty());
+        }
+      }
+    }
+
+    dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
+    if (IsBootImage()) {
+      // For boot image, pass opened dex files to the Runtime::Create().
+      // Note: Runtime acquires ownership of these dex files.
+      runtime_options.Set(RuntimeArgumentMap::BootClassPathDexList, &opened_dex_files_);
       if (!CreateRuntime(std::move(runtime_options))) {
         return false;
       }
     }
 
-    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-    // Runtime::Start, give it away now so that we don't starve GC.
-    Thread* self = Thread::Current();
-    self->TransitionFromRunnableToSuspended(kNative);
     // If we're doing the image, override the compiler filter to force full compilation. Must be
     // done ahead of WellKnownClasses::Init that causes verification.  Note: doesn't force
     // compilation of class initializers.
     // Whilst we're in native take the opportunity to initialize well known classes.
+    Thread* self = Thread::Current();
     WellKnownClasses::Init(self->GetJniEnv());
 
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    if (boot_image_filename_.empty()) {
-      dex_files_ = class_linker->GetBootClassPath();
-      // Prune invalid dex locations.
-      for (size_t i = 0; i < dex_locations_.size(); i++) {
-        const char* dex_location = dex_locations_[i];
-        bool contains = false;
-        for (const DexFile* dex_file : dex_files_) {
-          if (strcmp(dex_location, dex_file->GetLocation().c_str()) == 0) {
-            contains = true;
-            break;
-          }
-        }
-        if (!contains) {
-          dex_locations_.erase(dex_locations_.begin() + i);
-          i--;
-        }
-      }
-    } else {
-      TimingLogger::ScopedTiming t_dex("Opening dex files", timings_);
-      if (dex_filenames_.empty()) {
-        ATRACE_BEGIN("Opening zip archive from file descriptor");
-        std::string error_msg;
-        std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(zip_fd_,
-                                                                       zip_location_.c_str(),
-                                                                       &error_msg));
-        if (zip_archive.get() == nullptr) {
-          LOG(ERROR) << "Failed to open zip from file descriptor for '" << zip_location_ << "': "
-              << error_msg;
-          return false;
-        }
-        if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location_, &error_msg, &opened_dex_files_)) {
-          LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location_
-              << "': " << error_msg;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-        ATRACE_END();
-      } else {
-        size_t failure_count = OpenDexFiles(dex_filenames_, dex_locations_, &opened_dex_files_);
-        if (failure_count > 0) {
-          LOG(ERROR) << "Failed to open some dex files: " << failure_count;
-          return false;
-        }
-        for (auto& dex_file : opened_dex_files_) {
-          dex_files_.push_back(dex_file.get());
-        }
-      }
-
+    if (!IsBootImage()) {
       constexpr bool kSaveDexInput = false;
       if (kSaveDexInput) {
         SaveDexInput();
       }
+
+      // Handle and ClassLoader creation needs to come after Runtime::Create.
+      ScopedObjectAccess soa(self);
+
+      // Classpath: first the class-path given.
+      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
+
+      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
+      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+
+      class_loader_ = class_linker->CreatePathClassLoader(self, class_path_files);
     }
-    // Ensure opened dex files are writable for dex-to-dex transformations. Also ensure that
-    // the dex caches stay live since we don't want class unloading to occur during compilation.
-    for (const auto& dex_file : dex_files_) {
-      if (!dex_file->EnableWrite()) {
-        PLOG(ERROR) << "Failed to make .dex file writeable '" << dex_file->GetLocation() << "'\n";
+
+    // Ensure opened dex files are writable for dex-to-dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Protect(PROT_READ | PROT_WRITE)) {
+        PLOG(ERROR) << "Failed to make .dex files writeable.";
+        return false;
       }
+    }
+
+    // Ensure that the dex caches stay live since we don't want class unloading
+    // to occur during compilation.
+    for (const auto& dex_file : dex_files_) {
       ScopedObjectAccess soa(self);
       dex_caches_.push_back(soa.AddLocalReference<jobject>(
           class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
-      dex_file->CreateTypeLookupTable();
     }
 
     /*
@@ -1348,59 +1442,11 @@
     return true;
   }
 
-  void CreateDexOatMappings() {
-    if (oat_files_.size() > 1) {
-      size_t index = 0;
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*> dex_files;
-        if (index < dex_files_.size()) {
-          dex_files.push_back(dex_files_[index]);
-          dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-          index++;
-          while (index < dex_files_.size() &&
-              (dex_files_[index]->GetBaseLocation() == dex_files_[index - 1]->GetBaseLocation())) {
-            dex_file_oat_filename_map_.emplace(dex_files_[index], oat_filenames_[i]);
-            dex_files.push_back(dex_files_[index]);
-            index++;
-          }
-        }
-        dex_files_per_oat_file_.push_back(std::move(dex_files));
-      }
-    } else {
-      dex_files_per_oat_file_.push_back(dex_files_);
-      for (const DexFile* dex_file : dex_files_) {
-        dex_file_oat_filename_map_.emplace(dex_file, oat_filenames_[0]);
-      }
-    }
-  }
-
   // Create and invoke the compiler driver. This will compile all the dex files.
   void Compile() {
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
     compiler_phases_timings_.reset(new CumulativeLogger("compilation times"));
 
-    // Handle and ClassLoader creation needs to come after Runtime::Create
-    jobject class_loader = nullptr;
-    Thread* self = Thread::Current();
-
-    if (!boot_image_filename_.empty()) {
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      OpenClassPathFiles(runtime_->GetClassPathString(), dex_files_, &class_path_files_);
-      ScopedObjectAccess soa(self);
-
-      // Classpath: first the class-path given.
-      std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
-
-      // Store the classpath we have right now.
-      key_value_store_->Put(OatHeader::kClassPathKey,
-                            OatFile::EncodeDexFileDependencies(class_path_files));
-
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
-
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
-    }
-
     // Find the dex file we should not inline from.
 
     // For now, on the host always have core-oj removed.
@@ -1448,49 +1494,6 @@
       }
     }
 
-    if (IsBootImage() && image_filenames_.size() > 1) {
-      // If we're compiling the boot image, store the boot classpath into the Key-Value store. If
-      // the image filename was adapted (e.g., for our tests), we need to change this here, too, but
-      // need to strip all path components (they will be re-established when loading).
-      // We need this for the multi-image case.
-      std::ostringstream bootcp_oss;
-      bool first_bootcp = true;
-      for (size_t i = 0; i < dex_locations_.size(); ++i) {
-        if (!first_bootcp) {
-          bootcp_oss << ":";
-        }
-
-        std::string dex_loc = dex_locations_[i];
-        std::string image_filename = image_filenames_[i];
-
-        // Use the dex_loc path, but the image_filename name (without path elements).
-        size_t dex_last_slash = dex_loc.rfind('/');
-
-        // npos is max(size_t). That makes this a bit ugly.
-        size_t image_last_slash = image_filename.rfind('/');
-        size_t image_last_at = image_filename.rfind('@');
-        size_t image_last_sep = (image_last_slash == std::string::npos)
-                                    ? image_last_at
-                                    : (image_last_at == std::string::npos)
-                                          ? std::string::npos
-                                          : std::max(image_last_slash, image_last_at);
-        // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
-
-        if (dex_last_slash == std::string::npos) {
-          dex_loc = image_filename.substr(image_last_sep + 1);
-        } else {
-          dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
-              image_filename.substr(image_last_sep + 1);
-        }
-
-        // Image filenames already end with .art, no need to replace.
-
-        bootcp_oss << dex_loc;
-        first_bootcp = false;
-      }
-      key_value_store_->Put(OatHeader::kBootClassPath, bootcp_oss.str());
-    }
-
     driver_.reset(new CompilerDriver(compiler_options_.get(),
                                      verification_results_.get(),
                                      &method_inliner_map_,
@@ -1509,7 +1512,7 @@
                                      &dex_file_oat_filename_map_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
-    driver_->CompileAll(class_loader, dex_files_, timings_);
+    driver_->CompileAll(class_loader_, dex_files_, timings_);
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1577,19 +1580,18 @@
   // ImageWriter, if necessary.
   // Note: Flushing (and closing) the file is the caller's responsibility, except for the failure
   //       case (when the file will be explicitly erased).
-  bool CreateOatFiles() {
-    CHECK(key_value_store_.get() != nullptr);
-
+  bool WriteOatFiles() {
     TimingLogger::ScopedTiming t("dex2oat Oat", timings_);
 
-    std::vector<std::unique_ptr<OatWriter>> oat_writers;
-    {
-      TimingLogger::ScopedTiming t2("dex2oat OatWriter", timings_);
-      std::string image_file_location;
-      uint32_t image_file_location_oat_checksum = 0;
-      uintptr_t image_file_location_oat_data_begin = 0;
-      int32_t image_patch_delta = 0;
+    // Sync the data to the file, in case we did dex2dex transformations.
+    for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
+      if (!map->Sync()) {
+        PLOG(ERROR) << "Failed to Sync() dex2dex output. Map: " << map->GetName();
+        return false;
+      }
+    }
 
+    if (IsImage()) {
       if (app_image_ && image_base_ == 0) {
         std::vector<gc::space::ImageSpace*> image_spaces =
             Runtime::Current()->GetHeap()->GetBootImageSpaces();
@@ -1601,47 +1603,15 @@
         VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
       }
 
-      if (IsImage()) {
-        PrepareImageWriter(image_base_);
-      }
+      image_writer_.reset(new ImageWriter(*driver_,
+                                          image_base_,
+                                          compiler_options_->GetCompilePic(),
+                                          IsAppImage(),
+                                          image_storage_mode_,
+                                          oat_filenames_,
+                                          dex_file_oat_filename_map_));
 
-      if (!IsBootImage()) {
-        TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
-        std::vector<gc::space::ImageSpace*> image_spaces =
-            Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum = image_spaces[0]->GetImageHeader().GetOatChecksum();
-        image_file_location_oat_data_begin =
-            reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
-        image_patch_delta = image_spaces[0]->GetImageHeader().GetPatchDelta();
-        std::vector<std::string> image_filenames;
-        for (const gc::space::ImageSpace* image_space : image_spaces) {
-          image_filenames.push_back(image_space->GetImageFilename());
-        }
-        image_file_location = Join(image_filenames, ':');
-      }
-
-      if (!image_file_location.empty()) {
-        key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
-      }
-
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
-        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
-        std::unique_ptr<OatWriter> oat_writer(new OatWriter(dex_files,
-                                                            image_file_location_oat_checksum,
-                                                            image_file_location_oat_data_begin,
-                                                            image_patch_delta,
-                                                            driver_.get(),
-                                                            image_writer_.get(),
-                                                            IsBootImage(),
-                                                            timings_,
-                                                            key_value_store_.get()));
-        oat_writers.push_back(std::move(oat_writer));
-      }
-    }
-
-    if (IsImage()) {
-      // The OatWriter constructor has already updated offsets in methods and we need to
-      // prepare method offsets in the image address space for direct method patching.
+      // We need to prepare method offsets in the image address space for direct method patching.
       TimingLogger::ScopedTiming t2("dex2oat Prepare image address space", timings_);
       if (!image_writer_->PrepareImageAddressSpace()) {
         LOG(ERROR) << "Failed to prepare image address space.";
@@ -1651,20 +1621,22 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      for (size_t i = 0; i < oat_files_.size(); ++i) {
+      for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         std::unique_ptr<File>& oat_file = oat_files_[i];
-        std::unique_ptr<OatWriter>& oat_writer = oat_writers[i];
-        std::unique_ptr<ElfWriter> elf_writer =
-            CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get());
+        std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
+        std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
 
-        elf_writer->Start();
+        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
+        oat_writer->PrepareLayout(driver_.get(), image_writer_.get(), dex_files);
 
-        OutputStream* rodata = elf_writer->StartRoData();
+        OutputStream*& rodata = rodata_[i];
+        DCHECK(rodata != nullptr);
         if (!oat_writer->WriteRodata(rodata)) {
           LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file->GetPath();
           return false;
         }
         elf_writer->EndRoData(rodata);
+        rodata = nullptr;
 
         OutputStream* text = elf_writer->StartText();
         if (!oat_writer->WriteCode(text)) {
@@ -1673,6 +1645,14 @@
         }
         elf_writer->EndText(text);
 
+        if (!oat_writer->WriteHeader(elf_writer->GetStream(),
+                                     image_file_location_oat_checksum_,
+                                     image_file_location_oat_data_begin_,
+                                     image_patch_delta_)) {
+          LOG(ERROR) << "Failed to write oat header to the ELF file " << oat_file->GetPath();
+          return false;
+        }
+
         elf_writer->SetBssSize(oat_writer->GetBssSize());
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
@@ -1698,6 +1678,9 @@
         }
 
         VLOG(compiler) << "Oat file written successfully: " << oat_filenames_[i];
+
+        oat_writer.reset();
+        elf_writer.reset();
       }
     }
 
@@ -1815,17 +1798,27 @@
   }
 
   bool UseProfileGuidedCompilation() const {
-    return !profile_files_.empty();
+    return !profile_files_.empty() || !profile_files_fd_.empty();
   }
 
   bool ProcessProfiles() {
     DCHECK(UseProfileGuidedCompilation());
     ProfileCompilationInfo* info = nullptr;
-    if (ProfileAssistant::ProcessProfiles(profile_files_, reference_profile_files_, &info)) {
-      profile_compilation_info_.reset(info);
-      return true;
+    bool result = false;
+    if (profile_files_.empty()) {
+      DCHECK(!profile_files_fd_.empty());
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_fd_, reference_profile_files_fd_, &info);
+      CloseAllFds(profile_files_fd_, "profile_files_fd_");
+      CloseAllFds(reference_profile_files_fd_, "reference_profile_files_fd_");
+    } else {
+      result = ProfileAssistant::ProcessProfiles(
+          profile_files_, reference_profile_files_, &info);
     }
-    return false;
+
+    profile_compilation_info_.reset(info);
+
+    return result;
   }
 
   bool ShouldCompileBasedOnProfiles() const {
@@ -1845,65 +1838,78 @@
     return result;
   }
 
-  static size_t OpenDexFiles(std::vector<const char*>& dex_filenames,
-                             std::vector<const char*>& dex_locations,
-                             std::vector<std::unique_ptr<const DexFile>>* dex_files) {
-    DCHECK(dex_files != nullptr) << "OpenDexFiles out-param is nullptr";
-    size_t failure_count = 0;
-    for (size_t i = 0; i < dex_filenames.size(); i++) {
-      const char* dex_filename = dex_filenames[i];
-      const char* dex_location = dex_locations[i];
-      ATRACE_BEGIN(StringPrintf("Opening dex file '%s'", dex_filenames[i]).c_str());
-      std::string error_msg;
-      if (!OS::FileExists(dex_filename)) {
-        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
-        dex_filenames.erase(dex_filenames.begin() + i);
-        dex_locations.erase(dex_locations.begin() + i);
-        i--;
-        continue;
+  std::string GetMultiImageBootClassPath() {
+    DCHECK(IsBootImage());
+    DCHECK_GT(oat_filenames_.size(), 1u);
+    // If the image filename was adapted (e.g., for our tests), we need to change this here,
+    // too, but need to strip all path components (they will be re-established when loading).
+    std::ostringstream bootcp_oss;
+    bool first_bootcp = true;
+    for (size_t i = 0; i < dex_locations_.size(); ++i) {
+      if (!first_bootcp) {
+        bootcp_oss << ":";
       }
-      if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
-        LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
-        ++failure_count;
+
+      std::string dex_loc = dex_locations_[i];
+      std::string image_filename = image_filenames_[i];
+
+      // Use the dex_loc path, but the image_filename name (without path elements).
+      size_t dex_last_slash = dex_loc.rfind('/');
+
+      // npos is max(size_t). That makes this a bit ugly.
+      size_t image_last_slash = image_filename.rfind('/');
+      size_t image_last_at = image_filename.rfind('@');
+      size_t image_last_sep = (image_last_slash == std::string::npos)
+                                  ? image_last_at
+                                  : (image_last_at == std::string::npos)
+                                        ? std::string::npos
+                                        : std::max(image_last_slash, image_last_at);
+      // Note: whenever image_last_sep == npos, +1 overflow means using the full string.
+
+      if (dex_last_slash == std::string::npos) {
+        dex_loc = image_filename.substr(image_last_sep + 1);
+      } else {
+        dex_loc = dex_loc.substr(0, dex_last_slash + 1) +
+            image_filename.substr(image_last_sep + 1);
       }
-      ATRACE_END();
+
+      // Image filenames already end with .art, no need to replace.
+
+      bootcp_oss << dex_loc;
+      first_bootcp = false;
     }
-    return failure_count;
+    return bootcp_oss.str();
   }
 
-  // Returns true if dex_files has a dex with the named location. We compare canonical locations,
-  // so that relative and absolute paths will match. Not caching for the dex_files isn't very
-  // efficient, but under normal circumstances the list is neither large nor is this part too
-  // sensitive.
-  static bool DexFilesContains(const std::vector<const DexFile*>& dex_files,
-                               const std::string& location) {
-    std::string canonical_location(DexFile::GetDexCanonicalLocation(location.c_str()));
-    for (size_t i = 0; i < dex_files.size(); ++i) {
-      if (DexFile::GetDexCanonicalLocation(dex_files[i]->GetLocation().c_str()) ==
-          canonical_location) {
-        return true;
-      }
+  std::vector<std::string> GetClassPathLocations(const std::string& class_path) {
+    // This function is used only for apps and for an app we have exactly one oat file.
+    DCHECK(!IsBootImage());
+    DCHECK_EQ(oat_writers_.size(), 1u);
+    std::vector<std::string> dex_files_canonical_locations;
+    for (const char* location : oat_writers_[0]->GetSourceLocations()) {
+      dex_files_canonical_locations.push_back(DexFile::GetDexCanonicalLocation(location));
     }
-    return false;
-  }
 
-  // Appends to opened_dex_files any elements of class_path that dex_files
-  // doesn't already contain. This will open those dex files as necessary.
-  static void OpenClassPathFiles(const std::string& class_path,
-                                 std::vector<const DexFile*> dex_files,
-                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
-    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
     std::vector<std::string> parsed;
     Split(class_path, ':', &parsed);
-    // Take Locks::mutator_lock_ so that lock ordering on the ClassLinker::dex_lock_ is maintained.
-    ScopedObjectAccess soa(Thread::Current());
-    for (size_t i = 0; i < parsed.size(); ++i) {
-      if (DexFilesContains(dex_files, parsed[i])) {
-        continue;
-      }
+    auto kept_it = std::remove_if(parsed.begin(),
+                                  parsed.end(),
+                                  [dex_files_canonical_locations](const std::string& location) {
+      return ContainsElement(dex_files_canonical_locations,
+                             DexFile::GetDexCanonicalLocation(location.c_str()));
+    });
+    parsed.erase(kept_it, parsed.end());
+    return parsed;
+  }
+
+  // Opens requested class path files and appends them to opened_dex_files.
+  static void OpenClassPathFiles(const std::vector<std::string>& class_path_locations,
+                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
+    for (const std::string& location : class_path_locations) {
       std::string error_msg;
-      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, opened_dex_files)) {
-        LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
+      if (!DexFile::Open(location.c_str(), location.c_str(), &error_msg, opened_dex_files)) {
+        LOG(WARNING) << "Failed to open dex file '" << location << "': " << error_msg;
       }
     }
   }
@@ -1978,6 +1984,63 @@
     return true;
   }
 
+  void PruneNonExistentDexFiles() {
+    DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+    size_t kept = 0u;
+    for (size_t i = 0, size = dex_filenames_.size(); i != size; ++i) {
+      if (!OS::FileExists(dex_filenames_[i])) {
+        LOG(WARNING) << "Skipping non-existent dex file '" << dex_filenames_[i] << "'";
+      } else {
+        dex_filenames_[kept] = dex_filenames_[i];
+        dex_locations_[kept] = dex_locations_[i];
+        ++kept;
+      }
+    }
+    dex_filenames_.resize(kept);
+    dex_locations_.resize(kept);
+  }
+
+  bool AddDexFileSources() {
+    TimingLogger::ScopedTiming t2("AddDexFileSources", timings_);
+    if (zip_fd_ != -1) {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      if (!oat_writers_[0]->AddZippedDexFilesSource(ScopedFd(zip_fd_), zip_location_.c_str())) {
+        return false;
+      }
+    } else if (oat_writers_.size() > 1u) {
+      // Multi-image.
+      DCHECK_EQ(oat_writers_.size(), dex_filenames_.size());
+      DCHECK_EQ(oat_writers_.size(), dex_locations_.size());
+      for (size_t i = 0, size = oat_writers_.size(); i != size; ++i) {
+        if (!oat_writers_[i]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    } else {
+      DCHECK_EQ(oat_writers_.size(), 1u);
+      DCHECK_EQ(dex_filenames_.size(), dex_locations_.size());
+      DCHECK_NE(dex_filenames_.size(), 0u);
+      for (size_t i = 0; i != dex_filenames_.size(); ++i) {
+        if (!oat_writers_[0]->AddDexFileSource(dex_filenames_[i], dex_locations_[i])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  void CreateOatWriters() {
+    TimingLogger::ScopedTiming t2("CreateOatWriters", timings_);
+    elf_writers_.reserve(oat_files_.size());
+    oat_writers_.reserve(oat_files_.size());
+    for (const std::unique_ptr<File>& oat_file : oat_files_) {
+      elf_writers_.emplace_back(
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get()));
+      elf_writers_.back()->Start();
+      oat_writers_.emplace_back(new OatWriter(IsBootImage(), timings_));
+    }
+  }
+
   void SaveDexInput() {
     for (size_t i = 0; i < dex_files_.size(); ++i) {
       const DexFile* dex_file = dex_files_[i];
@@ -2037,8 +2100,8 @@
   }
 
   // Create a runtime necessary for compilation.
-  bool CreateRuntime(RuntimeArgumentMap&& runtime_options)
-      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
+  bool CreateRuntime(RuntimeArgumentMap&& runtime_options) {
+    TimingLogger::ScopedTiming t_runtime("Create runtime", timings_);
     if (!Runtime::Create(std::move(runtime_options))) {
       LOG(ERROR) << "Failed to create runtime";
       return false;
@@ -2059,18 +2122,12 @@
 
     runtime_->GetClassLinker()->RunRootClinits();
 
-    return true;
-  }
+    // Runtime::Create acquired the mutator_lock_ that is normally given away when we
+    // Runtime::Start, give it away now so that we don't starve GC.
+    Thread* self = Thread::Current();
+    self->TransitionFromRunnableToSuspended(kNative);
 
-  void PrepareImageWriter(uintptr_t image_base) {
-    DCHECK(IsImage());
-    image_writer_.reset(new ImageWriter(*driver_,
-                                        image_base,
-                                        compiler_options_->GetCompilePic(),
-                                        IsAppImage(),
-                                        image_storage_mode_,
-                                        oat_filenames_,
-                                        dex_file_oat_filename_map_));
+    return true;
   }
 
   // Let the ImageWriter write the image files. If we do not compile PIC, also fix up the oat files.
@@ -2249,6 +2306,9 @@
   InstructionSet instruction_set_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
 
+  uint32_t image_file_location_oat_checksum_;
+  uintptr_t image_file_location_oat_data_begin_;
+  int32_t image_patch_delta_;
   std::unique_ptr<SafeMap<std::string, std::string> > key_value_store_;
 
   std::unique_ptr<VerificationResults> verification_results_;
@@ -2256,11 +2316,11 @@
   DexFileToMethodInlinerMap method_inliner_map_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 
+  std::unique_ptr<Runtime> runtime_;
+
   // Ownership for the class path files.
   std::vector<std::unique_ptr<const DexFile>> class_path_files_;
 
-  std::unique_ptr<Runtime> runtime_;
-
   size_t thread_count_;
   uint64_t start_ns_;
   std::unique_ptr<WatchDog> watchdog_;
@@ -2295,11 +2355,17 @@
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
-  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+  jobject class_loader_;
 
+  std::vector<std::unique_ptr<ElfWriter>> elf_writers_;
+  std::vector<std::unique_ptr<OatWriter>> oat_writers_;
+  std::vector<OutputStream*> rodata_;
   std::unique_ptr<ImageWriter> image_writer_;
   std::unique_ptr<CompilerDriver> driver_;
 
+  std::vector<std::unique_ptr<MemMap>> opened_dex_files_maps_;
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
+
   std::vector<std::string> verbose_methods_;
   bool dump_stats_;
   bool dump_passes_;
@@ -2311,6 +2377,8 @@
   int app_image_fd_;
   std::vector<std::string> profile_files_;
   std::vector<std::string> reference_profile_files_;
+  std::vector<uint32_t> profile_files_fd_;
+  std::vector<uint32_t> reference_profile_files_fd_;
   std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
@@ -2346,7 +2414,7 @@
 static int CompileImage(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
@@ -2385,7 +2453,7 @@
 static int CompileApp(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
-  if (!dex2oat.CreateOatFiles()) {
+  if (!dex2oat.WriteOatFiles()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
@@ -2442,6 +2510,11 @@
     }
   }
 
+  // Check early that the result of compilation can be written
+  if (!dex2oat.OpenFile()) {
+    return EXIT_FAILURE;
+  }
+
   // Print the complete line when any of the following is true:
   //   1) Debug build
   //   2) Compiling an image
@@ -2455,11 +2528,6 @@
   }
 
   if (!dex2oat.Setup()) {
-    return EXIT_FAILURE;
-  }
-
-  // Check early that the result of compilation can be written
-  if (!dex2oat.OpenFile()) {
     dex2oat.EraseOatFiles();
     return EXIT_FAILURE;
   }
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index 71e0590..814cbd0 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -26,16 +26,25 @@
 namespace art {
 
 bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
+  return Init(filename, O_CREAT | O_RDWR, true, error_msg);
+}
+
+bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) {
   while (true) {
     if (file_.get() != nullptr) {
       UNUSED(file_->FlushCloseOrErase());  // Ignore result.
     }
-    file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
+    file_.reset(OS::OpenFileWithFlags(filename, flags));
     if (file_.get() == nullptr) {
       *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
       return false;
     }
-    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
+    int operation = block ? LOCK_EX : (LOCK_EX | LOCK_NB);
+    int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), operation));
+    if (flock_result == EWOULDBLOCK) {
+      // File is locked by someone else and we are required not to block;
+      return false;
+    }
     if (flock_result != 0) {
       *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno));
       return false;
@@ -51,11 +60,23 @@
     if (stat_result != 0) {
       PLOG(WARNING) << "Failed to stat, will retry: " << filename;
       // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // Note that in theory we could race with someone here for a long time and end up retrying
+        // over and over again. This potential behavior does not fit well in the non-blocking
+        // semantics. Thus, if we are not require to block return failure when racing.
+        return false;
+      }
     }
     if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) {
       LOG(WARNING) << "File changed while locking, will retry: " << filename;
-      continue;
+      if (block) {
+        continue;
+      } else {
+        // See comment above.
+        return false;
+      }
     }
     return true;
   }
@@ -78,7 +99,7 @@
   return true;
 }
 
-File* ScopedFlock::GetFile() {
+File* ScopedFlock::GetFile() const {
   CHECK(file_.get() != nullptr);
   return file_.get();
 }
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index 08612e3..cc22056 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -32,10 +32,15 @@
   // Attempts to acquire an exclusive file lock (see flock(2)) on the file
   // at filename, and blocks until it can do so.
   //
-  // Returns true if the lock could be acquired, or false if an error
-  // occurred. It is an error if the file does not exist, or if its inode
-  // changed (usually due to a new file being created at the same path)
-  // between attempts to lock it.
+  // Returns true if the lock could be acquired, or false if an error occurred.
+  // It is an error if its inode changed (usually due to a new file being
+  // created at the same path) between attempts to lock it. In blocking mode,
+  // locking will be retried if the file changed. In non-blocking mode, false
+  // is returned and no attempt is made to re-acquire the lock.
+  //
+  // The file is opened with the provided flags.
+  bool Init(const char* filename, int flags, bool block, std::string* error_msg);
+  // Calls Init(filename, O_CREAT | O_RDWR, true, errror_msg)
   bool Init(const char* filename, std::string* error_msg);
   // Attempt to acquire an exclusive file lock (see flock(2)) on 'file'.
   // Returns true if the lock could be acquired or false if an error
@@ -43,7 +48,7 @@
   bool Init(File* file, std::string* error_msg);
 
   // Returns the (locked) file associated with this instance.
-  File* GetFile();
+  File* GetFile() const;
 
   // Returns whether a file is held.
   bool HasFile();
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 78bc3d5..e17bebb 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -316,4 +316,21 @@
   guard_state_ = GuardState::kNoCheck;
 }
 
+bool FdFile::ClearContent() {
+  if (SetLength(0) < 0) {
+    PLOG(art::ERROR) << "Failed to reset the length";
+    return false;
+  }
+  return ResetOffset();
+}
+
+bool FdFile::ResetOffset() {
+  off_t rc =  TEMP_FAILURE_RETRY(lseek(fd_, 0, SEEK_SET));
+  if (rc == static_cast<off_t>(-1)) {
+    PLOG(art::ERROR) << "Failed to reset the offset";
+    return false;
+  }
+  return true;
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 231a1ab..1e2d8af 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -79,6 +79,11 @@
 
   // Copy data from another file.
   bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+  // Clears the file content and resets the file offset to 0.
+  // Returns true upon success, false otherwise.
+  bool ClearContent();
+  // Resets the file offset to the beginning of the file.
+  bool ResetOffset();
 
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index e89c5af..3df9101 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -232,7 +232,7 @@
   }
 
   if (founddir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory.";
+    ADD_FAILURE() << "Cannot find Android tools directory.";
   }
   return founddir;
 }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index bc8ba97..9b93c13 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -687,8 +687,8 @@
   return nullptr;
 }
 
-void DexFile::CreateTypeLookupTable() const {
-  lookup_table_.reset(TypeLookupTable::Create(*this));
+void DexFile::CreateTypeLookupTable(uint8_t* storage) const {
+  lookup_table_.reset(TypeLookupTable::Create(*this, storage));
 }
 
 // Given a signature place the type ids into the given vector
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 8a3db6c..968b37b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -1094,11 +1094,11 @@
   int32_t GetLineNumFromPC(ArtMethod* method, uint32_t rel_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
                             DexDebugNewLocalCb local_cb, void* context) const;
 
-  // Returns false if there is no debugging information or if it can not be decoded.
+  // Returns false if there is no debugging information or if it cannot be decoded.
   bool DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
                                void* context) const;
 
@@ -1157,7 +1157,7 @@
     return lookup_table_.get();
   }
 
-  void CreateTypeLookupTable() const;
+  void CreateTypeLookupTable(uint8_t* storage = nullptr) const;
 
  private:
   // Opens a .dex file
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 272249c..b67af53 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -686,31 +686,6 @@
 // Set of dex files for interface method tests. As it's not as easy to mutate method names, it's
 // just easier to break up bad cases.
 
-// Interface with an instance constructor.
-//
-// .class public interface LInterfaceMethodFlags;
-// .super Ljava/lang/Object;
-//
-// .method public static constructor <clinit>()V
-// .registers 1
-//     return-void
-// .end method
-//
-// .method public constructor <init>()V
-// .registers 1
-//     return-void
-// .end method
-static const char kMethodFlagsInterfaceWithInit[] =
-    "ZGV4CjAzNQDRNt+hZ6X3I+xe66iVlCW7h9I38HmN4SvUAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAF"
-    "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAAAIAQAAzAAAAMwA"
-    "AADWAAAA3gAAAPYAAAAKAQAAAgAAAAMAAAAEAAAABAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAQAA"
-    "AAAAAAABAgAAAQAAAAAAAAD/////AAAAADoBAAAAAAAACDxjbGluaXQ+AAY8aW5pdD4AFkxJbnRl"
-    "cmZhY2VNZXRob2RGbGFnczsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgAAAAAAAAAAAQAAAAAAAAAA"
-    "AAAAAQAAAA4AAAABAAEAAAAAAAAAAAABAAAADgAAAAIAAImABJQCAYGABKgCAAALAAAAAAAAAAEA"
-    "AAAAAAAAAQAAAAUAAABwAAAAAgAAAAMAAACEAAAAAwAAAAEAAACQAAAABQAAAAIAAACcAAAABgAA"
-    "AAEAAACsAAAAAiAAAAUAAADMAAAAAxAAAAEAAAAQAQAAASAAAAIAAAAUAQAAACAAAAEAAAA6AQAA"
-    "ABAAAAEAAABMAQAA";
-
 // Standard interface. Use declared-synchronized again for 3B encoding.
 //
 // .class public interface LInterfaceMethodFlags;
@@ -751,13 +726,6 @@
 }
 
 TEST_F(DexFileVerifierTest, MethodAccessFlagsInterfaces) {
-  // Reject interface with <init>.
-  VerifyModification(
-      kMethodFlagsInterfaceWithInit,
-      "method_flags_interface_with_init",
-      [](DexFile* dex_file ATTRIBUTE_UNUSED) {},
-      "Non-clinit interface method 1 should not have code");
-
   VerifyModification(
       kMethodFlagsInterface,
       "method_flags_interface_ok",
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 9a9f42b..0663b7e 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -193,10 +193,10 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    // Pass in false since the object can not be finalizable.
+    // Pass in false since the object cannot be finalizable.
     return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
   }
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -207,7 +207,7 @@
                                                       Thread* self,
                                                       gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
-  // Pass in false since the object can not be finalizable.
+  // Pass in false since the object cannot be finalizable.
   return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
@@ -410,10 +410,19 @@
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
   } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    // Maintain interpreter-like semantics where NullPointerException is thrown
-    // after potential NoSuchMethodError from class linker.
-    ThrowNullPointerExceptionForMethodAccess(method_idx, type);
-    return nullptr;  // Failure.
+    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
+                 resolved_method->IsConstructor())) {
+      // Hack for String init:
+      //
+      // We assume that the input of String.<init> in verified code is always
+      // an unitialized reference. If it is a null constant, it must have been
+      // optimized out by the compiler. Do not throw NullPointerException.
+    } else {
+      // Maintain interpreter-like semantics where NullPointerException is thrown
+      // after potential NoSuchMethodError from class linker.
+      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
+      return nullptr;  // Failure.
+    }
   } else if (access_check) {
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     bool can_access_resolved_method =
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index ce6467a..7727b2d 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -180,7 +180,7 @@
   t.NewTiming("ProcessCards");
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), false, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 99e98bb..2784693 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -227,7 +227,7 @@
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_, false, true);
-  // Clear the whole card table since we can not Get any additional dirty cards during the
+  // Clear the whole card table since we cannot get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 416510d..c8e913c 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,7 +34,7 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
-  // Mark compact colector.
+  // Mark compact collector.
   kCollectorTypeMC,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e7ea983..7b531ba 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -89,7 +89,6 @@
   class RegionSpace;
   class RosAllocSpace;
   class Space;
-  class SpaceTest;
   class ZygoteSpace;
 }  // namespace space
 
@@ -1335,7 +1334,6 @@
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
-  friend class space::SpaceTest;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 39ba743..5e7f1a2 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -86,7 +86,7 @@
     // it to the mutator as long as the GC is not preserving references.
     if (LIKELY(collector_ != nullptr)) {
       // If it's null it means not marked, but it could become marked if the referent is reachable
-      // by finalizer referents. So we can not return in this case and must block. Otherwise, we
+      // by finalizer referents. So we cannot return in this case and must block. Otherwise, we
       // can return it to the mutator as long as the GC is not preserving references, in which
       // case only black nodes can be safely returned. If the GC is preserving references, the
       // mutator could take a white field from a grey or white node and move it somewhere else
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
deleted file mode 100644
index 93fe155..0000000
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-#include "dlmalloc_space.h"
-#include "scoped_thread_state_change.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(DlMallocSpace, CreateDlMallocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 2798b21..e70fe21 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -521,7 +521,7 @@
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(GetAddressForAllocationInfo(new_info));
-  // We always put our object at the start of the free block, there can not be another free block
+  // We always put our object at the start of the free block, there cannot be another free block
   // before it.
   if (kIsDebugBuild) {
     mprotect(obj, allocation_size, PROT_READ | PROT_WRITE);
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 05b484a..ad38724 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -22,7 +22,7 @@
 namespace gc {
 namespace space {
 
-class LargeObjectSpaceTest : public SpaceTest {
+class LargeObjectSpaceTest : public SpaceTest<CommonRuntimeTest> {
  public:
   void LargeObjectTest();
 
diff --git a/runtime/gc/space/rosalloc_space_base_test.cc b/runtime/gc/space/rosalloc_space_base_test.cc
deleted file mode 100644
index 0c5be03..0000000
--- a/runtime/gc/space/rosalloc_space_base_test.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "space_test.h"
-
-namespace art {
-namespace gc {
-namespace space {
-
-MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
-                                 size_t capacity, uint8_t* requested_begin) {
-  return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
-}
-
-TEST_SPACE_CREATE_FN_BASE(RosAllocSpace, CreateRosAllocSpace)
-
-
-}  // namespace space
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/space/space_create_test.cc b/runtime/gc/space/space_create_test.cc
new file mode 100644
index 0000000..aea2d9f
--- /dev/null
+++ b/runtime/gc/space/space_create_test.cc
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "space_test.h"
+
+#include "dlmalloc_space.h"
+#include "rosalloc_space.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+enum MallocSpaceType {
+  kMallocSpaceDlMalloc,
+  kMallocSpaceRosAlloc,
+};
+
+class SpaceCreateTest : public SpaceTest<CommonRuntimeTestWithParam<MallocSpaceType>> {
+ public:
+  MallocSpace* CreateSpace(const std::string& name,
+                           size_t initial_size,
+                           size_t growth_limit,
+                           size_t capacity,
+                           uint8_t* requested_begin) {
+    const MallocSpaceType type = GetParam();
+    if (type == kMallocSpaceDlMalloc) {
+      return DlMallocSpace::Create(name,
+                                   initial_size,
+                                   growth_limit,
+                                   capacity,
+                                   requested_begin,
+                                   false);
+    }
+    DCHECK_EQ(static_cast<uint32_t>(type), static_cast<uint32_t>(kMallocSpaceRosAlloc));
+    return RosAllocSpace::Create(name,
+                                 initial_size,
+                                 growth_limit,
+                                 capacity,
+                                 requested_begin,
+                                 Runtime::Current()->GetHeap()->IsLowMemoryMode(),
+                                 false);
+  }
+};
+
+TEST_P(SpaceCreateTest, InitTestBody) {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  {
+    // Init < max == growth
+    std::unique_ptr<Space> space(CreateSpace("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init == max == growth
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init > max == growth
+    space.reset(CreateSpace("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Growth == init < max
+    space.reset(CreateSpace("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Growth < init < max
+    space.reset(CreateSpace("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+    // Init < growth < max
+    space.reset(CreateSpace("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
+    EXPECT_TRUE(space != nullptr);
+    // Init < max < growth
+    space.reset(CreateSpace("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
+    EXPECT_TRUE(space == nullptr);
+  }
+}
+
+// TODO: This test is not very good, we should improve it.
+// The test should do more allocations before the creation of the ZygoteSpace, and then do
+// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
+// the GC works with the ZygoteSpace.
+TEST_P(SpaceCreateTest, ZygoteSpaceTestBody) {
+  size_t dummy;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+
+  // Make sure that the zygote space isn't directly at the start of the space.
+  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  heap->RemoveSpace(old_space);
+  heap->RevokeAllThreadLocalBuffers();
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space, false);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space, false);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  ptr1.Assign(Alloc(space,
+                    self,
+                    1 * MB,
+                    &ptr1_bytes_allocated,
+                    &ptr1_usable_size,
+                    &ptr1_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  ptr3.Assign(AllocWithGrowth(space,
+                              self,
+                              2 * MB,
+                              &ptr3_bytes_allocated,
+                              &ptr3_usable_size,
+                              &ptr3_bytes_tl_bulk_allocated));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(2U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+
+  // Final clean up.
+  free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeTestBody) {
+  size_t dummy = 0;
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space);
+
+  // Succeeds, fits without adjusting the footprint limit.
+  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::Object> ptr1(hs.NewHandle(Alloc(space,
+                                                        self,
+                                                        1 * MB,
+                                                        &ptr1_bytes_allocated,
+                                                        &ptr1_usable_size,
+                                                        &ptr1_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr1.Get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
+  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr2 == nullptr);
+
+  // Succeeds, adjusts the footprint.
+  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
+  MutableHandle<mirror::Object> ptr3(hs.NewHandle(AllocWithGrowth(space,
+                                                                  self,
+                                                                  8 * MB,
+                                                                  &ptr3_bytes_allocated,
+                                                                  &ptr3_usable_size,
+                                                                  &ptr3_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr3.Get() != nullptr);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
+  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
+
+  // Fails, requires a higher footprint limit.
+  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr4 == nullptr);
+
+  // Also fails, requires a higher allowed footprint.
+  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
+  EXPECT_TRUE(ptr5 == nullptr);
+
+  // Release some memory.
+  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
+  space->Free(self, ptr3.Assign(nullptr));
+  EXPECT_LE(8U * MB, free3);
+
+  // Succeeds, now that memory has been freed.
+  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
+  Handle<mirror::Object> ptr6(hs.NewHandle(AllocWithGrowth(space,
+                                                           self,
+                                                           9 * MB,
+                                                           &ptr6_bytes_allocated,
+                                                           &ptr6_usable_size,
+                                                           &ptr6_bytes_tl_bulk_allocated)));
+  EXPECT_TRUE(ptr6.Get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
+  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
+
+  // Final clean up.
+  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
+  space->Free(self, ptr1.Assign(nullptr));
+  EXPECT_LE(1U * MB, free1);
+}
+
+TEST_P(SpaceCreateTest, AllocAndFreeListTestBody) {
+  MallocSpace* space(CreateSpace("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
+  ASSERT_TRUE(space != nullptr);
+
+  // Make space findable to the heap, will also delete space when runtime is cleaned up
+  AddSpace(space);
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Succeeds, fits without adjusting the max allowed footprint.
+  mirror::Object* lots_of_objects[1024];
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+    lots_of_objects[i] = Alloc(space,
+                               self,
+                               size_of_zero_length_byte_array,
+                               &allocation_size,
+                               &usable_size,
+                               &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+
+  // Succeeds, fits by adjusting the max allowed footprint.
+  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
+    lots_of_objects[i] = AllocWithGrowth(space,
+                                         self,
+                                         1024,
+                                         &allocation_size,
+                                         &usable_size,
+                                         &bytes_tl_bulk_allocated);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
+    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
+                bytes_tl_bulk_allocated >= allocation_size);
+  }
+
+  // Release memory.
+  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+}
+
+INSTANTIATE_TEST_CASE_P(CreateRosAllocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceRosAlloc));
+INSTANTIATE_TEST_CASE_P(CreateDlMallocSpace,
+                        SpaceCreateTest,
+                        testing::Values(kMallocSpaceDlMalloc));
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 4d2db11..e588eb3 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -33,12 +33,10 @@
 namespace gc {
 namespace space {
 
-class SpaceTest : public CommonRuntimeTest {
+template <class Super>
+class SpaceTest : public Super {
  public:
-  jobject byte_array_class_;
-
-  SpaceTest() : byte_array_class_(nullptr) {
-  }
+  jobject byte_array_class_ = nullptr;
 
   void AddSpace(ContinuousSpace* space, bool revoke = true) {
     Heap* heap = Runtime::Current()->GetHeap();
@@ -62,13 +60,19 @@
     return reinterpret_cast<mirror::Class*>(self->DecodeJObject(byte_array_class_));
   }
 
-  mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                        size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* Alloc(space::MallocSpace* alloc_space,
+                        Thread* self,
+                        size_t bytes,
+                        size_t* bytes_allocated,
+                        size_t* usable_size,
                         size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self)));
-    mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size,
+    mirror::Object* obj = alloc_space->Alloc(self,
+                                             bytes,
+                                             bytes_allocated,
+                                             usable_size,
                                              bytes_tl_bulk_allocated);
     if (obj != nullptr) {
       InstallClass(obj, byte_array_class.Get(), bytes);
@@ -76,8 +80,11 @@
     return obj;
   }
 
-  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes,
-                                  size_t* bytes_allocated, size_t* usable_size,
+  mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space,
+                                  Thread* self,
+                                  size_t bytes,
+                                  size_t* bytes_allocated,
+                                  size_t* usable_size,
                                   size_t* bytes_tl_bulk_allocated)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self);
@@ -117,10 +124,6 @@
 
   typedef MallocSpace* (*CreateSpaceFn)(const std::string& name, size_t initial_size, size_t growth_limit,
                                         size_t capacity, uint8_t* requested_begin);
-  void InitTestBody(CreateSpaceFn create_space);
-  void ZygoteSpaceTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeTestBody(CreateSpaceFn create_space);
-  void AllocAndFreeListTestBody(CreateSpaceFn create_space);
 
   void SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
                                            int round, size_t growth_limit);
@@ -132,278 +135,11 @@
   return *seed;
 }
 
-void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
-  // This will lead to error messages in the log.
-  ScopedLogSeverity sls(LogSeverity::FATAL);
-
-  {
-    // Init < max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init == max == growth
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init > max == growth
-    std::unique_ptr<Space> space(create_space("test", 32 * MB, 16 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Growth == init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Growth < init < max
-    std::unique_ptr<Space> space(create_space("test", 16 * MB, 8 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-  {
-    // Init < growth < max
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 16 * MB, 32 * MB, nullptr));
-    EXPECT_TRUE(space.get() != nullptr);
-  }
-  {
-    // Init < max < growth
-    std::unique_ptr<Space> space(create_space("test", 8 * MB, 32 * MB, 16 * MB, nullptr));
-    EXPECT_TRUE(space.get() == nullptr);
-  }
-}
-
-// TODO: This test is not very good, we should improve it.
-// The test should do more allocations before the creation of the ZygoteSpace, and then do
-// allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
-// the GC works with the ZygoteSpace.
-void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) {
-  size_t dummy;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  EXPECT_EQ(free3, space->Free(self, ptr3.Assign(nullptr)));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-
-  // Make sure that the zygote space isn't directly at the start of the space.
-  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr);
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  space::Space* old_space = space;
-  heap->RemoveSpace(old_space);
-  heap->RevokeAllThreadLocalBuffers();
-  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
-                                                              heap->IsLowMemoryMode(),
-                                                              &space);
-  delete old_space;
-  // Add the zygote space.
-  AddSpace(zygote_space, false);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space, false);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                    &ptr1_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                              &ptr3_bytes_tl_bulk_allocated));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(2U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-
-  // Final clean up.
-  free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) {
-  size_t dummy = 0;
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-
-  // Succeeds, fits without adjusting the footprint limit.
-  size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated;
-  StackHandleScope<3> hs(soa.Self());
-  MutableHandle<mirror::Object> ptr1(
-      hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size,
-                         &ptr1_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr1.Get() != nullptr);
-  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
-  EXPECT_LE(1U * MB, ptr1_usable_size);
-  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
-  EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr2 == nullptr);
-
-  // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated;
-  MutableHandle<mirror::Object> ptr3(
-      hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size,
-                                   &ptr3_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr3.Get() != nullptr);
-  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
-  EXPECT_LE(8U * MB, ptr3_usable_size);
-  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
-  EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated);
-
-  // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr4 == nullptr);
-
-  // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy);
-  EXPECT_TRUE(ptr5 == nullptr);
-
-  // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.Get(), nullptr);
-  EXPECT_EQ(free3, ptr3_bytes_allocated);
-  space->Free(self, ptr3.Assign(nullptr));
-  EXPECT_LE(8U * MB, free3);
-
-  // Succeeds, now that memory has been freed.
-  size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated;
-  Handle<mirror::Object> ptr6(
-      hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size,
-                                   &ptr6_bytes_tl_bulk_allocated)));
-  EXPECT_TRUE(ptr6.Get() != nullptr);
-  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
-  EXPECT_LE(9U * MB, ptr6_usable_size);
-  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
-  EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated);
-
-  // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.Get(), nullptr);
-  space->Free(self, ptr1.Assign(nullptr));
-  EXPECT_LE(1U * MB, free1);
-}
-
-void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) {
-  MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
-  ASSERT_TRUE(space != nullptr);
-
-  // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
-  Thread* self = Thread::Current();
-  ScopedObjectAccess soa(self);
-
-  // Succeeds, fits without adjusting the max allowed footprint.
-  mirror::Object* lots_of_objects[1024];
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
-    lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size,
-                               &usable_size, &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-
-  // Succeeds, fits by adjusting the max allowed footprint.
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size, usable_size, bytes_tl_bulk_allocated;
-    lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size,
-                                         &bytes_tl_bulk_allocated);
-    EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    size_t computed_usable_size;
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
-    EXPECT_EQ(usable_size, computed_usable_size);
-    EXPECT_TRUE(bytes_tl_bulk_allocated == 0 ||
-                bytes_tl_bulk_allocated >= allocation_size);
-  }
-
-  // Release memory.
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-}
-
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t object_size,
-                                                    int round, size_t growth_limit) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space,
+                                                           intptr_t object_size,
+                                                           int round,
+                                                           size_t growth_limit) {
   if (((object_size > 0 && object_size >= static_cast<intptr_t>(growth_limit))) ||
       ((object_size < 0 && -object_size >= static_cast<intptr_t>(growth_limit)))) {
     // No allocation can succeed
@@ -576,7 +312,9 @@
   EXPECT_LE(space->Size(), growth_limit);
 }
 
-void SpaceTest::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space) {
+template <class Super>
+void SpaceTest<Super>::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size,
+                                                             CreateSpaceFn create_space) {
   if (object_size < SizeOfZeroLengthByteArray()) {
     // Too small for the object layout/model.
     return;
@@ -614,25 +352,8 @@
     SizeFootPrintGrowthLimitAndTrimDriver(-size, spaceFn); \
   }
 
-#define TEST_SPACE_CREATE_FN_BASE(spaceName, spaceFn) \
-  class spaceName##BaseTest : public SpaceTest { \
-  }; \
-  \
-  TEST_F(spaceName##BaseTest, Init) { \
-    InitTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, ZygoteSpace) { \
-    ZygoteSpaceTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFree) { \
-    AllocAndFreeTestBody(spaceFn); \
-  } \
-  TEST_F(spaceName##BaseTest, AllocAndFreeList) { \
-    AllocAndFreeListTestBody(spaceFn); \
-  }
-
 #define TEST_SPACE_CREATE_FN_STATIC(spaceName, spaceFn) \
-  class spaceName##StaticTest : public SpaceTest { \
+  class spaceName##StaticTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(12B, spaceName, spaceFn, 12) \
@@ -648,7 +369,7 @@
   TEST_SizeFootPrintGrowthLimitAndTrimStatic(8MB, spaceName, spaceFn, 8 * MB)
 
 #define TEST_SPACE_CREATE_FN_RANDOM(spaceName, spaceFn) \
-  class spaceName##RandomTest : public SpaceTest { \
+  class spaceName##RandomTest : public SpaceTest<CommonRuntimeTest> { \
   }; \
   \
   TEST_SizeFootPrintGrowthLimitAndTrimRandom(16B, spaceName, spaceFn, 16) \
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index dfc1f5f..bb35ec7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -419,18 +419,13 @@
   Hprof(const char* output_filename, int fd, bool direct_to_ddms)
       : filename_(output_filename),
         fd_(fd),
-        direct_to_ddms_(direct_to_ddms),
-        start_ns_(NanoTime()),
-        output_(nullptr),
-        current_heap_(HPROF_HEAP_DEFAULT),
-        objects_in_segment_(0),
-        next_string_id_(0x400000),
-        next_class_serial_number_(1) {
+        direct_to_ddms_(direct_to_ddms) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
   }
 
   void Dump()
-    REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
+    REQUIRES(Locks::mutator_lock_)
+    REQUIRES(!Locks::heap_bitmap_lock_, !Locks::alloc_tracker_lock_) {
     {
       MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
       if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) {
@@ -462,10 +457,11 @@
     }
 
     if (okay) {
-      uint64_t duration = NanoTime() - start_ns_;
-      LOG(INFO) << "hprof: heap dump completed ("
-          << PrettySize(RoundUp(overall_size, 1024))
-          << ") in " << PrettyDuration(duration);
+      const uint64_t duration = NanoTime() - start_ns_;
+      LOG(INFO) << "hprof: heap dump completed (" << PrettySize(RoundUp(overall_size, KB))
+                << ") in " << PrettyDuration(duration)
+                << " objects " << total_objects_
+                << " objects with stack traces " << total_objects_with_stack_trace_;
     }
   }
 
@@ -855,7 +851,7 @@
     }
     CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1);
     CHECK_EQ(frames_.size(), next_frame_id);
-    VLOG(heap) << "hprof: found " << count << " objects with allocation stack traces";
+    total_objects_with_stack_trace_ = count;
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
@@ -865,16 +861,19 @@
   int fd_;
   bool direct_to_ddms_;
 
-  uint64_t start_ns_;
+  uint64_t start_ns_ = NanoTime();
 
-  EndianOutput* output_;
+  EndianOutput* output_ = nullptr;
 
-  HprofHeapId current_heap_;  // Which heap we're currently dumping.
-  size_t objects_in_segment_;
+  HprofHeapId current_heap_ = HPROF_HEAP_DEFAULT;  // Which heap we're currently dumping.
+  size_t objects_in_segment_ = 0;
 
-  HprofStringId next_string_id_;
+  size_t total_objects_ = 0u;
+  size_t total_objects_with_stack_trace_ = 0u;
+
+  HprofStringId next_string_id_ = 0x400000;
   SafeMap<std::string, HprofStringId> strings_;
-  HprofClassSerialNumber next_class_serial_number_;
+  HprofClassSerialNumber next_class_serial_number_ = 1;
   SafeMap<mirror::Class*, HprofClassSerialNumber> classes_;
 
   std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber,
@@ -1064,6 +1063,8 @@
     return;
   }
 
+  ++total_objects_;
+
   GcRootVisitor visitor(this);
   obj->VisitReferences(visitor, VoidFunctor());
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 9df94f7..c57b1bb 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -933,7 +933,7 @@
                                                    ArtMethod* caller,
                                                    uint32_t dex_pc,
                                                    ArtMethod* callee) const {
-  // We can not have thread suspension since that would cause the this_object parameter to
+  // We cannot have thread suspension since that would cause the this_object parameter to
   // potentially become a dangling pointer. An alternative could be to put it in a handle instead.
   ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
   for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 8f715a3..2b2176e 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -61,7 +61,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
-  // can not be running while we are doing image writing. Maybe be called while while holding a
+  // cannot be running while we are doing image writing. Maybe be called while while holding a
   // lock since there will not be thread suspension.
   mirror::String* InternStrongImageString(mirror::String* s)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 18fb0d8..ecd4de9 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -592,6 +592,10 @@
   //
   // (at this point the ArtMethod has already been replaced,
   // so we just need to fix-up the arguments)
+  //
+  // Note that FindMethodFromCode in entrypoint_utils-inl.h was also special-cased
+  // to handle the compiler optimization of replacing `this` with null without
+  // throwing NullPointerException.
   uint32_t string_init_vreg_this = is_range ? vregC : arg[0];
   if (UNLIKELY(string_init)) {
     DCHECK_GT(num_regs, 0u);  // As the method is an instance method, there should be at least 1.
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 4fb6df1..1bc58ac 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -276,7 +276,12 @@
    */
 #if defined(__linux__)
   hostent he;
-  char auxBuf[128];
+  // The size of the work buffer used in the gethostbyname_r call
+  // below. It used to be 128, but this was not enough on some
+  // configurations (maybe because of IPv6?), causing failures in JDWP
+  // host testing; thus it was increased to 256.
+  static constexpr size_t kAuxBufSize = 256;
+  char auxBuf[kAuxBufSize];
   int error;
   int cc = gethostbyname_r(options->host.c_str(), &he, auxBuf, sizeof(auxBuf), &pEntry, &error);
   if (cc != 0) {
@@ -298,7 +303,8 @@
 
   addr.addrInet.sin_port = htons(options->port);
 
-  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+  LOG(INFO) << "Connecting out to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+            << ntohs(addr.addrInet.sin_port);
 
   /*
    * Create a socket.
@@ -313,13 +319,15 @@
    * Try to connect.
    */
   if (connect(clientSock, &addr.addrPlain, sizeof(addr)) != 0) {
-    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port);
+    PLOG(ERROR) << "Unable to connect to " << inet_ntoa(addr.addrInet.sin_addr) << ":"
+                << ntohs(addr.addrInet.sin_port);
     close(clientSock);
     clientSock = -1;
     return false;
   }
 
-  LOG(INFO) << "Connection established to " << options->host << " (" << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
+  LOG(INFO) << "Connection established to " << options->host << " ("
+            << inet_ntoa(addr.addrInet.sin_addr) << ":" << ntohs(addr.addrInet.sin_port) << ")";
   SetAwaitingHandshake(true);
   input_count_ = 0;
 
@@ -438,7 +446,8 @@
         }
       }
       if (clientSock >= 0 && FD_ISSET(clientSock, &readfds)) {
-        readCount = read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
+        readCount =
+            read(clientSock, input_buffer_ + input_count_, sizeof(input_buffer_) - input_count_);
         if (readCount < 0) {
           /* read failed */
           if (errno != EINTR) {
@@ -479,7 +488,8 @@
     errno = 0;
     int cc = TEMP_FAILURE_RETRY(write(clientSock, input_buffer_, kMagicHandshakeLen));
     if (cc != kMagicHandshakeLen) {
-      PLOG(ERROR) << "Failed writing handshake bytes (" << cc << " of " << kMagicHandshakeLen << ")";
+      PLOG(ERROR) << "Failed writing handshake bytes ("
+                  << cc << " of " << kMagicHandshakeLen << ")";
       goto fail;
     }
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index a132701..b4b872f 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -24,8 +24,11 @@
 
 #include "art_method-inl.h"
 #include "base/mutex.h"
+#include "base/scoped_flock.h"
 #include "base/stl_util.h"
+#include "base/unix_file/fd_file.h"
 #include "jit/profiling_info.h"
+#include "os.h"
 #include "safe_map.h"
 
 namespace art {
@@ -37,8 +40,17 @@
     return true;
   }
 
+  ScopedFlock flock;
+  std::string error;
+  if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) {
+    LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error;
+    return false;
+  }
+
+  int fd = flock.GetFile()->Fd();
+
   ProfileCompilationInfo info;
-  if (!info.Load(filename)) {
+  if (!info.Load(fd)) {
     LOG(WARNING) << "Could not load previous profile data from file " << filename;
     return false;
   }
@@ -54,9 +66,14 @@
     }
   }
 
+  if (!flock.GetFile()->ClearContent()) {
+    PLOG(WARNING) << "Could not clear profile file: " << filename;
+    return false;
+  }
+
   // This doesn't need locking because we are trying to lock the file for exclusive
   // access and fail immediately if we can't.
-  bool result = info.Save(filename);
+  bool result = info.Save(fd);
   if (result) {
     VLOG(profiler) << "Successfully saved profile info to " << filename
         << " Size: " << GetFileSizeBytes(filename);
@@ -66,64 +83,20 @@
   return result;
 }
 
-enum OpenMode {
-  READ,
-  READ_WRITE
-};
-
-static int OpenFile(const std::string& filename, OpenMode open_mode) {
-  int fd = -1;
-  switch (open_mode) {
-    case READ:
-      fd = open(filename.c_str(), O_RDONLY);
-      break;
-    case READ_WRITE:
-      // TODO(calin) allow the shared uid of the app to access the file.
-      fd = open(filename.c_str(), O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
-      break;
-  }
-
-  if (fd < 0) {
-    PLOG(WARNING) << "Failed to open profile file " << filename;
-    return -1;
-  }
-
-  // Lock the file for exclusive access but don't wait if we can't lock it.
-  int err = flock(fd, LOCK_EX | LOCK_NB);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to lock profile file " << filename;
-    return -1;
-  }
-  return fd;
-}
-
-static bool CloseDescriptorForFile(int fd, const std::string& filename) {
-  // Now unlock the file, allowing another process in.
-  int err = flock(fd, LOCK_UN);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to unlock profile file " << filename;
-    return false;
-  }
-
-  // Done, close the file.
-  err = ::close(fd);
-  if (err < 0) {
-    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
-    return false;
-  }
-
-  return true;
-}
-
-static void WriteToFile(int fd, const std::ostringstream& os) {
+static bool WriteToFile(int fd, const std::ostringstream& os) {
   std::string data(os.str());
   const char *p = data.c_str();
   size_t length = data.length();
   do {
-    int n = ::write(fd, p, length);
+    int n = TEMP_FAILURE_RETRY(write(fd, p, length));
+    if (n < 0) {
+      PLOG(WARNING) << "Failed to write to descriptor: " << fd;
+      return false;
+    }
     p += n;
     length -= n;
   } while (length > 0);
+  return true;
 }
 
 static constexpr const char kFieldSeparator = ',';
@@ -137,13 +110,8 @@
  *    /system/priv-app/app/app.apk,131232145,11,23,454,54
  *    /system/priv-app/app/app.apk:classes5.dex,218490184,39,13,49,1
  **/
-bool ProfileCompilationInfo::Save(const std::string& filename) {
-  int fd = OpenFile(filename, READ_WRITE);
-  if (fd == -1) {
-    return false;
-  }
-
-  // TODO(calin): Merge with a previous existing profile.
+bool ProfileCompilationInfo::Save(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
   // TODO(calin): Profile this and see how much memory it takes. If too much,
   // write to file directly.
   std::ostringstream os;
@@ -158,9 +126,7 @@
     os << kLineSeparator;
   }
 
-  WriteToFile(fd, os);
-
-  return CloseDescriptorForFile(fd, filename);
+  return WriteToFile(fd, os);
 }
 
 // TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
@@ -222,7 +188,9 @@
       LOG(WARNING) << "Cannot parse method_idx " << parts[i];
       return false;
     }
-    AddData(dex_location, checksum, method_idx);
+    if (!AddData(dex_location, checksum, method_idx)) {
+      return false;
+    }
   }
   return true;
 }
@@ -249,23 +217,18 @@
   return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
 }
 
-bool ProfileCompilationInfo::Load(const std::string& filename) {
-  int fd = OpenFile(filename, READ);
-  if (fd == -1) {
-    return false;
-  }
+bool ProfileCompilationInfo::Load(uint32_t fd) {
+  DCHECK_GE(fd, 0u);
 
   std::string current_line;
   const int kBufferSize = 1024;
   char buffer[kBufferSize];
-  bool success = true;
 
-  while (success) {
-    int n = read(fd, buffer, kBufferSize);
+  while (true) {
+    int n = TEMP_FAILURE_RETRY(read(fd, buffer, kBufferSize));
     if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file " << filename;
-      success = false;
-      break;
+      PLOG(WARNING) << "Error when reading profile file";
+      return false;
     } else if (n == 0) {
       break;
     }
@@ -278,17 +241,13 @@
         break;
       }
       if (!ProcessLine(current_line)) {
-        success = false;
-        break;
+        return false;
       }
       // Reset the current line (we just processed it).
       current_line.clear();
     }
   }
-  if (!success) {
-    info_.clear();
-  }
-  return CloseDescriptorForFile(fd, filename) && success;
+  return true;
 }
 
 bool ProfileCompilationInfo::Load(const ProfileCompilationInfo& other) {
@@ -369,4 +328,8 @@
   return os.str();
 }
 
+bool ProfileCompilationInfo::Equals(ProfileCompilationInfo& other) {
+  return info_.Equals(other.info_);
+}
+
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 26e1ac3..ffd1433 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -39,15 +39,18 @@
  */
 class ProfileCompilationInfo {
  public:
+  // Saves profile information about the given methods in the given file.
+  // Note that the saving proceeds only if the file can be locked for exclusive access.
+  // If not (the locking is not blocking), the function does not save and returns false.
   static bool SaveProfilingInfo(const std::string& filename,
                                 const std::vector<ArtMethod*>& methods);
 
-  // Loads profile information from the given file.
-  bool Load(const std::string& profile_filename);
+  // Loads profile information from the given file descriptor.
+  bool Load(uint32_t fd);
   // Loads the data from another ProfileCompilationInfo object.
   bool Load(const ProfileCompilationInfo& info);
-  // Saves the profile data to the given file.
-  bool Save(const std::string& profile_filename);
+  // Saves the profile data to the given file descriptor.
+  bool Save(uint32_t fd);
   // Returns the number of methods that were profiled.
   uint32_t GetNumberOfMethods() const;
 
@@ -61,6 +64,9 @@
   std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
                        bool print_full_dex_location = true) const;
 
+  // For testing purposes.
+  bool Equals(ProfileCompilationInfo& other);
+
  private:
   bool AddData(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
   bool ProcessLine(const std::string& line);
@@ -69,10 +75,18 @@
     explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
     uint32_t checksum;
     std::set<uint16_t> method_set;
+
+    bool operator==(const DexFileData& other) const {
+      return checksum == other.checksum && method_set == other.method_set;
+    }
   };
 
   using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
 
+  friend class ProfileCompilationInfoTest;
+  friend class CompilerDriverProfileTest;
+  friend class ProfileAssistantTest;
+
   DexFileToProfileInfoMap info_;
 };
 
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
new file mode 100644
index 0000000..482ea06
--- /dev/null
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
+#include "common_runtime_test.h"
+#include "dex_file.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+class ProfileCompilationInfoTest : public CommonRuntimeTest {
+ protected:
+  std::vector<ArtMethod*> GetVirtualMethods(jobject class_loader,
+                                            const std::string& clazz) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    std::vector<ArtMethod*> methods;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      methods.push_back(&m);
+    }
+    return methods;
+  }
+
+  bool AddData(const std::string& dex_location,
+               uint32_t checksum,
+               uint16_t method_index,
+               ProfileCompilationInfo* info) {
+    return info->AddData(dex_location, checksum, method_index);
+  }
+
+  uint32_t GetFd(const ScratchFile& file) {
+    return static_cast<uint32_t>(file.GetFd());
+  }
+};
+
+TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
+  ScratchFile profile;
+
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Save virtual methods from Main.
+  std::vector<ArtMethod*> main_methods = GetVirtualMethods(class_loader, "LMain;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), main_methods));
+
+  // Check that what we saved is in the profile.
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(info1.Load(GetFd(profile)));
+  ASSERT_EQ(info1.GetNumberOfMethods(), main_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info1.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+
+  // Save virtual methods from Second.
+  std::vector<ArtMethod*> second_methods = GetVirtualMethods(class_loader, "LSecond;");
+  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(), second_methods));
+
+  // Check that what we saved is in the profile (methods form Main and Second).
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(info2.Load(GetFd(profile)));
+  ASSERT_EQ(info2.GetNumberOfMethods(), main_methods.size() + second_methods.size());
+  {
+    ScopedObjectAccess soa(self);
+    for (ArtMethod* m : main_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+    for (ArtMethod* m : second_methods) {
+      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+    }
+  }
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveFd) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+
+  // Save more methods.
+  for (uint16_t i = 0; i < 100; i++) {
+    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddData("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back everything we saved.
+  ProfileCompilationInfo loaded_info2;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info2.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info2.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, AddDataFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
+  // Trying to add info for an existing file but with a different checksum.
+  ASSERT_FALSE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadFail) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo info1;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
+  // Use the same file, change the checksum.
+  ProfileCompilationInfo info2;
+  ASSERT_TRUE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
+
+  ASSERT_FALSE(info1.Load(info2));
+}
+
+}  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index ec289ea..f3f5f95 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -22,16 +22,16 @@
 
 namespace art {
 
-// An arbitrary value to throttle save requests. Set to 500ms for now.
+// An arbitrary value to throttle save requests. Set to 2s for now.
 static constexpr const uint64_t kMilisecondsToNano = 1000000;
-static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 500 * kMilisecondsToNano;
+static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 2000 * kMilisecondsToNano;
 
 // TODO: read the constants from ProfileOptions,
 // Add a random delay each time we go to sleep so that we don't hammer the CPU
 // with all profile savers running at the same time.
-static constexpr const uint64_t kRandomDelayMaxMs = 10 * 1000;  // 10 seconds
-static constexpr const uint64_t kMaxBackoffMs = 4 * 60 * 1000;  // 4 minutes
-static constexpr const uint64_t kSavePeriodMs = 4 * 1000;  // 4 seconds
+static constexpr const uint64_t kRandomDelayMaxMs = 20 * 1000;  // 20 seconds
+static constexpr const uint64_t kMaxBackoffMs = 5 * 60 * 1000;  // 5 minutes
+static constexpr const uint64_t kSavePeriodMs = 10 * 1000;  // 10 seconds
 static constexpr const double kBackoffCoef = 1.5;
 
 static constexpr const uint32_t kMinimumNrOrMethodsToSave = 10;
@@ -86,12 +86,14 @@
 }
 
 bool ProfileSaver::ProcessProfilingInfo() {
-  VLOG(profiler) << "Initiating save profiling information to: " << output_filename_;
+  VLOG(profiler) << "Save profiling information to: " << output_filename_;
 
   uint64_t last_update_time_ns = jit_code_cache_->GetLastUpdateTimeNs();
   if (last_update_time_ns - code_cache_last_update_time_ns_
-      > kMinimumTimeBetweenCodeCacheUpdatesNs) {
-    VLOG(profiler) << "Not enough time has passed since the last code cache update.";
+      < kMinimumTimeBetweenCodeCacheUpdatesNs) {
+    VLOG(profiler) << "Not enough time has passed since the last code cache update."
+        << "Last update: " << last_update_time_ns
+        << " Last save: " << code_cache_last_update_time_ns_;
     return false;
   }
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 3571edb..18c52e4 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -583,6 +583,10 @@
   }
 }
 
+bool MemMap::Sync() {
+  return msync(BaseBegin(), BaseSize(), MS_SYNC) == 0;
+}
+
 bool MemMap::Protect(int prot) {
   if (base_begin_ == nullptr && base_size_ == 0) {
     prot_ = prot;
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index ed21365..ebd550a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -126,6 +126,8 @@
     return name_;
   }
 
+  bool Sync();
+
   bool Protect(int prot);
 
   void MadviseDontNeedAndZero();
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index edcbcf2..81c855e 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -251,6 +251,10 @@
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
+  // Some MIPS32 hardware (namely the Creator Ci20 development board)
+  // cannot allocate in the 2GB-4GB region.
+  TEST_DISABLED_FOR_MIPS();
+
   CommonInit();
   // This test may not work under valgrind.
   if (RUNNING_ON_MEMORY_TOOL == 0) {
@@ -271,8 +275,8 @@
         break;
       }
     }
-    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
     ASSERT_TRUE(error_msg.empty());
     ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
diff --git a/runtime/oat.h b/runtime/oat.h
index 13fd6a4..989e3f9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -31,7 +31,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '4', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '5', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 83e594b..82b3933 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -46,6 +46,7 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "runtime.h"
+#include "type_lookup_table.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "vmap_table.h"
@@ -266,16 +267,15 @@
                                 i);
       return false;
     }
-
-    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
-    oat += dex_file_location_size;
-    if (UNLIKELY(oat > End())) {
+    if (UNLIKELY(static_cast<size_t>(End() - oat) < dex_file_location_size)) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with truncated dex file "
                                     "location",
                                 GetLocation().c_str(),
                                 i);
       return false;
     }
+    const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
+    oat += dex_file_location_size;
 
     std::string dex_file_location = ResolveRelativeEncodedDexLocation(
         abs_dex_location,
@@ -318,6 +318,17 @@
                                 Size());
       return false;
     }
+    if (UNLIKELY(Size() - dex_file_offset < sizeof(DexFile::Header))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u of %zu but the size of dex file header is %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                Size(),
+                                sizeof(DexFile::Header));
+      return false;
+    }
 
     const uint8_t* dex_file_pointer = Begin() + dex_file_offset;
     if (UNLIKELY(!DexFile::IsMagicValid(dex_file_pointer))) {
@@ -339,34 +350,75 @@
       return false;
     }
     const DexFile::Header* header = reinterpret_cast<const DexFile::Header*>(dex_file_pointer);
+    if (Size() - dex_file_offset < header->file_size_) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                    "offset %u and size %u truncated at %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                header->file_size_,
+                                Size());
+      return false;
+    }
 
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table offset", GetLocation().c_str(), i,
+    uint32_t class_offsets_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after class offsets offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
-    uint32_t lookup_table_offset = *reinterpret_cast<const uint32_t*>(oat);
-    oat += sizeof(lookup_table_offset);
-    if (Begin() + lookup_table_offset > End()) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "lookup table", GetLocation().c_str(), i,
+    if (UNLIKELY(class_offsets_offset > Size()) ||
+        UNLIKELY((Size() - class_offsets_offset) / sizeof(uint32_t) < header->class_defs_size_)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                    "class offsets, offset %u of %zu, class defs %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset,
+                                Size(),
+                                header->class_defs_size_);
+      return false;
+    }
+    if (UNLIKELY(!IsAligned<alignof(uint32_t)>(class_offsets_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with unaligned "
+                                    "class offsets, offset %u",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                class_offsets_offset);
+      return false;
+    }
+    const uint32_t* class_offsets_pointer =
+        reinterpret_cast<const uint32_t*>(Begin() + class_offsets_offset);
+
+    uint32_t lookup_table_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &lookup_table_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
+                                    "after lookup table offset",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
     const uint8_t* lookup_table_data = lookup_table_offset != 0u
         ? Begin() + lookup_table_offset
         : nullptr;
-
-    const uint32_t* methods_offsets_pointer = reinterpret_cast<const uint32_t*>(oat);
-
-    oat += (sizeof(*methods_offsets_pointer) * header->class_defs_size_);
-    if (UNLIKELY(oat > End())) {
+    if (lookup_table_offset != 0u &&
+        (UNLIKELY(lookup_table_offset > Size()) ||
+            UNLIKELY(Size() - lookup_table_offset <
+                     TypeLookupTable::RawDataLength(header->class_defs_size_)))) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
-                                    "method offsets",
+                                    "type lookup table, offset %u of %zu, class defs %u",
                                 GetLocation().c_str(),
                                 i,
-                                dex_file_location.c_str());
+                                dex_file_location.c_str(),
+                                lookup_table_offset,
+                                Size(),
+                                header->class_defs_size_);
       return false;
     }
 
@@ -398,7 +450,7 @@
                                               dex_file_checksum,
                                               dex_file_pointer,
                                               lookup_table_data,
-                                              methods_offsets_pointer,
+                                              class_offsets_pointer,
                                               current_dex_cache_arrays);
     oat_dex_files_storage_.push_back(oat_dex_file);
 
@@ -627,7 +679,7 @@
 
   if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
   }
 #endif
 }
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 2b92303..341be9a 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -309,8 +309,8 @@
     const std::string option(options[i].first);
       // TODO: support -Djava.class.path
     if (option == "bootclasspath") {
-      auto boot_class_path
-          = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
+      auto boot_class_path = static_cast<std::vector<std::unique_ptr<const DexFile>>*>(
+          const_cast<void*>(options[i].second));
 
       if (runtime_options != nullptr) {
         runtime_options->Set(M::BootClassPathDexList, boot_class_path);
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index a7f7bcd..eb226d4 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -34,7 +34,7 @@
     struct stat exec_st;
     std::string exec_path = tools_dir + tool;
     if (stat(exec_path.c_str(), &exec_st) != 0) {
-      ADD_FAILURE() << "Can not find " << tool << " in " << tools_dir;
+      ADD_FAILURE() << "Cannot find " << tool << " in " << tools_dir;
     }
   }
 }
@@ -42,7 +42,7 @@
 TEST_F(PrebuiltToolsTest, CheckHostTools) {
   std::string tools_dir = GetAndroidHostToolsDir();
   if (tools_dir.empty()) {
-    ADD_FAILURE() << "Can not find Android tools directory for host";
+    ADD_FAILURE() << "Cannot find Android tools directory for host";
   } else {
     CheckToolsExist(tools_dir);
   }
@@ -54,7 +54,7 @@
   for (InstructionSet isa : isas) {
     std::string tools_dir = GetAndroidTargetToolsDir(isa);
     if (tools_dir.empty()) {
-      ADD_FAILURE() << "Can not find Android tools directory for " << isa;
+      ADD_FAILURE() << "Cannot find Android tools directory for " << isa;
     } else {
       CheckToolsExist(tools_dir);
     }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1101acd..e30c26d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1131,10 +1131,14 @@
     }
 
     std::vector<std::unique_ptr<const DexFile>> boot_class_path;
-    OpenDexFiles(dex_filenames,
-                 dex_locations,
-                 runtime_options.GetOrDefault(Opt::Image),
-                 &boot_class_path);
+    if (runtime_options.Exists(Opt::BootClassPathDexList)) {
+      boot_class_path.swap(*runtime_options.GetOrDefault(Opt::BootClassPathDexList));
+    } else {
+      OpenDexFiles(dex_filenames,
+                   dex_locations,
+                   runtime_options.GetOrDefault(Opt::Image),
+                   &boot_class_path);
+    }
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
     std::string error_msg;
     if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
diff --git a/runtime/runtime_options.cc b/runtime/runtime_options.cc
index c54461e..e75481c 100644
--- a/runtime/runtime_options.cc
+++ b/runtime/runtime_options.cc
@@ -13,8 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "runtime_options.h"
 
+#include <memory>
+
 #include "gc/heap.h"
 #include "monitor.h"
 #include "runtime.h"
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 5624285..c5b009d 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -117,8 +117,8 @@
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
-RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
-                                          BootClassPathDexList)  // TODO: make unique_ptr
+RUNTIME_OPTIONS_KEY (std::vector<std::unique_ptr<const DexFile>>*, \
+                                          BootClassPathDexList)
 RUNTIME_OPTIONS_KEY (InstructionSet,      ImageInstructionSet,            kRuntimeISA)
 RUNTIME_OPTIONS_KEY (CompilerCallbacks*,  CompilerCallbacksPtr)  // TDOO: make unique_ptr
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
diff --git a/runtime/type_lookup_table.cc b/runtime/type_lookup_table.cc
index 0d40bb7..fc9faec 100644
--- a/runtime/type_lookup_table.cc
+++ b/runtime/type_lookup_table.cc
@@ -16,6 +16,7 @@
 
 #include "type_lookup_table.h"
 
+#include "base/bit_utils.h"
 #include "dex_file-inl.h"
 #include "utf-inl.h"
 #include "utils.h"
@@ -42,25 +43,39 @@
 }
 
 uint32_t TypeLookupTable::RawDataLength(const DexFile& dex_file) {
-  return RoundUpToPowerOfTwo(dex_file.NumClassDefs()) * sizeof(Entry);
+  return RawDataLength(dex_file.NumClassDefs());
 }
 
-TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file) {
+uint32_t TypeLookupTable::RawDataLength(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) * sizeof(Entry) : 0u;
+}
+
+uint32_t TypeLookupTable::CalculateMask(uint32_t num_class_defs) {
+  return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) - 1u : 0u;
+}
+
+bool TypeLookupTable::SupportedSize(uint32_t num_class_defs) {
+  return num_class_defs != 0u && num_class_defs <= std::numeric_limits<uint16_t>::max();
+}
+
+TypeLookupTable* TypeLookupTable::Create(const DexFile& dex_file, uint8_t* storage) {
   const uint32_t num_class_defs = dex_file.NumClassDefs();
-  return (num_class_defs == 0 || num_class_defs > std::numeric_limits<uint16_t>::max())
-      ? nullptr
-      : new TypeLookupTable(dex_file);
+  return SupportedSize(num_class_defs)
+      ? new TypeLookupTable(dex_file, storage)
+      : nullptr;
 }
 
 TypeLookupTable* TypeLookupTable::Open(const uint8_t* raw_data, const DexFile& dex_file) {
   return new TypeLookupTable(raw_data, dex_file);
 }
 
-TypeLookupTable::TypeLookupTable(const DexFile& dex_file)
+TypeLookupTable::TypeLookupTable(const DexFile& dex_file, uint8_t* storage)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
-      entries_(new Entry[mask_ + 1]),
-      owns_entries_(true) {
+      mask_(CalculateMask(dex_file.NumClassDefs())),
+      entries_(storage != nullptr ? reinterpret_cast<Entry*>(storage) : new Entry[mask_ + 1]),
+      owns_entries_(storage == nullptr) {
+  static_assert(alignof(Entry) == 4u, "Expecting Entry to be 4-byte aligned.");
+  DCHECK_ALIGNED(storage, alignof(Entry));
   std::vector<uint16_t> conflict_class_defs;
   // The first stage. Put elements on their initial positions. If an initial position is already
   // occupied then delay the insertion of the element to the second stage to reduce probing
@@ -93,7 +108,7 @@
 
 TypeLookupTable::TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file)
     : dex_file_(dex_file),
-      mask_(RoundUpToPowerOfTwo(dex_file.NumClassDefs()) - 1),
+      mask_(CalculateMask(dex_file.NumClassDefs())),
       entries_(reinterpret_cast<Entry*>(const_cast<uint8_t*>(raw_data))),
       owns_entries_(false) {}
 
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index 3c2295c..d74d01d 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -60,7 +60,7 @@
   }
 
   // Method creates lookup table for dex file
-  static TypeLookupTable* Create(const DexFile& dex_file);
+  static TypeLookupTable* Create(const DexFile& dex_file, uint8_t* storage = nullptr);
 
   // Method opens lookup table from binary data. Lookup table does not owns binary data.
   static TypeLookupTable* Open(const uint8_t* raw_data, const DexFile& dex_file);
@@ -76,6 +76,9 @@
   // Method returns length of binary data for the specified dex file.
   static uint32_t RawDataLength(const DexFile& dex_file);
 
+  // Method returns length of binary data for the specified number of class definitions.
+  static uint32_t RawDataLength(uint32_t num_class_defs);
+
  private:
    /**
     * To find element we need to compare strings.
@@ -109,8 +112,11 @@
     }
   };
 
+  static uint32_t CalculateMask(uint32_t num_class_defs);
+  static bool SupportedSize(uint32_t num_class_defs);
+
   // Construct from a dex file.
-  explicit TypeLookupTable(const DexFile& dex_file);
+  explicit TypeLookupTable(const DexFile& dex_file, uint8_t* storage);
 
   // Construct from a dex file with existing data.
   TypeLookupTable(const uint8_t* raw_data, const DexFile& dex_file);
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 5239e40..c67879b 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -353,7 +353,7 @@
     if (codePoint <= 0xffff) {
       if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
         // According to the Unicode standard, no character will ever
-        // be assigned to these code points, and they can not be encoded
+        // be assigned to these code points, and they cannot be encoded
         // into either utf-16 or utf-8.
         continue;
       }
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 9daaf8e..d96fb42 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -133,4 +133,8 @@
   return new ZipEntry(handle_, zip_entry.release());
 }
 
+ZipArchive::~ZipArchive() {
+  CloseArchive(handle_);
+}
+
 }  // namespace art
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 717eb8c..42bf55c 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -63,9 +63,7 @@
 
   ZipEntry* Find(const char* name, std::string* error_msg) const;
 
-  ~ZipArchive() {
-    CloseArchive(handle_);
-  }
+  ~ZipArchive();
 
  private:
   explicit ZipArchive(ZipArchiveHandle handle) : handle_(handle) {}
diff --git a/test/127-checker-secondarydex/src/Test.java b/test/127-checker-secondarydex/src/Test.java
index ae8bac9..266ed19 100644
--- a/test/127-checker-secondarydex/src/Test.java
+++ b/test/127-checker-secondarydex/src/Test.java
@@ -23,8 +23,12 @@
         System.out.println("Test");
     }
 
-    /// CHECK-START: java.lang.String Test.toString() ssa_builder (after)
-    /// CHECK:         LoadClass needs_access_check:false klass:java.lang.String
+    /// CHECK-START: java.lang.Integer Test.toInteger() ssa_builder (after)
+    /// CHECK:         LoadClass needs_access_check:false klass:java.lang.Integer
+
+    public Integer toInteger() {
+        return new Integer(42);
+    }
 
     public String toString() {
         return new String("Test");
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 9bfe429..77301d2 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -76,7 +76,7 @@
     }
   }
 
-  printf("Can not find %s in backtrace:\n", seq[cur_search_index].c_str());
+  printf("Cannot find %s in backtrace:\n", seq[cur_search_index].c_str());
   for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) {
     if (BacktraceMap::IsValid(it->map)) {
       printf("  %s\n", it->func_name.c_str());
@@ -112,7 +112,7 @@
 
   std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid()));
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind in process.\n");
+    printf("Cannot unwind in process.\n");
     return JNI_FALSE;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind in process.\n");
@@ -205,7 +205,7 @@
   std::unique_ptr<Backtrace> bt(Backtrace::Create(pid, BACKTRACE_CURRENT_THREAD));
   bool result = true;
   if (!bt->Unwind(0, nullptr)) {
-    printf("Can not unwind other process.\n");
+    printf("Cannot unwind other process.\n");
     result = false;
   } else if (bt->NumFrames() == 0) {
     printf("No frames for unwind of other process.\n");
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 0640b36..bcb697a 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -79,7 +79,7 @@
 
     private static void testUnloadClass(Constructor constructor) throws Exception {
         WeakReference<Class> klass = setUpUnloadClass(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         WeakReference<Class> klass2 = setUpUnloadClass(constructor);
         Runtime.getRuntime().gc();
@@ -91,7 +91,7 @@
     private static void testUnloadLoader(Constructor constructor)
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
-      // No strong refernces to class loader, should get unloaded.
+      // No strong references to class loader, should get unloaded.
       Runtime.getRuntime().gc();
       // If the weak reference is cleared, then it was unloaded.
       System.out.println(loader.get());
@@ -109,7 +109,7 @@
 
     private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
-        // No strong refernces to class loader, should get unloaded.
+        // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(loader.get());
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 2d0688d..549ed99 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -47,7 +47,7 @@
   /// CHECK:       <<Acc:i\d+>>         ParameterValue
   /// CHECK:       <<Left:i\d+>>        ParameterValue
   /// CHECK:       <<Right:i\d+>>       ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
   /// CHECK:                            Return [<<MulAdd>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
@@ -57,6 +57,28 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
   /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
+  /// CHECK:                            mla w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
   public static int $opt$noinline$mulAdd(int acc, int left, int right) {
     if (doThrow) throw new Error();
     return acc + left * right;
@@ -78,7 +100,7 @@
   /// CHECK:       <<Acc:j\d+>>         ParameterValue
   /// CHECK:       <<Left:j\d+>>        ParameterValue
   /// CHECK:       <<Right:j\d+>>       ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
   /// CHECK:                            Return [<<MulSub>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
@@ -88,6 +110,28 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
   /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
 
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Sub>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
+  /// CHECK:                            mls x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
   public static long $opt$noinline$mulSub(long acc, long left, long right) {
     if (doThrow) throw new Error();
     return acc - left * right;
@@ -117,7 +161,28 @@
   /// CHECK:                            Return [<<Or>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
 
   public static int $opt$noinline$multipleUses1(int acc, int left, int right) {
     if (doThrow) throw new Error();
@@ -151,7 +216,30 @@
   /// CHECK:                            Return [<<Res>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
 
 
   public static long $opt$noinline$multipleUses2(long acc, long left, long right) {
@@ -176,7 +264,7 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Acc:i\d+>>         ParameterValue
   /// CHECK:       <<Var:i\d+>>         ParameterValue
-  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
   /// CHECK:                            Return [<<MulAdd>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
@@ -186,6 +274,27 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
   /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
+  /// CHECK:                            mla w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
   public static int $opt$noinline$mulPlusOne(int acc, int var) {
     if (doThrow) throw new Error();
     return acc * (var + 1);
@@ -207,7 +316,7 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Acc:j\d+>>         ParameterValue
   /// CHECK:       <<Var:j\d+>>         ParameterValue
-  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
   /// CHECK:                            Return [<<MulSub>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
@@ -217,11 +326,123 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
   /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
 
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
+  /// CHECK:                            mls x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
   public static long $opt$noinline$mulMinusOne(long acc, long var) {
     if (doThrow) throw new Error();
     return acc * (1 - var);
   }
 
+  /**
+   * Test basic merging of `MUL+NEG` into `MULNEG`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulNeg:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulNeg>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Neg
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulNeg(int, int) disassembly (after)
+  /// CHECK:                            mneg x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: int Main.$opt$noinline$mulNeg(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static int $opt$noinline$mulNeg(int left, int right) {
+    if (doThrow) throw new Error();
+    return - (left * right);
+  }
+
+  /**
+   * Test basic merging of `MUL+NEG` into `MULNEG`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulNeg:j\d+>>      MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulNeg>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Neg
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulNeg(long, long) disassembly (after)
+  /// CHECK:                            mneg x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Neg:j\d+>>         Neg [<<Mul>>,<<Mul>>]
+  /// CHECK:                            Return [<<Neg>>]
+
+  /// CHECK-START-ARM: long Main.$opt$noinline$mulNeg(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        MultiplyAccumulate
+
+  public static long $opt$noinline$mulNeg(long left, long right) {
+    if (doThrow) throw new Error();
+    return - (left * right);
+  }
 
   public static void main(String[] args) {
     assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
@@ -230,5 +451,7 @@
     assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12));
     assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14));
     assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
+    assertIntEquals(-306, $opt$noinline$mulNeg(17, 18));
+    assertLongEquals(-380, $opt$noinline$mulNeg(19, 20));
   }
 }
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 30a648d..971ad84 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -91,9 +91,7 @@
    goto :other_loop_entry
 .end method
 
-# Check that if a irreducible loop entry is dead, the loop can become
-# natural.
-# We start with:
+# Check that dce does not apply for irreducible loops.
 #
 #        entry
 #       /    \
@@ -106,18 +104,8 @@
 ## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (before)
 ## CHECK: irreducible:true
 
-# And end with:
-#
-#        entry
-#       /
-#      /
-# loop_entry
-#    /    \-
-#  exit    \-
-#           other_loop_entry
-
 ## CHECK-START: int IrreducibleLoop.dce(int) dead_code_elimination (after)
-## CHECK-NOT: irreducible:true
+## CHECK: irreducible:true
 .method public static dce(I)I
    .registers 3
    const/16 v0, 42
diff --git a/test/563-checker-fakestring/smali/TestCase.smali b/test/563-checker-fakestring/smali/TestCase.smali
index cd52f3d..54312a4 100644
--- a/test/563-checker-fakestring/smali/TestCase.smali
+++ b/test/563-checker-fakestring/smali/TestCase.smali
@@ -64,9 +64,15 @@
 
 .end method
 
-# Test deoptimization between String's allocation and initialization.
+# Test deoptimization between String's allocation and initialization. When not
+# compiling --debuggable, the NewInstance will be optimized out.
 
 ## CHECK-START: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
+## CHECK:         <<Null:l\d+>>   NullConstant
+## CHECK:                         Deoptimize env:[[<<Null>>,{{.*]]}}
+## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
+
+## CHECK-START-DEBUGGABLE: int TestCase.deoptimizeNewInstance(int[], byte[]) register (after)
 ## CHECK:         <<String:l\d+>> NewInstance
 ## CHECK:                         Deoptimize env:[[<<String>>,{{.*]]}}
 ## CHECK:                         InvokeStaticOrDirect method_name:java.lang.String.<init>
@@ -98,3 +104,79 @@
    return v2
 
 .end method
+
+# Test that a redundant NewInstance is removed if not used and not compiling
+# --debuggable.
+
+## CHECK-START: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK-NOT:     NewInstance
+## CHECK-NOT:     LoadClass
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.removeNewInstance(byte[]) register (after)
+## CHECK:         NewInstance
+
+.method public static removeNewInstance([B)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+# Test that the compiler does not assume that the first argument of String.<init>
+# is a NewInstance by inserting an irreducible loop between them (b/26676472).
+
+# We verify the type of the input instruction (Phi) in debuggable mode, because
+# it is eliminated by later stages of SsaBuilder otherwise.
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance1(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance1([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   const v1, 0x1
+   xor-int p1, p1, v1
+   :loop_entry
+   if-eqz p1, :string_init
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
+
+## CHECK-START-DEBUGGABLE: java.lang.String TestCase.thisNotNewInstance2(byte[], boolean) register (after)
+## CHECK-DAG:                   InvokeStaticOrDirect env:[[<<Phi:l\d+>>,{{.*]]}}
+## CHECK-DAG:     <<Phi>>       Phi
+
+.method public static thisNotNewInstance2([BZ)Ljava/lang/String;
+   .registers 5
+
+   new-instance v0, Ljava/lang/String;
+
+   # Irreducible loop
+   if-eqz p1, :loop_entry
+   :loop_header
+   if-eqz p1, :string_init
+   :loop_entry
+   const v1, 0x1
+   xor-int p1, p1, v1
+   goto :loop_header
+
+   :string_init
+   const-string v1, "UTF8"
+   invoke-direct {v0, p0, v1}, Ljava/lang/String;-><init>([BLjava/lang/String;)V
+   return-object v0
+
+.end method
diff --git a/test/563-checker-fakestring/src/Main.java b/test/563-checker-fakestring/src/Main.java
index 750f718..1ac8a5b 100644
--- a/test/563-checker-fakestring/src/Main.java
+++ b/test/563-checker-fakestring/src/Main.java
@@ -57,5 +57,26 @@
         }
       }
     }
+
+    {
+      Method m = c.getMethod("removeNewInstance", byte[].class);
+      String result = (String) m.invoke(null, new Object[] { testData });
+      assertEqual(testString, result);
+    }
+
+    {
+      Method m = c.getMethod("thisNotNewInstance1", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
+    {
+      Method m = c.getMethod("thisNotNewInstance2", byte[].class, boolean.class);
+      String result = (String) m.invoke(null, new Object[] { testData, true });
+      assertEqual(testString, result);
+      result = (String) m.invoke(null, new Object[] { testData, false });
+      assertEqual(testString, result);
+    }
   }
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 586c805..36dd9f4 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -70,6 +70,7 @@
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK=$(abspath $(JACK)) \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
 	  JILL_JAR=$(abspath $(JILL_JAR)) \
@@ -967,6 +968,7 @@
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
+	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK=$(abspath $(JACK)) \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
 	    JILL_JAR=$(abspath $(JILL_JAR)) \
diff --git a/test/ProfileTestMultiDex/Main.java b/test/ProfileTestMultiDex/Main.java
new file mode 100644
index 0000000..41532ea
--- /dev/null
+++ b/test/ProfileTestMultiDex/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  public String getA() {
+    return "A";
+  }
+  public String getB() {
+    return "B";
+  }
+  public String getC() {
+    return "C";
+  }
+}
diff --git a/test/ProfileTestMultiDex/Second.java b/test/ProfileTestMultiDex/Second.java
new file mode 100644
index 0000000..4ac5abc
--- /dev/null
+++ b/test/ProfileTestMultiDex/Second.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Second {
+  public String getX() {
+    return "X";
+  }
+  public String getY() {
+    return "Y";
+  }
+  public String getZ() {
+    return "Z";
+  }
+}
diff --git a/test/ProfileTestMultiDex/main.jpp b/test/ProfileTestMultiDex/main.jpp
new file mode 100644
index 0000000..f2e3b4e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.jpp
@@ -0,0 +1,3 @@
+main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Second
diff --git a/test/ProfileTestMultiDex/main.list b/test/ProfileTestMultiDex/main.list
new file mode 100644
index 0000000..44ba78e
--- /dev/null
+++ b/test/ProfileTestMultiDex/main.list
@@ -0,0 +1 @@
+Main.class