Merge "Update `ValidateInvokeRuntime()` and HDivZeroCheck."
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index 953cfc0..a06f45a 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -16,10 +16,14 @@
 
 include art/build/Android.common_build.mk
 
-ART_CPPLINT := art/tools/cpplint.py
+ART_CPPLINT := $(LOCAL_PATH)/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
 ART_CPPLINT_FLAGS := --quiet
-ART_CPPLINT_SRC := $(shell find art -name "*.h" -o -name "*$(ART_CPP_EXTENSION)" | grep -v art/compiler/llvm/generated/ | grep -v art/runtime/elf\.h)
+# This:
+#  1) Gets a list of all .h & .cc files in the art directory.
+#  2) Prepends 'art/' to each of them to make the full name.
+#  3) Removes art/runtime/elf.h from the list.
+ART_CPPLINT_SRC := $(filter-out $(LOCAL_PATH)/runtime/elf.h, $(addprefix $(LOCAL_PATH)/, $(call all-subdir-named-files,*.h) $(call all-subdir-named-files,*$(ART_CPP_EXTENSION))))
 
 # "mm cpplint-art" to verify we aren't regressing
 .PHONY: cpplint-art
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
index a391e4d..e8ba914 100644
--- a/compiler/dwarf/method_debug_info.h
+++ b/compiler/dwarf/method_debug_info.h
@@ -30,8 +30,8 @@
   uint32_t access_flags_;
   const DexFile::CodeItem* code_item_;
   bool deduped_;
-  uint32_t low_pc_;
-  uint32_t high_pc_;
+  uintptr_t low_pc_;
+  uintptr_t high_pc_;
   CompiledMethod* compiled_method_;
 };
 
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bb07cc2..a7461a5 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -148,6 +148,12 @@
       }
     }
 
+    // Returns true if the section was written to disk.
+    // (Used to check whether we have .text when writing JIT debug info.)
+    bool Exists() const {
+      return finished_;
+    }
+
     // Get the location of this section in virtual memory.
     Elf_Addr GetAddress() const {
       CHECK(started_);
@@ -247,16 +253,18 @@
     }
 
     // Buffer symbol for this section.  It will be written later.
+    // If the symbol's section is null, it will be considered absolute (SHN_ABS).
+    // (We use this in the JIT to reference code that is stored outside the debug ELF file.)
     void Add(Elf_Word name, const Section* section,
              Elf_Addr addr, bool is_relative, Elf_Word size,
              uint8_t binding, uint8_t type, uint8_t other = 0) {
-      CHECK(section != nullptr);
       Elf_Sym sym = Elf_Sym();
       sym.st_name = name;
       sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
       sym.st_size = size;
       sym.st_other = other;
-      sym.st_shndx = section->GetSectionIndex();
+      sym.st_shndx = (section != nullptr ? section->GetSectionIndex()
+                                         : static_cast<Elf_Word>(SHN_ABS));
       sym.st_info = (binding << 4) + (type & 0xf);
       symbols_.push_back(sym);
     }
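For reference, the rule this hunk encodes, sketched against the standard <elf.h> types rather than ART's ElfBuilder (MakeFuncSymbol is an illustrative helper, not ART API): a null section yields an absolute symbol whose st_value is taken as a raw runtime address, which is exactly what JIT-ed code needs, since that code lives outside the debug ELF file.

```cpp
#include <elf.h>

// Hypothetical 32-bit analogue of Add() above. Passing section_index == nullptr
// produces an absolute (SHN_ABS) symbol whose st_value is already final.
Elf32_Sym MakeFuncSymbol(Elf32_Word name_offset, Elf32_Addr addr, Elf32_Word size,
                         const Elf32_Half* section_index) {
  Elf32_Sym sym = {};
  sym.st_name = name_offset;
  sym.st_value = addr;  // Section-relative callers add the section address first.
  sym.st_size = size;
  sym.st_shndx = (section_index != nullptr) ? *section_index : SHN_ABS;
  sym.st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC);  // Same packing as sym.st_info above.
  return sym;
}
```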
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 2bc8c89..dd50f69 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -22,16 +22,20 @@
 #include "base/casts.h"
 #include "base/stl_util.h"
 #include "compiled_method.h"
-#include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "driver/compiler_driver.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
 #include "oat_writer.h"
-#include "utils.h"
 #include "stack_map.h"
+#include "utils.h"
 
 namespace art {
 namespace dwarf {
@@ -219,6 +223,10 @@
   CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
+  if (method_infos.empty()) {
+    return;
+  }
+
   std::vector<uint32_t> binary_search_table;
   std::vector<uintptr_t> patch_locations;
   if (format == DW_EH_FRAME_FORMAT) {
@@ -234,7 +242,9 @@
   {
     cfi_section->Start();
     const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
-    const Elf_Addr text_address = builder->GetText()->GetAddress();
+    const Elf_Addr text_address = builder->GetText()->Exists()
+        ? builder->GetText()->GetAddress()
+        : 0;
     const Elf_Addr cfi_address = cfi_section->GetAddress();
     const Elf_Addr cie_address = cfi_address;
     Elf_Addr buffer_address = cfi_address;
@@ -305,8 +315,8 @@
   struct CompilationUnit {
     std::vector<const MethodDebugInfo*> methods_;
     size_t debug_line_offset_ = 0;
-    uint32_t low_pc_ = 0xFFFFFFFFU;
-    uint32_t high_pc_ = 0;
+    uintptr_t low_pc_ = std::numeric_limits<uintptr_t>::max();
+    uintptr_t high_pc_ = 0;
   };
 
   typedef std::vector<DexFile::LocalInfo> LocalInfos;
@@ -439,14 +449,17 @@
 
     void Write(const CompilationUnit& compilation_unit) {
       CHECK(!compilation_unit.methods_.empty());
-      const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+      const Elf_Addr text_address = owner_->builder_->GetText()->Exists()
+          ? owner_->builder_->GetText()->GetAddress()
+          : 0;
+      const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
 
       info_.StartTag(DW_TAG_compile_unit);
       info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
       info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
       info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
-      info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+      info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
       info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
 
       const char* last_dex_class_desc = nullptr;
@@ -464,8 +477,16 @@
           if (last_dex_class_desc != nullptr) {
             EndClassTag(last_dex_class_desc);
           }
-          size_t offset = StartClassTag(dex_class_desc);
-          type_cache_.emplace(dex_class_desc, offset);
+          // Write a reference tag for the class we are about to declare.
+          size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
+          type_cache_.emplace(std::string(dex_class_desc), reference_tag_offset);
+          size_t type_attrib_offset = info_.size();
+          info_.WriteRef4(DW_AT_type, 0);
+          info_.EndTag();
+          // Declare the class that owns this method.
+          size_t class_offset = StartClassTag(dex_class_desc);
+          info_.UpdateUint32(type_attrib_offset, class_offset);
+          info_.WriteFlag(DW_AT_declaration, true);
           // Check that each class is defined only once.
           bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
           CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -476,7 +497,7 @@
         info_.StartTag(DW_TAG_subprogram);
         WriteName(dex->GetMethodName(dex_method));
         info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
-        info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+        info_.WriteUdata(DW_AT_high_pc,
+                         dchecked_integral_cast<uint32_t>(mi->high_pc_ - mi->low_pc_));
         uint8_t frame_base[] = { DW_OP_call_frame_cfa };
         info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
         WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
@@ -562,6 +583,92 @@
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
 
+    void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+      info_.StartTag(DW_TAG_compile_unit);
+      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteData1(DW_AT_language, DW_LANG_Java);
+
+      for (mirror::Class* type : types) {
+        if (type->IsPrimitive()) {
+          // For primitive types the definition and the declaration are the same.
+          if (type->GetPrimitiveType() != Primitive::kPrimVoid) {
+            WriteTypeDeclaration(type->GetDescriptor(nullptr));
+          }
+        } else if (type->IsArrayClass()) {
+          mirror::Class* element_type = type->GetComponentType();
+          uint32_t component_size = type->GetComponentSize();
+          uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+          uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+          info_.StartTag(DW_TAG_array_type);
+          std::string descriptor_string;
+          WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+          info_.WriteUdata(DW_AT_data_member_location, data_offset);
+          info_.StartTag(DW_TAG_subrange_type);
+          DCHECK_LT(length_offset, 32u);
+          uint8_t count[] = {
+            DW_OP_push_object_address,
+            static_cast<uint8_t>(DW_OP_lit0 + length_offset),
+            DW_OP_plus,
+            DW_OP_deref_size,
+            4  // Array length is always 32-bit wide.
+          };
+          info_.WriteExprLoc(DW_AT_count, &count, sizeof(count));
+          info_.EndTag();  // DW_TAG_subrange_type.
+          info_.EndTag();  // DW_TAG_array_type.
+        } else {
+          std::string descriptor_string;
+          const char* desc = type->GetDescriptor(&descriptor_string);
+          StartClassTag(desc);
+
+          if (!type->IsVariableSize()) {
+            info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
+          }
+
+          // Base class.
+          mirror::Class* base_class = type->GetSuperClass();
+          if (base_class != nullptr) {
+            info_.StartTag(DW_TAG_inheritance);
+            WriteLazyType(base_class->GetDescriptor(&descriptor_string));
+            info_.WriteUdata(DW_AT_data_member_location, 0);
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+            info_.EndTag();  // DW_TAG_inheritance.
+          }
+
+          // Member variables.
+          for (uint32_t i = 0, count = type->NumInstanceFields(); i < count; ++i) {
+            ArtField* field = type->GetInstanceField(i);
+            info_.StartTag(DW_TAG_member);
+            WriteName(field->GetName());
+            WriteLazyType(field->GetTypeDescriptor());
+            info_.WriteUdata(DW_AT_data_member_location, field->GetOffset().Uint32Value());
+            uint32_t access_flags = field->GetAccessFlags();
+            if (access_flags & kAccPublic) {
+              info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+            } else if (access_flags & kAccProtected) {
+              info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_protected);
+            } else if (access_flags & kAccPrivate) {
+              info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+            }
+            info_.EndTag();  // DW_TAG_member.
+          }
+
+          EndClassTag(desc);
+        }
+      }
+
+      CHECK_EQ(info_.Depth(), 1);
+      FinishLazyTypes();
+      info_.EndTag();  // DW_TAG_compile_unit.
+      std::vector<uint8_t> buffer;
+      buffer.reserve(info_.data()->size() + KB);
+      const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+      const size_t debug_abbrev_offset =
+          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+      owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+    }
+
     // Write table into .debug_loc which describes location of dex register.
     // The dex register might be valid only at some points and it might
     // move between machine registers and stack.
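The DW_OP sequence emitted for DW_AT_count above amounts to a single memory load relative to the object address. A minimal sketch of the computation a DWARF consumer performs when evaluating it (EvaluateArrayCount is illustrative, not a DWARF or ART API):

```cpp
#include <cstdint>
#include <cstring>

// Equivalent of: DW_OP_push_object_address; DW_OP_lit<length_offset>;
// DW_OP_plus; DW_OP_deref_size 4. The offset must be < 32 so it fits in a
// single DW_OP_lit0..DW_OP_lit31 opcode, which the DCHECK above enforces.
uint32_t EvaluateArrayCount(const uint8_t* object_address, uint32_t length_offset) {
  uint32_t count;  // Array length is always 32-bit wide.
  std::memcpy(&count, object_address + length_offset, sizeof(count));
  return count;
}
```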
@@ -715,14 +822,14 @@
     // just define all types lazily at the end of compilation unit.
     void WriteLazyType(const char* type_descriptor) {
       if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
-        lazy_types_.emplace(type_descriptor, info_.size());
+        lazy_types_.emplace(std::string(type_descriptor), info_.size());
         info_.WriteRef4(DW_AT_type, 0);
       }
     }
 
     void FinishLazyTypes() {
       for (const auto& lazy_type : lazy_types_) {
-        info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+        info_.UpdateUint32(lazy_type.second, WriteTypeDeclaration(lazy_type.first));
       }
       lazy_types_.clear();
     }
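WriteLazyType()/FinishLazyTypes() follow a classic two-pass forward-reference scheme: emit a 4-byte placeholder now, record its offset, and patch it once the referenced type entry has been written. A simplified, self-contained sketch of that pattern (LazyTypePatcher and its members are illustrative stand-ins, not ART classes):

```cpp
#include <cstdint>
#include <cstring>
#include <map>
#include <string>
#include <vector>

class LazyTypePatcher {
 public:
  // Pass 1: reserve space for a 4-byte type reference and remember where it is.
  void WriteLazyType(const std::string& desc) {
    lazy_types_.emplace(desc, data_.size());
    data_.insert(data_.end(), 4, 0);  // Placeholder, like WriteRef4(DW_AT_type, 0).
  }

  // Pass 2: emit each referenced type once and patch all recorded offsets.
  void FinishLazyTypes() {
    for (const auto& lazy_type : lazy_types_) {
      uint32_t type_offset = WriteTypeDeclaration(lazy_type.first);
      std::memcpy(&data_[lazy_type.second], &type_offset, sizeof(type_offset));
    }
    lazy_types_.clear();
  }

 private:
  uint32_t WriteTypeDeclaration(const std::string& desc) {
    auto it = type_cache_.find(desc);
    if (it != type_cache_.end()) {
      return it->second;  // Each descriptor is emitted at most once.
    }
    uint32_t offset = static_cast<uint32_t>(data_.size());
    // ... emit the DW_TAG_* entry describing `desc` into data_ here ...
    type_cache_.emplace(desc, offset);
    return offset;
  }

  std::vector<uint8_t> data_;
  std::multimap<std::string, size_t> lazy_types_;  // A type may be referenced many times.
  std::map<std::string, uint32_t> type_cache_;
};
```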
@@ -747,30 +854,39 @@
 
     // Convert dex type descriptor to DWARF.
     // Returns offset in the compilation unit.
-    size_t WriteType(const char* desc) {
+    size_t WriteTypeDeclaration(const std::string& desc) {
+      DCHECK(!desc.empty());
       const auto& it = type_cache_.find(desc);
       if (it != type_cache_.end()) {
         return it->second;
       }
 
       size_t offset;
-      if (*desc == 'L') {
+      if (desc[0] == 'L') {
         // Class type. For example: Lpackage/name;
-        offset = StartClassTag(desc);
+        size_t class_offset = StartClassTag(desc.c_str());
         info_.WriteFlag(DW_AT_declaration, true);
-        EndClassTag(desc);
-      } else if (*desc == '[') {
+        EndClassTag(desc.c_str());
+        // Reference to the class type.
+        offset = info_.StartTag(DW_TAG_reference_type);
+        info_.WriteRef(DW_AT_type, class_offset);
+        info_.EndTag();
+      } else if (desc[0] == '[') {
         // Array type.
-        size_t element_type = WriteType(desc + 1);
-        offset = info_.StartTag(DW_TAG_array_type);
+        size_t element_type = WriteTypeDeclaration(desc.substr(1));
+        size_t array_type = info_.StartTag(DW_TAG_array_type);
+        info_.WriteFlag(DW_AT_declaration, true);
         info_.WriteRef(DW_AT_type, element_type);
         info_.EndTag();
+        offset = info_.StartTag(DW_TAG_reference_type);
+        info_.WriteRef4(DW_AT_type, array_type);
+        info_.EndTag();
       } else {
         // Primitive types.
         const char* name;
         uint32_t encoding;
         uint32_t byte_size;
-        switch (*desc) {
+        switch (desc[0]) {
         case 'B':
           name = "byte";
           encoding = DW_ATE_signed;
@@ -815,7 +931,7 @@
           LOG(FATAL) << "Void type should not be encoded";
           UNREACHABLE();
         default:
-          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
           UNREACHABLE();
         }
         offset = info_.StartTag(DW_TAG_base_type);
@@ -865,9 +981,10 @@
     // Temporary buffer to create and store the entries.
     DebugInfoEntryWriter<> info_;
     // Cache of already translated type descriptors.
-    std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+    std::map<std::string, size_t> type_cache_;  // type_desc -> definition_offset.
     // 32-bit references which need to be resolved to a type later.
-    std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+    // A given type may be used multiple times, so we need a multimap.
+    std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
   };
 
  public:
@@ -883,6 +1000,11 @@
     writer.Write(compilation_unit);
   }
 
+  void WriteTypes(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    CompilationUnitWriter writer(this);
+    writer.Write(types);
+  }
+
   void End() {
     builder_->GetDebugInfo()->End();
     builder_->WritePatches(".debug_info.oat_patches",
@@ -924,7 +1046,9 @@
   // Returns the number of bytes written.
   size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = builder_->GetText()->Exists()
+        ? builder_->GetText()->GetAddress()
+        : 0;
 
     compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
 
@@ -1102,9 +1226,27 @@
   std::vector<uintptr_t> debug_line_patches;
 };
 
+// Get all types loaded by the runtime.
+static std::vector<mirror::Class*> GetLoadedRuntimeTypes() SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<mirror::Class*> result;
+  class CollectClasses : public ClassVisitor {
+   public:
+    virtual bool Visit(mirror::Class* klass) {
+      classes_->push_back(klass);
+      return true;
+    }
+    std::vector<mirror::Class*>* classes_;
+  };
+  CollectClasses visitor;
+  visitor.classes_ = &result;
+  Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
+  return result;
+}
+
 template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos) {
+static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                               bool write_loaded_runtime_types,
+                               const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
@@ -1122,7 +1264,7 @@
   }
 
   // Write .debug_line section.
-  {
+  if (!compilation_units.empty()) {
     DebugLineWriter<ElfTypes> line_writer(builder);
     line_writer.Start();
     for (auto& compilation_unit : compilation_units) {
@@ -1132,12 +1274,19 @@
   }
 
   // Write .debug_info section.
-  {
+  if (!compilation_units.empty() || write_loaded_runtime_types) {
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
       info_writer.WriteCompilationUnit(compilation_unit);
     }
+    if (write_loaded_runtime_types) {
+      Thread* self = Thread::Current();
+      // The lock prevents the classes from being moved by the GC.
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);
+      std::vector<mirror::Class*> types = GetLoadedRuntimeTypes();
+      info_writer.WriteTypes(ArrayRef<mirror::Class*>(types.data(), types.size()));
+    }
     info_writer.End();
   }
 }
@@ -1173,11 +1322,13 @@
       name += " [DEDUPED]";
     }
 
+    const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
+    const bool is_relative = (text != nullptr);
     uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+    symtab->Add(strtab->Write(name), text, low_pc,
+                is_relative, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
@@ -1185,8 +1336,8 @@
     // requires it to match function symbol.  Just address 0 does not work.
     if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
+        symtab->Add(strtab->Write("$t"), text, info.low_pc_ & ~1,
+                    is_relative, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
     }
@@ -1202,25 +1353,89 @@
 
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format) {
-  if (!method_infos.empty()) {
-    // Add methods to .symtab.
-    WriteDebugSymbols(builder, method_infos);
-    // Generate CFI (stack unwinding information).
-    WriteCFISection(builder, method_infos, cfi_format);
-    // Write DWARF .debug_* sections.
-    WriteDebugSections(builder, method_infos);
+  // Add methods to .symtab.
+  WriteDebugSymbols(builder, method_infos);
+  // Generate CFI (stack unwinding information).
+  WriteCFISection(builder, method_infos, cfi_format);
+  // Write DWARF .debug_* sections.
+  WriteDebugSections(builder, write_loaded_runtime_types, method_infos);
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
+    const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+  WriteDebugInfo(builder.get(),
+                 false,
+                 ArrayRef<const MethodDebugInfo>(&method_info, 1),
+                 DW_DEBUG_FRAME_FORMAT);
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer.  We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForMethodInternal<ElfTypes64>(method_info);
+  } else {
+    return WriteDebugElfFileForMethodInternal<ElfTypes32>(method_info);
+  }
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForClassInternal(const InstructionSet isa,
+                                                                 mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+
+  DebugInfoWriter<ElfTypes> info_writer(builder.get());
+  info_writer.Start();
+  info_writer.WriteTypes(ArrayRef<mirror::Class*>(&type, 1));
+  info_writer.End();
+
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer.  We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type) {
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForClassInternal<ElfTypes64>(isa, type);
+  } else {
+    return WriteDebugElfFileForClassInternal<ElfTypes32>(isa, type);
   }
 }
 
 // Explicit instantiations
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
 template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
 
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 7ec0be1..91da00f 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,19 +17,30 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include "elf_builder.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dwarf/dwarf_constants.h"
-#include "oat_writer.h"
+#include "elf_builder.h"
 #include "utils/array_ref.h"
 
 namespace art {
+namespace mirror {
+class Class;
+}
 namespace dwarf {
+struct MethodDebugInfo;
 
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format);
 
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
 }  // namespace dwarf
 }  // namespace art
 
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 7b1bdd7..a67f3bd 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -152,7 +152,7 @@
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
-    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
+    dwarf::WriteDebugInfo(
+        builder_.get(), /* write_loaded_runtime_types */ true, method_infos, kCFIFormat);
   }
 }
 
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 85216b7..bc51ed6 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -22,6 +22,7 @@
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
+#include "base/unix_file/fd_file.h"
 #include "compiler_callbacks.h"
 #include "dex/pass_manager.h"
 #include "dex/quick_compiler_callbacks.h"
@@ -42,11 +43,12 @@
   return new JitCompiler();
 }
 
-extern "C" void* jit_load(CompilerCallbacks** callbacks) {
+extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) {
   VLOG(jit) << "loading jit compiler";
   auto* const jit_compiler = JitCompiler::Create();
   CHECK(jit_compiler != nullptr);
   *callbacks = jit_compiler->GetCompilerCallbacks();
+  *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo();
   VLOG(jit) << "Done loading jit compiler";
   return jit_compiler;
 }
@@ -160,9 +162,28 @@
   // Disable dedupe so we can remove compiled methods.
   compiler_driver_->SetDedupeEnabled(false);
   compiler_driver_->SetSupportBootImageFixup(false);
+
+  if (compiler_options_->GetGenerateDebugInfo()) {
+#ifdef __ANDROID__
+    const char* prefix = GetAndroidData();
+#else
+    const char* prefix = "/tmp";
+#endif
+    DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u)
+        << "Generating debug info only works with one compiler thread";
+    std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
+    perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
+    if (perf_file_ == nullptr) {
+      LOG(FATAL) << "Could not create perf file at " << perf_filename;
+    }
+  }
 }
 
 JitCompiler::~JitCompiler() {
+  if (perf_file_ != nullptr) {
+    UNUSED(perf_file_->Flush());
+    UNUSED(perf_file_->Close());
+  }
 }
 
 bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
@@ -188,6 +209,20 @@
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
+    if (success && compiler_options_->GetGenerateDebugInfo()) {
+      const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
+      std::ostringstream stream;
+      stream << std::hex
+             << reinterpret_cast<uintptr_t>(ptr)
+             << " "
+             << code_cache->GetMemorySizeOfCodePointer(ptr)
+             << " "
+             << PrettyMethod(method_to_compile)
+             << std::endl;
+      std::string str = stream.str();
+      bool res = perf_file_->WriteFully(str.c_str(), str.size());
+      CHECK(res);
+    }
   }
 
   // Trim maps to reduce memory usage.
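The stream built above emits the /tmp/perf-<pid>.map format that Linux perf consumes for JIT symbolization: one line per method, with a hex start address, hex code size, and the method name. A minimal sketch of one such line (MakePerfMapLine is illustrative):

```cpp
#include <cstdint>
#include <sstream>
#include <string>

std::string MakePerfMapLine(uintptr_t code_address, size_t code_size,
                            const std::string& pretty_method) {
  std::ostringstream stream;
  // std::hex applies to both the address and the size, matching the code above.
  stream << std::hex << code_address << " " << code_size << " " << pretty_method << std::endl;
  return stream.str();  // e.g. "7f3a2c001000 1a4 void Foo.bar()" (made-up values)
}
```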
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 913a6d0..037a18a 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -43,6 +43,9 @@
   size_t GetTotalCompileTime() const {
     return total_time_;
   }
+  CompilerOptions* GetCompilerOptions() const {
+    return compiler_options_.get();
+  }
 
  private:
   uint64_t total_time_;
@@ -53,6 +56,7 @@
   std::unique_ptr<CompilerCallbacks> callbacks_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+  std::unique_ptr<File> perf_file_;
 
   JitCompiler();
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index dc75ff1..d710747 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1142,7 +1142,7 @@
           loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
         SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
         if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
-          HoistToPreheaderOrDeoptBlock(loop, array_get);
+          HoistToPreHeaderOrDeoptBlock(loop, array_get);
         }
       }
     }
@@ -1280,7 +1280,8 @@
       // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
       // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
       // correctly guard against any possible OOB (including arithmetic wrap-around cases).
-      HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      HBasicBlock* block = GetPreHeader(loop, instruction);
       induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
       if (lower != nullptr) {
         InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
@@ -1353,7 +1354,7 @@
       return true;
     } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
       if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
-        HoistToPreheaderOrDeoptBlock(loop, length);
+        HoistToPreHeaderOrDeoptBlock(loop, length);
         return true;
       }
     }
@@ -1371,7 +1372,8 @@
       HInstruction* array = check->InputAt(0);
       if (loop->IsDefinedOutOfTheLoop(array)) {
         // Generate: if (array == null) deoptimize;
-        HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HBasicBlock* block = GetPreHeader(loop, check);
         HInstruction* cond =
             new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
         InsertDeopt(loop, block, cond);
@@ -1418,6 +1420,28 @@
     return true;
   }
 
+  /**
+   * Returns the appropriate preheader for the loop, depending on whether the
+   * instruction appears in the loop header or in the proper loop body.
+   */
+  HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) {
+    // Use the preheader unless there is an earlier generated deoptimization block, since
+    // hoisted expressions may depend on and/or be used by the deoptimization tests.
+    HBasicBlock* header = loop->GetHeader();
+    const uint32_t loop_id = header->GetBlockId();
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      HBasicBlock* block = it->second;
+      // If always taken, keep it that way by returning the original preheader,
+      // which can be found by following the predecessor of the true-block twice.
+      if (instruction->GetBlock() == header) {
+        return block->GetSinglePredecessor()->GetSinglePredecessor();
+      }
+      return block;
+    }
+    return loop->GetPreHeader();
+  }
+
   /** Inserts a deoptimization test. */
   void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
     HInstruction* suspend = loop->GetSuspendCheck();
@@ -1432,28 +1456,17 @@
   }
 
   /** Hoists instruction out of the loop to preheader or deoptimization block. */
-  void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
-    // Use preheader unless there is an earlier generated deoptimization block since
-    // hoisted expressions may depend on and/or used by the deoptimization tests.
-    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
-    HBasicBlock* preheader = loop->GetPreHeader();
-    HBasicBlock* block = preheader;
-    auto it = taken_test_loop_.find(loop_id);
-    if (it != taken_test_loop_.end()) {
-      block = it->second;
-    }
-    // Hoist the instruction.
+  void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    HBasicBlock* block = GetPreHeader(loop, instruction);
     DCHECK(!instruction->HasEnvironment());
     instruction->MoveBefore(block->GetLastInstruction());
   }
 
   /**
-   * Adds a new taken-test structure to a loop if needed (and not already done).
+   * Adds a new taken-test structure to a loop if needed and not already done.
    * The taken-test protects range analysis evaluation code to avoid any
    * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
    *
-   * Returns block in which deoptimizations/invariants can be put.
-   *
    *          old_preheader
    *               |
    *            if_block          <- taken-test protects deoptimization block
@@ -1485,16 +1498,11 @@
    *     array[i] = 0;
    *   }
    */
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
-    // Not needed (can use preheader), or already done (can reuse)?
+  void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+    // Not needed (can use preheader) or already done (can reuse)?
     const uint32_t loop_id = loop->GetHeader()->GetBlockId();
-    if (!needs_taken_test) {
-      return loop->GetPreHeader();
-    } else {
-      auto it = taken_test_loop_.find(loop_id);
-      if (it != taken_test_loop_.end()) {
-        return it->second;
-      }
+    if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) {
+      return;
     }
 
     // Generate top test structure.
@@ -1523,7 +1531,6 @@
     if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
 
     taken_test_loop_.Put(loop_id, true_block);
-    return true_block;
   }
 
   /**
@@ -1538,7 +1545,7 @@
    *            \       /
    *           x_1 = phi(x_0, null)   <- synthetic phi
    *               |
-   *             header
+   *          new_preheader
    */
   void InsertPhiNodes() {
     // Scan all new deoptimization blocks.
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 950043e..5958cd8 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -613,7 +613,7 @@
 
   ArenaVector<SlowPathCode*> slow_paths_;
 
-  // The current slow path that we're generating code for.
+  // The current slow-path that we're generating code for.
   SlowPathCode* current_slow_path_;
 
   // The current block index in `block_order_` of the block
@@ -674,6 +674,122 @@
   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
 };
 
+/**
+ * A templated class SlowPathGenerator with a templated method NewSlowPath()
+ * that can be used by any code generator to share equivalent slow-paths,
+ * reducing generated code size.
+ *
+ * InstructionType:  instruction that requires SlowPathCodeType
+ * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*)
+ */
+template <typename InstructionType>
+class SlowPathGenerator {
+  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
+                "InstructionType is not a subclass of art::HInstruction");
+
+ public:
+  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
+      : graph_(graph),
+        codegen_(codegen),
+        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
+
+  // Creates and adds a new slow-path, if needed, or returns an existing one otherwise.
+  // Templating the method (rather than the whole class) on the slow-path type enables
+  // keeping this code in a generic, non-architecture-specific place.
+  //
+  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
+  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
+  //       or to template the class as a whole on SlowPathCodeType.
+  template <typename SlowPathCodeType>
+  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
+    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
+                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
+    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
+                  "SlowPathCodeType is not constructible from InstructionType*");
+    // Iterate over potential candidates for sharing. Currently, only same-typed
+    // slow-paths with exactly the same dex-pc are viable candidates.
+    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
+    const uint32_t dex_pc = instruction->GetDexPc();
+    auto iter = slow_path_map_.find(dex_pc);
+    if (iter != slow_path_map_.end()) {
+      auto candidates = iter->second;
+      for (const auto& it : candidates) {
+        InstructionType* other_instruction = it.first;
+        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
+        // Determine if the instructions allow for slow-path sharing.
+        if (HaveSameLiveRegisters(instruction, other_instruction) &&
+            HaveSameStackMap(instruction, other_instruction)) {
+          // Can share: reuse existing one.
+          return other_slow_path;
+        }
+      }
+    } else {
+      // First time this dex-pc is seen.
+      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
+    }
+    // Cannot share: create and add new slow-path for this particular dex-pc.
+    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
+    iter->second.emplace_back(std::make_pair(instruction, slow_path));
+    codegen_->AddSlowPath(slow_path);
+    return slow_path;
+  }
+
+ private:
+  // Tests if both instructions have the same set of live physical registers. This ensures
+  // the slow-path has exactly the same preamble for saving these registers to the stack.
+  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
+    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
+    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
+    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
+    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
+    return (((live1->GetCoreRegisters() & core_spill) ==
+             (live2->GetCoreRegisters() & core_spill)) &&
+            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
+             (live2->GetFloatingPointRegisters() & fpu_spill)));
+  }
+
+  // Tests if both instructions have the same stack map. This ensures the interpreter
+  // will find exactly the same dex-registers at the same entries.
+  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
+    DCHECK(i1->HasEnvironment());
+    DCHECK(i2->HasEnvironment());
+    // We conservatively test whether the two instructions record exactly the same instruction
+    // and location for each dex-register. This guarantees they will have the same stack map.
+    HEnvironment* e1 = i1->GetEnvironment();
+    HEnvironment* e2 = i2->GetEnvironment();
+    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
+      return false;
+    }
+    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
+      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
+          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  HGraph* const graph_;
+  CodeGenerator* const codegen_;
+
+  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
+  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
+};
+
+class InstructionCodeGenerator : public HGraphVisitor {
+ public:
+  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        deopt_slow_paths_(graph, codegen) {}
+
+ protected:
+  // Add a slow-path generator for each instruction/slow-path combination that desires sharing.
+  // TODO: under the current regime, only deopt sharing makes sense; extend later.
+  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
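Backends obtain a possibly shared slow-path through the protected deopt_slow_paths_ member; the per-architecture changes later in this patch all reduce to the same shape. A sketch of the before/after inside a VisitDeoptimize, grounded in the ARM hunk below (no API beyond what this patch adds):

```cpp
// Before: each VisitDeoptimize allocated and registered its own slow-path.
//   SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize);
//   codegen_->AddSlowPath(slow_path);
// After: the generator returns an existing slow-path when one with the same
// dex-pc, live registers, and environment already exists; otherwise it
// creates, registers, and returns a new one.
SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
// ... branch to slow_path->GetEntryLabel() as before ...
```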
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0be1520..45520b4 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -350,24 +350,24 @@
 
 class DeoptimizationSlowPathARM : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathARM(HInstruction* instruction)
+  explicit DeoptimizationSlowPathARM(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
 };
 
@@ -913,7 +913,7 @@
 }
 
 InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1655,8 +1655,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 26ca71e..26d6d63 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -188,7 +188,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
 
-class InstructionCodeGeneratorARM : public HGraphVisitor {
+class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1ad487d..a3150d3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -477,24 +477,24 @@
 
 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
       : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
 };
 
@@ -1605,7 +1605,7 @@
 
 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                              CodeGeneratorARM64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -2939,9 +2939,8 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathARM64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeARM64* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0e90ac6..f2ff894 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -186,7 +186,7 @@
   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
 };
 
-class InstructionCodeGeneratorARM64 : public HGraphVisitor {
+class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c51e62e..3229129 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -444,19 +444,16 @@
 
 class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit DeoptimizationSlowPathMIPS(HInstruction* instruction)
+  explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
                                 instruction_,
-                                dex_pc,
+                                instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickDeoptimize));
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
@@ -465,7 +462,7 @@
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
 };
 
@@ -608,9 +605,9 @@
     // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR
     // unpredictable and the following mfch1 will fail.
     __ Mfc1(TMP, f1);
-    __ Mfhc1(AT, f1);
+    __ MoveFromFpuHigh(AT, f1);
     __ Mtc1(r2_l, f1);
-    __ Mthc1(r2_h, f1);
+    __ MoveToFpuHigh(r2_h, f1);
     __ Move(r2_l, TMP);
     __ Move(r2_h, AT);
   } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
@@ -862,7 +859,7 @@
       Register dst_low =  destination.AsRegisterPairLow<Register>();
       FRegister src = source.AsFpuRegister<FRegister>();
       __ Mfc1(dst_low, src);
-      __ Mfhc1(dst_high, src);
+      __ MoveFromFpuHigh(dst_high, src);
     } else {
       DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
       int32_t off = source.GetStackIndex();
@@ -875,7 +872,7 @@
       Register src_high = source.AsRegisterPairHigh<Register>();
       Register src_low = source.AsRegisterPairLow<Register>();
       __ Mtc1(src_low, dst);
-      __ Mthc1(src_high, dst);
+      __ MoveToFpuHigh(src_high, dst);
     } else if (source.IsFpuRegister()) {
       __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
     } else {
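The Mthc1/Mfhc1 to MoveToFpuHigh/MoveFromFpuHigh substitutions throughout this file abstract over the FPU register model. A hedged sketch of the dispatch such a helper plausibly performs, presumably keyed off the same Is32BitFloatingPoint() feature queried later in this file (the exact ART assembler signature may differ):

```cpp
// Hypothetical body of the new assembler helper (not shown in this patch).
void MipsAssembler::MoveToFpuHigh(Register r, FRegister f) {
  if (fpu_32bit_) {
    // FR=0: doubles occupy even/odd register pairs; the high word is the odd register.
    Mtc1(r, static_cast<FRegister>(f + 1));
  } else {
    // FR=1: use the dedicated high-half move, mthc1 (MIPS32R2+).
    Mthc1(r, f);
  }
}
```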
@@ -1241,7 +1238,7 @@
 
 InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph,
                                                            CodeGeneratorMIPS* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1542,8 +1539,10 @@
   bool use_imm = rhs_location.IsConstant();
   Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>();
   int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
-  uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
-  uint32_t shift_value = rhs_imm & shift_mask;
+  const uint32_t shift_mask = (type == Primitive::kPrimInt)
+      ? kMaxIntShiftValue
+      : kMaxLongShiftValue;
+  const uint32_t shift_value = rhs_imm & shift_mask;
   // Are the INS (Insert Bit Field) and ROTR instructions supported?
   bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
 
@@ -1583,6 +1582,11 @@
             __ Rotrv(dst, lhs, rhs_reg);
           } else {
             __ Subu(TMP, ZERO, rhs_reg);
+            // 32-bit shift instructions use the 5 least significant bits of the shift count, so
+            // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case
+            // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out
+            // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`;
+            // in other words, the OR'd values are equal.
             __ Sllv(TMP, lhs, TMP);
             __ Srlv(dst, lhs, rhs_reg);
             __ Or(dst, dst, TMP);
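The comment above hinges on MIPS variable shifts masking their count to 5 bits. A minimal C++ sketch of the identity being exploited (RotateRight is illustrative):

```cpp
#include <cstdint>

uint32_t RotateRight(uint32_t x, uint32_t n) {
  n &= 31;  // Sllv/Srlv use only the 5 least significant bits of the count.
  // On the hardware, shifting by -n means shifting by (32 - n) & 31. The
  // explicit mask also keeps this C++ well-defined for n == 0, where the
  // expression degenerates to (x >> 0) | (x << 0) == x.
  return (x >> n) | (x << ((32 - n) & 31));
}
```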
@@ -1646,33 +1650,33 @@
               }
             }
           } else {
-            shift_value -= kMipsBitsPerWord;
+            const uint32_t shift_value_high = shift_value - kMipsBitsPerWord;
             if (instr->IsShl()) {
-              __ Sll(dst_high, lhs_low, shift_value);
+              __ Sll(dst_high, lhs_low, shift_value_high);
               __ Move(dst_low, ZERO);
             } else if (instr->IsShr()) {
-              __ Sra(dst_low, lhs_high, shift_value);
+              __ Sra(dst_low, lhs_high, shift_value_high);
               __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1);
             } else if (instr->IsUShr()) {
-              __ Srl(dst_low, lhs_high, shift_value);
+              __ Srl(dst_low, lhs_high, shift_value_high);
               __ Move(dst_high, ZERO);
             } else {
-              if (shift_value == 0) {
+              if (shift_value == kMipsBitsPerWord) {
                 // 64-bit rotation by 32 is just a swap.
                 __ Move(dst_low, lhs_high);
                 __ Move(dst_high, lhs_low);
               } else {
                 if (has_ins_rotr) {
-                  __ Srl(dst_low, lhs_high, shift_value);
-                  __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value, shift_value);
-                  __ Srl(dst_high, lhs_low, shift_value);
-                  __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+                  __ Srl(dst_low, lhs_high, shift_value_high);
+                  __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high);
+                  __ Srl(dst_high, lhs_low, shift_value_high);
+                  __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high);
                 } else {
-                  __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value);
-                  __ Srl(dst_low, lhs_high, shift_value);
+                  __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high);
+                  __ Srl(dst_low, lhs_high, shift_value_high);
                   __ Or(dst_low, dst_low, TMP);
-                  __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
-                  __ Srl(dst_high, lhs_low, shift_value);
+                  __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high);
+                  __ Srl(dst_high, lhs_low, shift_value_high);
                   __ Or(dst_high, dst_high, TMP);
                 }
               }
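The shift_value_high local keeps shift_value immutable, so the rotate-by-32 case is now tested as shift_value == kMipsBitsPerWord rather than as zero after an in-place subtraction. The underlying reduction for the shl branch is sketched below (ShlLongByLargeImmediate is an illustrative helper):

```cpp
#include <cstdint>

// A 64-bit logical shift-left by s, 32 <= s < 64, built from 32-bit halves:
// the high word receives the low word shifted by s - 32; the low word is 0.
void ShlLongByLargeImmediate(uint32_t lhs_low, uint32_t lhs_high,
                             uint32_t shift_value,  // 32 <= shift_value < 64
                             uint32_t* dst_low, uint32_t* dst_high) {
  (void)lhs_high;  // Fully shifted out.
  const uint32_t shift_value_high = shift_value - 32;  // kMipsBitsPerWord == 32.
  *dst_high = lhs_low << shift_value_high;
  *dst_low = 0u;
}
```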
@@ -3425,8 +3429,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeMIPS* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -3528,8 +3532,8 @@
       // Need to move to FP regs since FP results are returned in core registers.
       __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
               locations->Out().AsFpuRegister<FRegister>());
-      __ Mthc1(locations->GetTemp(2).AsRegister<Register>(),
-               locations->Out().AsFpuRegister<FRegister>());
+      __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                       locations->Out().AsFpuRegister<FRegister>());
     }
   } else {
     if (!Primitive::IsFloatingPointType(type)) {
@@ -3649,8 +3653,8 @@
       // Pass FP parameters in core registers.
       __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
               locations->InAt(1).AsFpuRegister<FRegister>());
-      __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(),
-               locations->InAt(1).AsFpuRegister<FRegister>());
+      __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         locations->InAt(1).AsFpuRegister<FRegister>());
     }
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
                             instruction,
@@ -4800,6 +4804,7 @@
   Primitive::Type input_type = conversion->GetInputType();
   Primitive::Type result_type = conversion->GetResultType();
   DCHECK_NE(input_type, result_type);
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
@@ -4807,8 +4812,9 @@
   }
 
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
-      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
+  if (!isR6 &&
+      ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+       (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
     call_kind = LocationSummary::kCall;
   }
 
@@ -4846,6 +4852,8 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
 
   DCHECK_NE(input_type, result_type);
 
@@ -4891,7 +4899,37 @@
                    << " to " << result_type;
     }
   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
-    if (input_type != Primitive::kPrimLong) {
+    if (input_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+        Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
+        FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+        __ Mtc1(src_low, FTMP);
+        __ Mthc1(src_high, FTMP);
+        if (result_type == Primitive::kPrimFloat) {
+          __ Cvtsl(dst, FTMP);
+        } else {
+          __ Cvtdl(dst, FTMP);
+        }
+      } else {
+        int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+                                                                      : QUICK_ENTRY_POINT(pL2d);
+        bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
+                                                             : IsDirectEntrypoint(kQuickL2d);
+        codegen_->InvokeRuntime(entry_offset,
+                                conversion,
+                                conversion->GetDexPc(),
+                                nullptr,
+                                direct);
+        if (result_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+        } else {
+          CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+        }
+      }
+    } else {
       Register src = locations->InAt(0).AsRegister<Register>();
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       __ Mtc1(src, FTMP);
@@ -4900,54 +4938,168 @@
       } else {
         __ Cvtdw(dst, FTMP);
       }
-    } else {
-      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
-                                                                    : QUICK_ENTRY_POINT(pL2d);
-      bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
-                                                           : IsDirectEntrypoint(kQuickL2d);
-      codegen_->InvokeRuntime(entry_offset,
-                              conversion,
-                              conversion->GetDexPc(),
-                              nullptr,
-                              direct);
-      if (result_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
-      } else {
-        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
-      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
-    int32_t entry_offset;
-    bool direct;
-    if (result_type != Primitive::kPrimLong) {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
-                                                           : QUICK_ENTRY_POINT(pD2iz);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz)
-                                                      : IsDirectEntrypoint(kQuickD2iz);
-    } else {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
-                                                           : QUICK_ENTRY_POINT(pD2l);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
-                                                      : IsDirectEntrypoint(kQuickD2l);
-    }
-    codegen_->InvokeRuntime(entry_offset,
-                            conversion,
-                            conversion->GetDexPc(),
-                            nullptr,
-                            direct);
-    if (result_type != Primitive::kPrimLong) {
-      if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+    if (result_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+        Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+        MipsLabel truncate;
+        MipsLabel done;
+
+        // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+        // value when the input is either a NaN or is outside of the range of the output type
+        // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+        // the same result.
+        //
+        // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+        // value of the output type if the input is outside of the range after the truncation or
+        // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+        // results. This matches the desired float/double-to-int/long conversion exactly.
+        //
+        // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+        //
+        // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+        // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+        // even though it must be NAN2008=1 on R6.
+        //
+        // The code takes care of the different behaviors by first comparing the input to the
+        // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+        // If the input is greater than or equal to the minimum, it proceeds to the truncate
+        // instruction, which will handle such an input the same way irrespective of NAN2008.
+        // Otherwise the input is compared to itself to determine whether it is a NaN or not
+        // in order to return either zero or the minimum value.
+        //
+        // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+        // truncate instruction for MIPS64R6.
+        if (input_type == Primitive::kPrimFloat) {
+          uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, min_val);
+          __ Mtc1(TMP, FTMP);
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, High32Bits(min_val));
+          __ Mtc1(ZERO, FTMP);
+          __ Mthc1(TMP, FTMP);
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ Move(dst_low, ZERO);
+        __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst_high, dst_high, TMP);
+
+        __ B(&done);
+
+        __ Bind(&truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ TruncLS(FTMP, src);
+        } else {
+          __ TruncLD(FTMP, src);
+        }
+        __ Mfc1(dst_low, FTMP);
+        __ Mfhc1(dst_high, FTMP);
+
+        __ Bind(&done);
       } else {
-        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+        int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+                                                                     : QUICK_ENTRY_POINT(pD2l);
+        bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
+                                                            : IsDirectEntrypoint(kQuickD2l);
+        codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct);
+        if (input_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        } else {
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        }
       }
     } else {
+      FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+      Register dst = locations->Out().AsRegister<Register>();
+      MipsLabel truncate;
+      MipsLabel done;
+
+      // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+      // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+      // even though it must be NAN2008=1 on R6.
+      //
+      // For details see the large comment above for the truncation of float/double to long on R6.
+      //
+      // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+      // truncate instruction for MIPS64R6.
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, min_val);
+        __ Mtc1(TMP, FTMP);
       } else {
-        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, High32Bits(min_val));
+        __ Mtc1(ZERO, FTMP);
+        if (fpu_32bit) {
+          __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
+        } else {
+          __ Mthc1(TMP, FTMP);
+        }
       }
+
+      if (isR6) {
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst, dst, TMP);
+      } else {
+        if (input_type == Primitive::kPrimFloat) {
+          __ ColeS(0, FTMP, src);
+        } else {
+          __ ColeD(0, FTMP, src);
+        }
+        __ Bc1t(0, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CeqS(0, src, src);
+        } else {
+          __ CeqD(0, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Movf(dst, ZERO, 0);
+      }
+
+      __ B(&done);
+
+      __ Bind(&truncate);
+
+      if (input_type == Primitive::kPrimFloat) {
+        __ TruncWS(FTMP, src);
+      } else {
+        __ TruncWD(FTMP, src);
+      }
+      __ Mfc1(dst, FTMP);
+
+      __ Bind(&done);
     }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
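
All of the branchy sequences above exist because a bare trunc cannot give Java semantics for NaN or out-of-range inputs on every revision. As a point of reference, the target behavior sketched in C++ (the helper name is hypothetical; the in-range case is plain truncation toward zero, per the NAN2008 comments above):

    #include <cmath>
    #include <cstdint>
    #include <limits>

    int64_t FloatToLongJavaSemantics(float in) {     // Hypothetical name.
      if (std::isnan(in)) {
        return 0;                                    // NaN converts to 0.
      }
      if (in >= static_cast<float>(std::numeric_limits<int64_t>::max())) {
        return std::numeric_limits<int64_t>::max();  // Too big: clamp.
      }
      if (in <= static_cast<float>(std::numeric_limits<int64_t>::min())) {
        return std::numeric_limits<int64_t>::min();  // Too small: clamp.
      }
      return static_cast<int64_t>(in);               // Truncate toward zero.
    }
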
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 38302ad..c3d4851 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -197,7 +197,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS);
 };
 
-class InstructionCodeGeneratorMIPS : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 9390fa6..38c32ca 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -391,24 +391,24 @@
 
 class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
-    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
 };
 
@@ -1113,7 +1113,7 @@
 
 InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph,
                                                                CodeGeneratorMIPS64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -2749,9 +2749,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathMIPS64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeMIPS64* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
@@ -3933,36 +3932,18 @@
     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
   }
 
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
-      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
-    call_kind = LocationSummary::kCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion);
+
+  if (Primitive::IsFloatingPointType(input_type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
-
-  if (call_kind == LocationSummary::kNoCall) {
-    if (Primitive::IsFloatingPointType(input_type)) {
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(0, Location::RequiresRegister());
-    }
-
-    if (Primitive::IsFloatingPointType(result_type)) {
-      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-    } else {
-      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-    }
+  if (Primitive::IsFloatingPointType(result_type)) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    InvokeRuntimeCallingConvention calling_convention;
-
-    if (Primitive::IsFloatingPointType(input_type)) {
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-    } else {
-      locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    }
-
-    locations->SetOut(calling_convention.GetReturnLocation(result_type));
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
@@ -4007,55 +3988,107 @@
                    << " to " << result_type;
     }
   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
-    if (input_type != Primitive::kPrimLong) {
-      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
-      GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+    GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+    if (input_type == Primitive::kPrimLong) {
+      __ Dmtc1(src, FTMP);
+      if (result_type == Primitive::kPrimFloat) {
+        __ Cvtsl(dst, FTMP);
+      } else {
+        __ Cvtdl(dst, FTMP);
+      }
+    } else {
       __ Mtc1(src, FTMP);
       if (result_type == Primitive::kPrimFloat) {
         __ Cvtsw(dst, FTMP);
       } else {
         __ Cvtdw(dst, FTMP);
       }
-    } else {
-      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
-                                                                    : QUICK_ENTRY_POINT(pL2d);
-      codegen_->InvokeRuntime(entry_offset,
-                              conversion,
-                              conversion->GetDexPc(),
-                              nullptr);
-      if (result_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
-      } else {
-        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
-      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
-    int32_t entry_offset;
-    if (result_type != Primitive::kPrimLong) {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
-                                                           : QUICK_ENTRY_POINT(pD2iz);
+    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+    FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+    Mips64Label truncate;
+    Mips64Label done;
+
+    // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+    // value when the input is either a NaN or is outside of the range of the output type
+    // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+    // the same result.
+    //
+    // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+    // value of the output type if the input is outside of the range after the truncation or
+    // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+    // results. This matches the desired float/double-to-int/long conversion exactly.
+    //
+    // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+    //
+    // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+    // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+    // even though it must be NAN2008=1 on R6.
+    //
+    // The code takes care of the different behaviors by first comparing the input to the
+    // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+    // If the input is greater than or equal to the minimum, it proceeds to the truncate
+    // instruction, which will handle such an input the same way irrespective of NAN2008.
+    // Otherwise the input is compared to itself to determine whether it is a NaN or not
+    // in order to return either zero or the minimum value.
+    //
+    // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+    // truncate instruction for MIPS64R6.
+    if (input_type == Primitive::kPrimFloat) {
+      uint32_t min_val = (result_type == Primitive::kPrimLong)
+          ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
+          : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+      __ LoadConst32(TMP, min_val);
+      __ Mtc1(TMP, FTMP);
+      __ CmpLeS(FTMP, FTMP, src);
     } else {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
-                                                           : QUICK_ENTRY_POINT(pD2l);
+      uint64_t min_val = (result_type == Primitive::kPrimLong)
+          ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
+          : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+      __ LoadConst64(TMP, min_val);
+      __ Dmtc1(TMP, FTMP);
+      __ CmpLeD(FTMP, FTMP, src);
     }
-    codegen_->InvokeRuntime(entry_offset,
-                            conversion,
-                            conversion->GetDexPc(),
-                            nullptr);
-    if (result_type != Primitive::kPrimLong) {
+
+    __ Bc1nez(FTMP, &truncate);
+
+    if (input_type == Primitive::kPrimFloat) {
+      __ CmpEqS(FTMP, src, src);
+    } else {
+      __ CmpEqD(FTMP, src, src);
+    }
+    if (result_type == Primitive::kPrimLong) {
+      __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
+    } else {
+      __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+    }
+    __ Mfc1(TMP, FTMP);
+    __ And(dst, dst, TMP);
+
+    __ Bc(&done);
+
+    __ Bind(&truncate);
+
+    if (result_type == Primitive::kPrimLong) {
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+        __ TruncLS(FTMP, src);
       } else {
-        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+        __ TruncLD(FTMP, src);
       }
+      __ Dmfc1(dst, FTMP);
     } else {
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        __ TruncWS(FTMP, src);
       } else {
-        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        __ TruncWD(FTMP, src);
       }
+      __ Mfc1(dst, FTMP);
     }
+
+    __ Bind(&done);
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
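
One detail worth calling out: the CmpEq/Mfc1/And triple above is a branchless select. CmpEq writes an all-ones mask to FTMP when src equals itself (i.e., it is not a NaN) and all zeros otherwise, so ANDing the preloaded minimum with that mask leaves the minimum for a below-range input and 0 for a NaN. A sketch of the same dataflow (hypothetical helper):

    #include <cstdint>
    #include <limits>

    int64_t NanOrMin(double src) {                        // Hypothetical name.
      int64_t mask = (src == src) ? ~int64_t{0} : 0;      // CmpEqD + Mfc1.
      return std::numeric_limits<int64_t>::min() & mask;  // And(dst, dst, TMP).
    }
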
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 60ff96d..7182e8e 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -201,7 +201,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
 };
 
-class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4a0c2f4..c24d258 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -365,11 +365,10 @@
 
 class DeoptimizationSlowPathX86 : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
     : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    DCHECK(instruction_->IsDeoptimize());
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
@@ -383,7 +382,7 @@
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
 };
 
@@ -892,7 +891,7 @@
 }
 
 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1611,9 +1610,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathX86(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index df73476..c65c423 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -178,7 +178,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
 
-class InstructionCodeGeneratorX86 : public HGraphVisitor {
+class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ec62d84..294b40e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -387,18 +387,16 @@
 
 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
       : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
-                                  deoptimize,
-                                  deoptimize->GetDexPc(),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
                                   this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
@@ -406,7 +404,7 @@
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
 };
 
@@ -1000,7 +998,7 @@
 
 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                                CodeGeneratorX86_64* codegen)
-      : HGraphVisitor(graph),
+      : InstructionCodeGenerator(graph, codegen),
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
@@ -1594,9 +1592,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathX86_64(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c5e8a04..505c9dc 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -183,7 +183,7 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
 };
 
-class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
+class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 06fab61..bc126a2 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -43,14 +43,18 @@
   return codegen_->GetGraph()->GetArena();
 }
 
-inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const {
   return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
 }
 
-inline bool IntrinsicCodeGeneratorMIPS::IsR6() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR6() const {
   return codegen_->GetInstructionSetFeatures().IsR6();
 }
 
+inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
+  return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
+}
+
 #define __ codegen->GetAssembler()->
 
 static void MoveFromReturnRegister(Location trg,
@@ -162,7 +166,7 @@
     Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
 
     __ Mfc1(out_lo, in);
-    __ Mfhc1(out_hi, in);
+    __ MoveFromFpuHigh(out_hi, in);
   } else {
     Register out = locations->Out().AsRegister<Register>();
 
@@ -204,7 +208,7 @@
     Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
 
     __ Mtc1(in_lo, out);
-    __ Mthc1(in_hi, out);
+    __ MoveToFpuHigh(in_hi, out);
   } else {
     Register in = locations->InAt(0).AsRegister<Register>();
 
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index f86b0ef..575a7d0 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -67,8 +67,9 @@
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
-  bool IsR2OrNewer(void);
-  bool IsR6(void);
+  bool IsR2OrNewer() const;
+  bool IsR6() const;
+  bool Is32BitFPU() const;
 
  private:
   MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3eb7274..988e32b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -17,6 +17,7 @@
 #include "optimizing_compiler.h"
 
 #include <fstream>
+#include <memory>
 #include <stdint.h>
 
 #ifdef ART_ENABLE_CODEGEN_arm64
@@ -52,6 +53,8 @@
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer_debug.h"
 #include "elf_writer_quick.h"
 #include "graph_checker.h"
 #include "graph_visualizer.h"
@@ -60,6 +63,7 @@
 #include "inliner.h"
 #include "instruction_simplifier.h"
 #include "intrinsics.h"
+#include "jit/debugger_interface.h"
 #include "jit/jit_code_cache.h"
 #include "licm.h"
 #include "jni/quick/jni_compiler.h"
@@ -68,6 +72,7 @@
 #include "prepare_for_register_allocation.h"
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
+#include "oat_quick_method_header.h"
 #include "sharpening.h"
 #include "side_effects_analysis.h"
 #include "ssa_builder.h"
@@ -968,6 +973,39 @@
     return false;
   }
 
+  if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) {
+    const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
+    const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
+    CompiledMethod compiled_method(
+        GetCompilerDriver(),
+        codegen->GetInstructionSet(),
+        ArrayRef<const uint8_t>(code_allocator.GetMemory()),
+        codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+        codegen->GetCoreSpillMask(),
+        codegen->GetFpuSpillMask(),
+        ArrayRef<const SrcMapElem>(),
+        ArrayRef<const uint8_t>(),  // mapping_table.
+        ArrayRef<const uint8_t>(stack_map_data, stack_map_size),
+        ArrayRef<const uint8_t>(),  // native_gc_map.
+        ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+        ArrayRef<const LinkerPatch>());
+    dwarf::MethodDebugInfo method_debug_info {
+        dex_file,
+        class_def_idx,
+        method_idx,
+        access_flags,
+        code_item,
+        false,  // deduped.
+        code_address,
+        code_address + code_allocator.GetSize(),
+        &compiled_method
+    };
+    ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info);
+    CreateJITCodeEntryForAddress(code_address,
+                                 std::unique_ptr<const uint8_t[]>(elf_file.data()),
+                                 elf_file.size());
+  }
+
   return true;
 }
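
For orientation, CreateJITCodeEntryForAddress publishes the in-memory ELF file through the standard GDB JIT interface, which is what jit/debugger_interface.h wraps. A sketch of that protocol's well-known data structures (not part of this patch):

    #include <cstdint>

    // The debugger sets a breakpoint on __jit_debug_register_code and, when
    // it fires, walks this doubly linked list of in-memory symbol files.
    extern "C" {
      enum jit_actions_t { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN };

      struct jit_code_entry {
        jit_code_entry* next_entry;
        jit_code_entry* prev_entry;
        const char* symfile_addr;  // Here: the per-method ELF file above.
        uint64_t symfile_size;
      };

      struct jit_descriptor {
        uint32_t version;          // Always 1.
        uint32_t action_flag;      // A jit_actions_t value.
        jit_code_entry* relevant_entry;
        jit_code_entry* first_entry;
      };
    }
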
 
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index c60a4ea..4784de1 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -270,7 +270,7 @@
       stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask);
     }
 
-    if (entry.num_dex_registers == 0) {
+    if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
       // No dex map available.
       stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap);
     } else {
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 560502f..604787f 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -614,6 +614,10 @@
   stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.EndStackMapEntry();
 
+  number_of_dex_registers = 1;
+  stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0);
+  stream.EndStackMapEntry();
+
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
@@ -622,7 +626,7 @@
   CodeInfo code_info(region);
   StackMapEncoding encoding = code_info.ExtractEncoding();
   ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
-  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+  ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
 
   uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
   ASSERT_EQ(0u, number_of_location_catalog_entries);
@@ -638,6 +642,16 @@
 
   ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
   ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+
+  stack_map = code_info.GetStackMapAt(1, encoding);
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding)));
+  ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding)));
+  ASSERT_EQ(1u, stack_map.GetDexPc(encoding));
+  ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding));
+  ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding));
+
+  ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
+  ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
 }
 
 TEST(StackMapTest, InlineTest) {
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 0dc307c..ac9c097 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -1035,6 +1035,22 @@
   EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01);
 }
 
+void MipsAssembler::TruncLS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncLD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncWS(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
+void MipsAssembler::TruncWD(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
 void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
 }
@@ -1051,6 +1067,14 @@
   EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21);
 }
 
+void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) {
+  EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21);
+}
+
 void MipsAssembler::Mfc1(Register rt, FRegister fs) {
   EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
@@ -1067,6 +1091,24 @@
   EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
 }
 
+void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) {
+  if (Is32BitFPU()) {
+    CHECK_EQ(fs % 2, 0) << fs;
+    Mfc1(rt, static_cast<FRegister>(fs + 1));
+  } else {
+    Mfhc1(rt, fs);
+  }
+}
+
+void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) {
+  if (Is32BitFPU()) {
+    CHECK_EQ(fs % 2, 0) << fs;
+    Mtc1(rt, static_cast<FRegister>(fs + 1));
+  } else {
+    Mthc1(rt, fs);
+  }
+}
+
 void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
   EmitI(0x31, rs, static_cast<Register>(ft), imm16);
 }
@@ -1213,10 +1255,10 @@
     Mtc1(temp, rd);
   }
   if (high == 0) {
-    Mthc1(ZERO, rd);
+    MoveToFpuHigh(ZERO, rd);
   } else {
     LoadConst32(temp, high);
-    Mthc1(temp, rd);
+    MoveToFpuHigh(temp, rd);
   }
 }
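
The new Trunc*/Cvt* emitters all funnel through EmitFR with opcode 0x11 (COP1). For orientation, a sketch of the standard MIPS FPU R-type field layout they fill in (taken from the architecture manual, not from this file):

    #include <cstdint>

    // fmt selects the source format (0x10 = S, 0x11 = D, 0x14 = W, 0x15 = L);
    // funct selects the operation (0x09 = trunc.l, 0x0d = trunc.w,
    // 0x20 = cvt.s, 0x21 = cvt.d).
    uint32_t EncodeCop1(uint32_t fmt, uint32_t ft, uint32_t fs,
                        uint32_t fd, uint32_t funct) {
      return (0x11u << 26) | (fmt << 21) | (ft << 16) |
             (fs << 11) | (fd << 6) | funct;
    }
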
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 066e7b0..01c6490 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -265,15 +265,23 @@
   void Movf(Register rd, Register rs, int cc);  // R2
   void Movt(Register rd, Register rs, int cc);  // R2
 
+  void TruncLS(FRegister fd, FRegister fs);  // R2+, FR=1
+  void TruncLD(FRegister fd, FRegister fs);  // R2+, FR=1
+  void TruncWS(FRegister fd, FRegister fs);
+  void TruncWD(FRegister fd, FRegister fs);
   void Cvtsw(FRegister fd, FRegister fs);
   void Cvtdw(FRegister fd, FRegister fs);
   void Cvtsd(FRegister fd, FRegister fs);
   void Cvtds(FRegister fd, FRegister fs);
+  void Cvtsl(FRegister fd, FRegister fs);  // R2+, FR=1
+  void Cvtdl(FRegister fd, FRegister fs);  // R2+, FR=1
 
   void Mfc1(Register rt, FRegister fs);
   void Mtc1(Register rt, FRegister fs);
   void Mfhc1(Register rt, FRegister fs);
   void Mthc1(Register rt, FRegister fs);
+  void MoveFromFpuHigh(Register rt, FRegister fs);
+  void MoveToFpuHigh(Register rt, FRegister fs);
   void Lwc1(FRegister ft, Register rs, uint16_t imm16);
   void Ldc1(FRegister ft, Register rs, uint16_t imm16);
   void Swc1(FRegister ft, Register rs, uint16_t imm16);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 4361843..5fc3dee 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -599,6 +599,14 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW");
 }
 
+TEST_F(AssemblerMIPSTest, CvtSL) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDL) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSD) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD");
 }
@@ -607,6 +615,22 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS");
 }
 
+TEST_F(AssemblerMIPSTest, TruncWS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncWD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD");
+}
+
 TEST_F(AssemblerMIPSTest, Mfc1) {
   DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
 }
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index cfd8421..f9ff2df 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -771,6 +771,22 @@
   EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc);
 }
 
+void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
+void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
 void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa);
 }
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 883f013..3262640 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -250,6 +250,10 @@
   void RoundLD(FpuRegister fd, FpuRegister fs);
   void RoundWS(FpuRegister fd, FpuRegister fs);
   void RoundWD(FpuRegister fd, FpuRegister fs);
+  void TruncLS(FpuRegister fd, FpuRegister fs);
+  void TruncLD(FpuRegister fd, FpuRegister fs);
+  void TruncWS(FpuRegister fd, FpuRegister fs);
+  void TruncWD(FpuRegister fd, FpuRegister fs);
   void CeilLS(FpuRegister fd, FpuRegister fs);
   void CeilLD(FpuRegister fd, FpuRegister fs);
   void CeilWS(FpuRegister fd, FpuRegister fs);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index bac4375..7d79be2 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -527,6 +527,22 @@
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w");
 }
 
+TEST_F(AssemblerMIPS64Test, TruncWS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncWD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLS) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLD) {
+  DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d");
+}
+
 ////////////////
 // CALL / JMP //
 ////////////////
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 0691f2a..699ab3e 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1312,7 +1312,114 @@
 .endm
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+
+    # Fast path rosalloc allocation
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size
+    # t2: rosalloc run
+    # t3: thread-local alloc stack top
+    # t4: thread-local alloc stack end
+    # v0: free list head
+    #
+    # t5, t6: temps
+
+    lw    $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1)       # Load dex cache resolved types
+                                                               # array.
+
+    sll   $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT            # Scale type_idx to a byte offset.
+    addu  $t5, $t0, $t5                                        # Compute the entry's address.
+    lw    $t0, 0($t5)                                          # Load the resolved class (t0).
+    beqz  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li    $t6, MIRROR_CLASS_STATUS_INITIALIZED
+    lw    $t5, MIRROR_CLASS_STATUS_OFFSET($t0)                 # Check class status.
+    bne   $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flag and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor   $t5, $t5, $t5
+    addu  $t0, $t0, $t5
+
+    lw    $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)           # Check if the access flags include
+    li    $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE               # kAccClassIsFinalizable.
+    and   $t6, $t5, $t6
+    bnez  $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)        # Check if thread local allocation
+    lw    $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)        # stack has any room left.
+    bgeu  $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)            # Load object size (t1).
+    li    $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE          # Check if size is for a thread local
+                                                               # allocation.
+    bgtu  $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size: align the size up to the rosalloc bracket
+    # quantum, divide by the quantum size and subtract 1.
+
+    addiu $t1, $t1, -1                                         # Decrease obj size and shift right
+    srl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT        # by quantum.
+
+    sll   $t2, $t1, POINTER_SIZE_SHIFT
+    addu  $t2, $t2, $s1
+    lw    $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)                # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+
+    lw    $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqz  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+    nop
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+
+    lw    $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+    # Store the class pointer in the header. This also overwrites the next pointer. The offsets are
+    # asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw    $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+
+    sw    $v0, 0($t3)
+    addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+
+    lw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu $t5, $t5, -1
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                                          # Fence.
+
+    jalr  $zero, $ra
+    nop
+
+  .Lart_quick_alloc_object_rosalloc_slow_path:
+
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    jal   artAllocObjectFromCodeRosAlloc
+    move  $a2, $s1                                                # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
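
In C-level terms, the fast path above (once every slow-path check has passed) pops one slot off the run's free list, installs the class pointer, and records the object on the thread-local allocation stack. A rough sketch, with illustrative types and names that follow the ROSALLOC_*/THREAD_* offsets used above:

    #include <cstdint>

    struct Slot { Slot* next; };   // The first word later becomes the
                                   // object's class pointer.
    struct FreeList { Slot* head; uint32_t size; };

    Slot* PopRosAllocSlot(FreeList* list, uintptr_t klass,
                          uintptr_t** alloc_stack_top) {
      Slot* slot = list->head;                      // lw $v0, HEAD($t2)
      list->head = slot->next;                      // lw/sw SLOT_NEXT
      *reinterpret_cast<uintptr_t*>(slot) = klass;  // Overwrites 'next'.
      *(*alloc_stack_top)++ = reinterpret_cast<uintptr_t>(slot);
      list->size--;                                 // Then the 'sync' fence.
      return slot;
    }
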
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 2cb2212..7170f73 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -378,8 +378,8 @@
           "memory");  // clobber.
 #elif defined(__mips__) && defined(__LP64__)
     __asm__ __volatile__ (
-        // Spill a0-a7 which we say we don't clobber. May contain args.
-        "daddiu $sp, $sp, -64\n\t"
+        // Spill a0-a7 and t0-t3 which we say we don't clobber. May contain args.
+        "daddiu $sp, $sp, -96\n\t"
         "sd $a0, 0($sp)\n\t"
         "sd $a1, 8($sp)\n\t"
         "sd $a2, 16($sp)\n\t"
@@ -388,6 +388,10 @@
         "sd $a5, 40($sp)\n\t"
         "sd $a6, 48($sp)\n\t"
         "sd $a7, 56($sp)\n\t"
+        "sd $t0, 64($sp)\n\t"
+        "sd $t1, 72($sp)\n\t"
+        "sd $t2, 80($sp)\n\t"
+        "sd $t3, 88($sp)\n\t"
 
         "daddiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
         "sd %[referrer], 0($sp)\n\t"
@@ -423,13 +427,17 @@
         "ld $a5, 40($sp)\n\t"
         "ld $a6, 48($sp)\n\t"
         "ld $a7, 56($sp)\n\t"
-        "daddiu $sp, $sp, 64\n\t"
+        "ld $t0, 64($sp)\n\t"
+        "ld $t1, 72($sp)\n\t"
+        "ld $t2, 80($sp)\n\t"
+        "ld $t3, 88($sp)\n\t"
+        "daddiu $sp, $sp, 96\n\t"
 
         "move %[result], $v0\n\t"   // Store the call result.
         : [result] "=r" (result)
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer), [hidden] "r"(hidden)
-        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+        : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
           "t8", "t9", "k0", "k1", "fp", "ra",
           "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
           "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 263f50d..f674a6f 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -75,6 +75,7 @@
   kReferenceQueueWeakReferencesLock,
   kReferenceQueueClearedReferencesLock,
   kReferenceProcessorLock,
+  kJitDebugInterfaceLock,
   kJitCodeCacheLock,
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 75e0037..617f572 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -128,9 +128,11 @@
  */
 MterpFallback:
     EXPORT_PC
+#if MTERP_LOGGING
     mov  r0, rSELF
     add  r1, rFP, #OFF_FP_SHADOWFRAME
     bl MterpLogFallback
+#endif
 MterpCommonFallback:
     mov     r0, #0                                  @ signal retry with reference interpreter.
     b       MterpDone
@@ -144,9 +146,6 @@
  *  uint32_t* rFP  (should still be live, pointer to base of vregs)
  */
 MterpExceptionReturn:
-    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
-    str     r0, [r2]
-    str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
     b MterpDone
 MterpReturn:
diff --git a/runtime/interpreter/mterp/arm/op_aget.S b/runtime/interpreter/mterp/arm/op_aget.S
index 2cc4d66..11f7079 100644
--- a/runtime/interpreter/mterp/arm/op_aget.S
+++ b/runtime/interpreter/mterp/arm/op_aget.S
@@ -1,11 +1,11 @@
-%default { "load":"ldr", "shift":"2", "is_object":"0", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
+%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" }
     /*
      * Array get, 32 bits or less.  vAA <- vBB[vCC].
      *
      * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
      * instructions.  We use a pair of FETCH_Bs instead.
      *
-     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
      *
      * NOTE: assumes data offset for arrays is the same for all non-wide types.
      * If this changes, specialize.
@@ -25,9 +25,5 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     $load   r2, [r0, #$data_offset]     @ r2<- vBB[vCC]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    .if $is_object
-    SET_VREG_OBJECT r2, r9              @ vAA<- r2
-    .else
     SET_VREG r2, r9                     @ vAA<- r2
-    .endif
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_object_quick.S b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
index 1f8dc5a..fe29106 100644
--- a/runtime/interpreter/mterp/arm/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/arm/op_iget_object_quick.S
@@ -1 +1,17 @@
-%include "arm/op_iget_quick.S" {"is_object":"1"}
+    /* For: iget-object-quick */
+    /* op vA, vB, offset@CCCC */
+    mov     r2, rINST, lsr #12          @ r2<- B
+    FETCH r1, 1                         @ r1<- field byte offset
+    GET_VREG r0, r2                     @ r0<- object we're operating on
+    cmp     r0, #0                      @ check object for null
+    beq     common_errNullObject        @ object was null
+    bl      artIGetObjectFromMterp      @ (obj, offset)
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    PREFETCH_INST 2
+    cmp     r3, #0
+    bne     MterpPossibleException      @ bail out
+    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
+    ADVANCE 2                           @ advance rPC
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_quick.S b/runtime/interpreter/mterp/arm/op_iget_quick.S
index 9229afc..0eaf364 100644
--- a/runtime/interpreter/mterp/arm/op_iget_quick.S
+++ b/runtime/interpreter/mterp/arm/op_iget_quick.S
@@ -1,5 +1,5 @@
-%default { "load":"ldr", "is_object":"0" }
-    /* For: iget-quick, iget-object-quick */
+%default { "load":"ldr" }
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
     /* op vA, vB, offset@CCCC */
     mov     r2, rINST, lsr #12          @ r2<- B
     FETCH r1, 1                         @ r1<- field byte offset
@@ -9,10 +9,6 @@
     beq     common_errNullObject        @ object was null
     $load   r0, [r3, r1]                @ r0<- obj.field
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    .if $is_object
-    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
-    .else
     SET_VREG r0, r2                     @ fp[A]<- r0
-    .endif
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 060fe76..9975458 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -607,5 +607,14 @@
   }
 }
 
+extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
+  SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (UNLIKELY(obj == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return nullptr;
+  }
+  return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 33036e6..2d6f057 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -2013,7 +2013,7 @@
      * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
      * instructions.  We use a pair of FETCH_Bs instead.
      *
-     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
      *
      * NOTE: assumes data offset for arrays is the same for all non-wide types.
      * If this changes, specialize.
@@ -2033,11 +2033,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldr   r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    .if 0
-    SET_VREG_OBJECT r2, r9              @ vAA<- r2
-    .else
     SET_VREG r2, r9                     @ vAA<- r2
-    .endif
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /* ------------------------------ */
@@ -2106,7 +2102,7 @@
      * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
      * instructions.  We use a pair of FETCH_Bs instead.
      *
-     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
      *
      * NOTE: assumes data offset for arrays is the same for all non-wide types.
      * If this changes, specialize.
@@ -2126,11 +2122,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrb   r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    .if 0
-    SET_VREG_OBJECT r2, r9              @ vAA<- r2
-    .else
     SET_VREG r2, r9                     @ vAA<- r2
-    .endif
     GOTO_OPCODE ip                      @ jump to next instruction
 
 
@@ -2145,7 +2137,7 @@
      * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
      * instructions.  We use a pair of FETCH_Bs instead.
      *
-     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
      *
      * NOTE: assumes data offset for arrays is the same for all non-wide types.
      * If this changes, specialize.
@@ -2165,11 +2157,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrsb   r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    .if 0
-    SET_VREG_OBJECT r2, r9              @ vAA<- r2
-    .else
     SET_VREG r2, r9                     @ vAA<- r2
-    .endif
     GOTO_OPCODE ip                      @ jump to next instruction
 
 
@@ -2184,7 +2172,7 @@
      * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
      * instructions.  We use a pair of FETCH_Bs instead.
      *
-     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
      *
      * NOTE: assumes data offset for arrays is the same for all non-wide types.
      * If this changes, specialize.
@@ -2204,11 +2192,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrh   r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    .if 0
-    SET_VREG_OBJECT r2, r9              @ vAA<- r2
-    .else
     SET_VREG r2, r9                     @ vAA<- r2
-    .endif
     GOTO_OPCODE ip                      @ jump to next instruction
 
 
@@ -2223,7 +2207,7 @@
      * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17
      * instructions.  We use a pair of FETCH_Bs instead.
      *
-     * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short
+     * for: aget, aget-boolean, aget-byte, aget-char, aget-short
      *
      * NOTE: assumes data offset for arrays is the same for all non-wide types.
      * If this changes, specialize.
@@ -2243,11 +2227,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrsh   r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET]     @ r2<- vBB[vCC]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    .if 0
-    SET_VREG_OBJECT r2, r9              @ vAA<- r2
-    .else
     SET_VREG r2, r9                     @ vAA<- r2
-    .endif
     GOTO_OPCODE ip                      @ jump to next instruction
 
 
@@ -7127,7 +7107,7 @@
     .balign 128
 .L_op_iget_quick: /* 0xe3 */
 /* File: arm/op_iget_quick.S */
-    /* For: iget-quick, iget-object-quick */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
     /* op vA, vB, offset@CCCC */
     mov     r2, rINST, lsr #12          @ r2<- B
     FETCH r1, 1                         @ r1<- field byte offset
@@ -7137,11 +7117,7 @@
     beq     common_errNullObject        @ object was null
     ldr   r0, [r3, r1]                @ r0<- obj.field
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    .if 0
-    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
-    .else
     SET_VREG r0, r2                     @ fp[A]<- r0
-    .endif
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -7167,26 +7143,24 @@
     .balign 128
 .L_op_iget_object_quick: /* 0xe5 */
 /* File: arm/op_iget_object_quick.S */
-/* File: arm/op_iget_quick.S */
-    /* For: iget-quick, iget-object-quick */
+    /* For: iget-object-quick */
     /* op vA, vB, offset@CCCC */
     mov     r2, rINST, lsr #12          @ r2<- B
     FETCH r1, 1                         @ r1<- field byte offset
-    GET_VREG r3, r2                     @ r3<- object we're operating on
-    ubfx    r2, rINST, #8, #4           @ r2<- A
-    cmp     r3, #0                      @ check object for null
+    GET_VREG r0, r2                     @ r0<- object we're operating on
+    cmp     r0, #0                      @ check object for null
     beq     common_errNullObject        @ object was null
-    ldr   r0, [r3, r1]                @ r0<- obj.field
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    .if 1
+    bl      artIGetObjectFromMterp      @ (obj, offset)
+    ldr     r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx    r2, rINST, #8, #4           @ r2<- A
+    PREFETCH_INST 2
+    cmp     r3, #0
+    bne     MterpPossibleException      @ bail out
     SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
-    .else
-    SET_VREG r0, r2                     @ fp[A]<- r0
-    .endif
+    ADVANCE 2                           @ advance rPC
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
-
 /* ------------------------------ */
     .balign 128
 .L_op_iput_quick: /* 0xe6 */
@@ -7373,7 +7347,7 @@
 .L_op_iget_boolean_quick: /* 0xef */
 /* File: arm/op_iget_boolean_quick.S */
 /* File: arm/op_iget_quick.S */
-    /* For: iget-quick, iget-object-quick */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
     /* op vA, vB, offset@CCCC */
     mov     r2, rINST, lsr #12          @ r2<- B
     FETCH r1, 1                         @ r1<- field byte offset
@@ -7383,11 +7357,7 @@
     beq     common_errNullObject        @ object was null
     ldrb   r0, [r3, r1]                @ r0<- obj.field
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    .if 0
-    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
-    .else
     SET_VREG r0, r2                     @ fp[A]<- r0
-    .endif
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -7397,7 +7367,7 @@
 .L_op_iget_byte_quick: /* 0xf0 */
 /* File: arm/op_iget_byte_quick.S */
 /* File: arm/op_iget_quick.S */
-    /* For: iget-quick, iget-object-quick */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
     /* op vA, vB, offset@CCCC */
     mov     r2, rINST, lsr #12          @ r2<- B
     FETCH r1, 1                         @ r1<- field byte offset
@@ -7407,11 +7377,7 @@
     beq     common_errNullObject        @ object was null
     ldrsb   r0, [r3, r1]                @ r0<- obj.field
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    .if 0
-    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
-    .else
     SET_VREG r0, r2                     @ fp[A]<- r0
-    .endif
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -7421,7 +7387,7 @@
 .L_op_iget_char_quick: /* 0xf1 */
 /* File: arm/op_iget_char_quick.S */
 /* File: arm/op_iget_quick.S */
-    /* For: iget-quick, iget-object-quick */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
     /* op vA, vB, offset@CCCC */
     mov     r2, rINST, lsr #12          @ r2<- B
     FETCH r1, 1                         @ r1<- field byte offset
@@ -7431,11 +7397,7 @@
     beq     common_errNullObject        @ object was null
     ldrh   r0, [r3, r1]                @ r0<- obj.field
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    .if 0
-    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
-    .else
     SET_VREG r0, r2                     @ fp[A]<- r0
-    .endif
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -7445,7 +7407,7 @@
 .L_op_iget_short_quick: /* 0xf2 */
 /* File: arm/op_iget_short_quick.S */
 /* File: arm/op_iget_quick.S */
-    /* For: iget-quick, iget-object-quick */
+    /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
     /* op vA, vB, offset@CCCC */
     mov     r2, rINST, lsr #12          @ r2<- B
     FETCH r1, 1                         @ r1<- field byte offset
@@ -7455,11 +7417,7 @@
     beq     common_errNullObject        @ object was null
     ldrsh   r0, [r3, r1]                @ r0<- obj.field
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    .if 0
-    SET_VREG_OBJECT r0, r2              @ fp[A]<- r0
-    .else
     SET_VREG r0, r2                     @ fp[A]<- r0
-    .endif
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -12204,9 +12162,11 @@
  */
 MterpFallback:
     EXPORT_PC
+#if MTERP_LOGGING
     mov  r0, rSELF
     add  r1, rFP, #OFF_FP_SHADOWFRAME
     bl MterpLogFallback
+#endif
 MterpCommonFallback:
     mov     r0, #0                                  @ signal retry with reference interpreter.
     b       MterpDone
@@ -12220,9 +12180,6 @@
  *  uint32_t* rFP  (should still be live, pointer to base of vregs)
  */
 MterpExceptionReturn:
-    ldr     r2, [rFP, #OFF_FP_RESULT_REGISTER]
-    str     r0, [r2]
-    str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
     b MterpDone
 MterpReturn:
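
MterpFallback above now compiles out its diagnostic call unless mterp logging is enabled. In C terms the guard is plain conditional compilation; the MterpLogFallback signature below is inferred from the registers the stub loads (rSELF, then the shadow-frame pointer), so treat it as an assumption:

    #if MTERP_LOGGING
      MterpLogFallback(self, shadow_frame);  // r0 = rSELF, r1 = rFP + OFF_FP_SHADOWFRAME
    #endif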
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 3c2898b..f08a1a9 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -16,6 +16,13 @@
 
 #include "debugger_interface.h"
 
+#include "base/logging.h"
+#include "base/mutex.h"
+#include "thread-inl.h"
+#include "thread.h"
+
+#include <unordered_map>
+
 namespace art {
 
 // -------------------------------------------------------------------
@@ -57,13 +64,19 @@
   JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
 }
 
-JITCodeEntry* CreateJITCodeEntry(const uint8_t *symfile_addr, uintptr_t symfile_size) {
+static Mutex g_jit_debug_mutex("JIT debug interface lock", kJitDebugInterfaceLock);
+
+static JITCodeEntry* CreateJITCodeEntryInternal(
+    std::unique_ptr<const uint8_t[]> symfile_addr,
+    uintptr_t symfile_size)
+    REQUIRES(g_jit_debug_mutex) {
+  DCHECK(symfile_addr.get() != nullptr);
+
   JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr;
+  entry->symfile_addr_ = symfile_addr.release();
   entry->symfile_size_ = symfile_size;
   entry->prev_ = nullptr;
 
-  // TODO: Do we need a lock here?
   entry->next_ = __jit_debug_descriptor.first_entry_;
   if (entry->next_ != nullptr) {
     entry->next_->prev_ = entry;
@@ -76,8 +89,7 @@
   return entry;
 }
 
-void DeleteJITCodeEntry(JITCodeEntry* entry) {
-  // TODO: Do we need a lock here?
+static void DeleteJITCodeEntryInternal(JITCodeEntry* entry) REQUIRES(g_jit_debug_mutex) {
   if (entry->prev_ != nullptr) {
     entry->prev_->next_ = entry->next_;
   } else {
@@ -91,7 +103,48 @@
   __jit_debug_descriptor.relevant_entry_ = entry;
   __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
   __jit_debug_register_code();
+  delete[] entry->symfile_addr_;
   delete entry;
 }
 
+JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
+                                 uintptr_t symfile_size) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  return CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+}
+
+void DeleteJITCodeEntry(JITCodeEntry* entry) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  DeleteJITCodeEntryInternal(entry);
+}
+
+// Mapping from address to entry.  It takes ownership of the entries
+// so that the user of the JIT interface does not have to store them.
+static std::unordered_map<uintptr_t, JITCodeEntry*> g_jit_code_entries;
+
+void CreateJITCodeEntryForAddress(uintptr_t address,
+                                  std::unique_ptr<const uint8_t[]> symfile_addr,
+                                  uintptr_t symfile_size) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  DCHECK_NE(address, 0u);
+  DCHECK(g_jit_code_entries.find(address) == g_jit_code_entries.end());
+  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+  g_jit_code_entries.emplace(address, entry);
+}
+
+bool DeleteJITCodeEntryForAddress(uintptr_t address) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, g_jit_debug_mutex);
+  const auto& it = g_jit_code_entries.find(address);
+  if (it == g_jit_code_entries.end()) {
+    return false;
+  }
+  DeleteJITCodeEntryInternal(it->second);
+  g_jit_code_entries.erase(it);
+  return true;
+}
+
 }  // namespace art
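
A hedged usage sketch of the new address-keyed interface: the address only identifies the entry, and the unique_ptr hands ownership of the in-memory ELF buffer to the interface, which frees it inside DeleteJITCodeEntryInternal. The code_ptr and buffer contents here are placeholders:

    // Register debug info for freshly JITed code.
    std::unique_ptr<uint8_t[]> writable(new uint8_t[symfile_size]);
    // ... fill `writable` with an in-memory ELF file describing the method ...
    std::unique_ptr<const uint8_t[]> symfile(writable.release());
    CreateJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr),
                                 std::move(symfile), symfile_size);

    // On eviction from the code cache, drop the entry and its ELF buffer.
    bool removed = DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));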
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index a784ef5..74469a9 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
 
 #include <inttypes.h>
+#include <memory>
 
 namespace art {
 
@@ -26,11 +27,25 @@
 }
 
 // Notify native debugger about new JITed code by passing in-memory ELF.
-JITCodeEntry* CreateJITCodeEntry(const uint8_t *symfile_addr, uintptr_t symfile_size);
+// It takes ownership of the in-memory ELF file.
+JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
+                                 uintptr_t symfile_size);
 
 // Notify native debugger that JITed code has been removed.
+// It also releases the associated in-memory ELF file.
 void DeleteJITCodeEntry(JITCodeEntry* entry);
 
+// Notify native debugger about new JITed code by passing in-memory ELF.
+// The address is used only to uniquely identify the entry.
+// It takes ownership of the in-memory ELF file.
+void CreateJITCodeEntryForAddress(uintptr_t address,
+                                  std::unique_ptr<const uint8_t[]> symfile_addr,
+                                  uintptr_t symfile_size);
+
+// Notify native debugger that JITed code has been removed.
+// Returns false if the entry for the given address was not found.
+bool DeleteJITCodeEntryForAddress(uintptr_t address);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ab70f4c..05668a9 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -64,10 +64,14 @@
   cumulative_timings_.AddLogger(logger);
 }
 
-Jit::Jit()
-    : jit_library_handle_(nullptr), jit_compiler_handle_(nullptr), jit_load_(nullptr),
-      jit_compile_method_(nullptr), dump_info_on_shutdown_(false),
-      cumulative_timings_("JIT timings"), save_profiling_info_(false) {
+Jit::Jit() : jit_library_handle_(nullptr),
+             jit_compiler_handle_(nullptr),
+             jit_load_(nullptr),
+             jit_compile_method_(nullptr),
+             dump_info_on_shutdown_(false),
+             cumulative_timings_("JIT timings"),
+             save_profiling_info_(false),
+             generate_debug_info_(false) {
 }
 
 Jit* Jit::Create(JitOptions* options, std::string* error_msg) {
@@ -77,7 +81,10 @@
     return nullptr;
   }
   jit->code_cache_.reset(JitCodeCache::Create(
-      options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg));
+      options->GetCodeCacheInitialCapacity(),
+      options->GetCodeCacheMaxCapacity(),
+      jit->generate_debug_info_,
+      error_msg));
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
@@ -99,7 +106,7 @@
     *error_msg = oss.str();
     return false;
   }
-  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**)>(
+  jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**, bool*)>(
       dlsym(jit_library_handle_, "jit_load"));
   if (jit_load_ == nullptr) {
     dlclose(jit_library_handle_);
@@ -121,9 +128,10 @@
     return false;
   }
   CompilerCallbacks* callbacks = nullptr;
+  bool will_generate_debug_symbols = false;
   VLOG(jit) << "Calling JitLoad interpreter_only="
       << Runtime::Current()->GetInstrumentation()->InterpretOnly();
-  jit_compiler_handle_ = (jit_load_)(&callbacks);
+  jit_compiler_handle_ = (jit_load_)(&callbacks, &will_generate_debug_symbols);
   if (jit_compiler_handle_ == nullptr) {
     dlclose(jit_library_handle_);
     *error_msg = "JIT couldn't load compiler";
@@ -136,6 +144,7 @@
     return false;
   }
   compiler_callbacks_ = callbacks;
+  generate_debug_info_ = will_generate_debug_symbols;
   return true;
 }
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 0edce2f..42bbbe7 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -86,7 +86,7 @@
   // JIT compiler
   void* jit_library_handle_;
   void* jit_compiler_handle_;
-  void* (*jit_load_)(CompilerCallbacks**);
+  void* (*jit_load_)(CompilerCallbacks**, bool*);
   void (*jit_unload_)(void*);
   bool (*jit_compile_method_)(void*, ArtMethod*, Thread*);
 
@@ -99,6 +99,7 @@
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
   bool save_profiling_info_;
+  bool generate_debug_info_;
 
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
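
The jit_load function pointer now takes a second out-parameter, so the compiler plugin reports at load time whether it will emit debug symbols (which in turn disables ashmem and code GC below). A hedged sketch of a conforming entry point on the plugin side; the JitCompiler factory and accessors are assumptions, not code from this patch:

    extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) {
      JitCompiler* jit_compiler = JitCompiler::Create();  // assumed factory
      *callbacks = jit_compiler->GetCompilerCallbacks();  // assumed accessor
      // Assumed: driven by a --generate-debug-info style compiler option.
      *generate_debug_info = jit_compiler->GetCompilerOptions().GetGenerateDebugInfo();
      return jit_compiler;
    }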
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index c260ca4..5db699b 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -21,6 +21,7 @@
 #include "art_method-inl.h"
 #include "base/stl_util.h"
 #include "base/time_utils.h"
+#include "debugger_interface.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "jit/profiling_info.h"
@@ -48,8 +49,16 @@
 
 JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
                                    size_t max_capacity,
+                                   bool generate_debug_info,
                                    std::string* error_msg) {
   CHECK_GE(max_capacity, initial_capacity);
+
+  // Generating debug information is mostly for using the 'perf' tool, which does
+  // not work with ashmem.
+  bool use_ashmem = !generate_debug_info;
+  // With 'perf', we want a 1-1 mapping between an address and a method.
+  bool garbage_collect_code = !generate_debug_info;
+
   // We need to have 32 bit offsets from method headers in code cache which point to things
   // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
   // Ensure we're below 1 GB to be safe.
@@ -64,7 +73,7 @@
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
   MemMap* data_map = MemMap::MapAnonymous(
-    "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str);
+      "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str, use_ashmem);
   if (data_map == nullptr) {
     std::ostringstream oss;
     oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
@@ -83,7 +92,8 @@
   DCHECK_EQ(code_size + data_size, max_capacity);
   uint8_t* divider = data_map->Begin() + data_size;
 
-  MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str);
+  MemMap* code_map =
+      data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem);
   if (code_map == nullptr) {
     std::ostringstream oss;
     oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
@@ -94,14 +104,16 @@
   data_size = initial_capacity / 2;
   code_size = initial_capacity - data_size;
   DCHECK_EQ(code_size + data_size, initial_capacity);
-  return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity);
+  return new JitCodeCache(
+      code_map, data_map, code_size, data_size, max_capacity, garbage_collect_code);
 }
 
 JitCodeCache::JitCodeCache(MemMap* code_map,
                            MemMap* data_map,
                            size_t initial_code_capacity,
                            size_t initial_data_capacity,
-                           size_t max_capacity)
+                           size_t max_capacity,
+                           bool garbage_collect_code)
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache variable", lock_),
       collection_in_progress_(false),
@@ -112,8 +124,10 @@
       code_end_(initial_code_capacity),
       data_end_(initial_data_capacity),
       has_done_one_collection_(false),
-      last_update_time_ns_(0) {
+      last_update_time_ns_(0),
+      garbage_collect_code_(garbage_collect_code) {
 
+  DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
   data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
 
@@ -215,6 +229,9 @@
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
   const uint8_t* data = method_header->GetNativeGcMap();
+  // Notify native debugger that we are about to remove the code.
+  // It does nothing if we are not using a native debugger.
+  DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
   if (data != nullptr) {
     mspace_free(data_mspace_, const_cast<uint8_t*>(data));
   }
@@ -512,7 +529,11 @@
   // we hold the lock.
   {
     MutexLock mu(self, lock_);
-    if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) {
+    if (!garbage_collect_code_) {
+      IncreaseCodeCacheCapacity();
+      NotifyCollectionDone(self);
+      return;
+    } else if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) {
       has_done_one_collection_ = false;
       NotifyCollectionDone(self);
       return;
@@ -726,5 +747,10 @@
   info->SetIsMethodBeingCompiled(false);
 }
 
+size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
+  MutexLock mu(Thread::Current(), lock_);
+  return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 1c842e4..a152bcd 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -53,7 +53,10 @@
 
   // Create the code cache with a code + data capacity equal to "capacity"; any error message is
   // passed back in the out arg error_msg.
-  static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg);
+  static JitCodeCache* Create(size_t initial_capacity,
+                              size_t max_capacity,
+                              bool generate_debug_info,
+                              std::string* error_msg);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSize() REQUIRES(!lock_);
@@ -159,13 +162,16 @@
     return current_capacity_;
   }
 
+  size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
                MemMap* data_map,
                size_t initial_code_capacity,
                size_t initial_data_capacity,
-               size_t max_capacity);
+               size_t max_capacity,
+               bool garbage_collect_code);
 
   // Internal version of 'CommitCode' that will not retry if the
   // allocation fails. Return null if the allocation fails.
@@ -252,6 +258,9 @@
   // It is atomic to avoid locking when reading it.
   Atomic<uint64_t> last_update_time_ns_;
 
+  // Whether the code cache may garbage collect code (disabled when generating debug info for 'perf').
+  const bool garbage_collect_code_;
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
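GetMemorySizeOfCodePointer exposes the usable size of the allocation backing a code pointer (via mspace_usable_size on the dlmalloc space). A hedged sketch of the kind of caller this enables, for example computing a [low_pc, high_pc) range for debug info; the surrounding names are placeholders:

    size_t size = code_cache->GetMemorySizeOfCodePointer(code_ptr);
    uintptr_t low_pc = reinterpret_cast<uintptr_t>(code_ptr);
    uintptr_t high_pc = low_pc + size;  // upper bound on the end of the JITed code
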
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index e133847..3571edb 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -34,14 +34,11 @@
 #include "thread-inl.h"
 #include "utils.h"
 
-#define USE_ASHMEM 1
-
-#ifdef USE_ASHMEM
 #include <cutils/ashmem.h>
+
 #ifndef ANDROID_OS
 #include <sys/resource.h>
 #endif
-#endif
 
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS MAP_ANON
@@ -282,7 +279,8 @@
                              int prot,
                              bool low_4gb,
                              bool reuse,
-                             std::string* error_msg) {
+                             std::string* error_msg,
+                             bool use_ashmem) {
 #ifndef __LP64__
   UNUSED(low_4gb);
 #endif
@@ -303,17 +301,17 @@
 
   ScopedFd fd(-1);
 
-#ifdef USE_ASHMEM
-#ifdef __ANDROID__
-  const bool use_ashmem = true;
-#else
-  // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't
-  // fail due to ulimit restrictions. If they will then use a regular mmap.
-  struct rlimit rlimit_fsize;
-  CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
-  const bool use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
-      (page_aligned_byte_count < rlimit_fsize.rlim_cur);
-#endif
+  if (use_ashmem) {
+    if (!kIsTargetBuild) {
+      // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't
+      // fail due to ulimit restrictions. If they will then use a regular mmap.
+      struct rlimit rlimit_fsize;
+      CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0);
+      use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) ||
+        (page_aligned_byte_count < rlimit_fsize.rlim_cur);
+    }
+  }
+
   if (use_ashmem) {
     // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
     // prefixed "dalvik-".
@@ -326,7 +324,6 @@
     }
     flags &= ~MAP_ANONYMOUS;
   }
-#endif
 
   // We need to store and potentially set an error number for pretty printing of errors
   int saved_errno = 0;
@@ -508,7 +505,7 @@
 }
 
 MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
-                           std::string* error_msg) {
+                           std::string* error_msg, bool use_ashmem) {
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
@@ -532,23 +529,22 @@
   DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end);
   DCHECK_ALIGNED(tail_base_size, kPageSize);
 
-#ifdef USE_ASHMEM
-  // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
-  // prefixed "dalvik-".
-  std::string debug_friendly_name("dalvik-");
-  debug_friendly_name += tail_name;
-  ScopedFd fd(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size));
-  int flags = MAP_PRIVATE | MAP_FIXED;
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
-                              tail_name, strerror(errno));
-    return nullptr;
-  }
-#else
-  ScopedFd fd(-1);
+  int int_fd = -1;
   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#endif
-
+  if (use_ashmem) {
+    // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are
+    // prefixed "dalvik-".
+    std::string debug_friendly_name("dalvik-");
+    debug_friendly_name += tail_name;
+    int_fd = ashmem_create_region(debug_friendly_name.c_str(), tail_base_size);
+    flags = MAP_PRIVATE | MAP_FIXED;
+    if (int_fd == -1) {
+      *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s",
+                                tail_name, strerror(errno));
+      return nullptr;
+    }
+  }
+  ScopedFd fd(int_fd);
 
   MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);
   // Unmap/map the tail region.
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index efce09a..ed21365 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -57,17 +57,18 @@
   // "reuse" allows re-mapping an address range from an existing mapping.
   //
   // The word "anonymous" in this context means "not backed by a file". The supplied
-  // 'ashmem_name' will be used -- on systems that support it -- to give the mapping
+  // 'name' will be used -- on systems that support it -- to give the mapping
   // a name.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
-  static MemMap* MapAnonymous(const char* ashmem_name,
+  static MemMap* MapAnonymous(const char* name,
                               uint8_t* addr,
                               size_t byte_count,
                               int prot,
                               bool low_4gb,
                               bool reuse,
-                              std::string* error_msg);
+                              std::string* error_msg,
+                              bool use_ashmem = true);
 
   // Create placeholder for a region allocated by direct call to mmap.
   // This is useful when we do not have control over the code calling mmap,
@@ -168,7 +169,8 @@
   MemMap* RemapAtEnd(uint8_t* new_end,
                      const char* tail_name,
                      int tail_prot,
-                     std::string* error_msg);
+                     std::string* error_msg,
+                     bool use_ashmem = true);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       REQUIRES(!Locks::mem_maps_lock_);
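
Because use_ashmem defaults to true in both signatures, every existing call site keeps its behavior; only callers that must stay visible to tools like 'perf' opt out. A sketch mirroring the jit_code_cache call above (kProtAll is spelled out here; the variable names are placeholders):

    std::string error_msg;
    MemMap* data_map = MemMap::MapAnonymous("data-code-cache",
                                            /* addr */ nullptr,
                                            max_capacity,
                                            PROT_READ | PROT_WRITE | PROT_EXEC,
                                            /* low_4gb */ false,
                                            /* reuse */ false,
                                            &error_msg,
                                            /* use_ashmem */ false);
    if (data_map == nullptr) {
      LOG(ERROR) << "MapAnonymous failed: " << error_msg;
    }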
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 9cb37ee..786cf06 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -221,18 +221,22 @@
   CodeInfo code_info = handler_method_header_->GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
 
+  // Find stack map of the catch block.
+  StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
+  DCHECK(catch_stack_map.IsValid());
+  DexRegisterMap catch_vreg_map =
+      code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs);
+  if (!catch_vreg_map.IsValid()) {
+    return;
+  }
+
   // Find stack map of the throwing instruction.
   StackMap throw_stack_map =
       code_info.GetStackMapForNativePcOffset(stack_visitor->GetNativePcOffset(), encoding);
   DCHECK(throw_stack_map.IsValid());
   DexRegisterMap throw_vreg_map =
       code_info.GetDexRegisterMapOf(throw_stack_map, encoding, number_of_vregs);
-
-  // Find stack map of the catch block.
-  StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
-  DCHECK(catch_stack_map.IsValid());
-  DexRegisterMap catch_vreg_map =
-      code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs);
+  DCHECK(throw_vreg_map.IsValid());
 
   // Copy values between them.
   for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
@@ -387,6 +391,10 @@
                                              number_of_vregs)
         : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
 
+    if (!vreg_map.IsValid()) {
+      return;
+    }
+
     for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
       if (updated_vregs != nullptr && updated_vregs[vreg]) {
         // Keep the value set by debugger.
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9098d38..5faff93 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -322,6 +322,9 @@
                                            number_of_dex_registers)
       : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
 
+  if (!dex_register_map.IsValid()) {
+    return false;
+  }
   DexRegisterLocation::Kind location_kind =
       dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info, encoding);
   switch (location_kind) {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index a15a081..84185ce 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -473,6 +473,9 @@
 class DexRegisterMap {
  public:
   explicit DexRegisterMap(MemoryRegion region) : region_(region) {}
+  DexRegisterMap() {}
+
+  bool IsValid() const { return region_.pointer() != nullptr; }
 
   // Get the surface kind of Dex register `dex_register_number`.
   DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_number,
@@ -1136,11 +1139,14 @@
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map,
                                      const StackMapEncoding& encoding,
                                      uint32_t number_of_dex_registers) const {
-    DCHECK(stack_map.HasDexRegisterMap(encoding));
-    uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                      + stack_map.GetDexRegisterMapOffset(encoding);
-    size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
-    return DexRegisterMap(region_.Subregion(offset, size));
+    if (!stack_map.HasDexRegisterMap(encoding)) {
+      return DexRegisterMap();
+    } else {
+      uint32_t offset = GetDexRegisterMapsOffset(encoding)
+                        + stack_map.GetDexRegisterMapOffset(encoding);
+      size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
+      return DexRegisterMap(region_.Subregion(offset, size));
+    }
   }
 
   // Return the `DexRegisterMap` pointed by `inline_info` at depth `depth`.
@@ -1148,11 +1154,14 @@
                                           InlineInfo inline_info,
                                           const StackMapEncoding& encoding,
                                           uint32_t number_of_dex_registers) const {
-    DCHECK(inline_info.HasDexRegisterMapAtDepth(depth));
-    uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                      + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
-    size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
-    return DexRegisterMap(region_.Subregion(offset, size));
+    if (!inline_info.HasDexRegisterMapAtDepth(depth)) {
+      return DexRegisterMap();
+    } else {
+      uint32_t offset = GetDexRegisterMapsOffset(encoding)
+                        + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
+      size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
+      return DexRegisterMap(region_.Subregion(offset, size));
+    }
   }
 
   InlineInfo GetInlineInfoOf(StackMap stack_map, const StackMapEncoding& encoding) const {
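
With a default-constructed DexRegisterMap standing in for "no register info", the DCHECKs move out of the getters and every caller follows the same guard pattern (see the quick_exception_handler.cc and stack.cc hunks above). In miniature:

    DexRegisterMap vreg_map =
        code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
    if (!vreg_map.IsValid()) {
      return;  // This stack map carries no dex register information.
    }
    // Only now is it safe to query locations from vreg_map.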
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
new file mode 100644
index 0000000..54879fb
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "base/casts.h"
+#include "base/macros.h"
+#include "java_vm_ext.h"
+#include "jni_env_ext.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+static volatile std::atomic<bool> vm_was_shutdown(false);
+
+extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) {
+  // Wait until the runtime is shut down.
+  while (!vm_was_shutdown.load()) {
+    usleep(1000);
+  }
+  std::cout << "About to call exception check\n";
+  env->ExceptionCheck();
+  LOG(ERROR) << "Should not be reached!";
+}
+
+// NO_RETURN does not work with extern "C" for target builds.
+extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) {
+  // Fake up the managed stack so we can detach.
+  Thread* const self = Thread::Current();
+  self->SetTopOfStack(nullptr);
+  self->SetTopOfShadowStack(nullptr);
+  JavaVM* vm = down_cast<JNIEnvExt*>(env)->vm;
+  vm->DetachCurrentThread();
+  vm->DestroyJavaVM();
+  vm_was_shutdown.store(true);
+  // Give threads some time to get stuck in ExceptionCheck.
+  usleep(1000000);
+  if (env != nullptr) {
+    // Use env != nullptr to trick noreturn.
+    exit(0);
+  }
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/136-daemon-jni-shutdown/expected.txt b/test/136-daemon-jni-shutdown/expected.txt
new file mode 100644
index 0000000..f0b6353
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/expected.txt
@@ -0,0 +1,5 @@
+JNI_OnLoad called
+About to call exception check
+About to call exception check
+About to call exception check
+About to call exception check
diff --git a/test/136-daemon-jni-shutdown/info.txt b/test/136-daemon-jni-shutdown/info.txt
new file mode 100644
index 0000000..06a12df
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/info.txt
@@ -0,0 +1 @@
+Test that daemon threads that call into a JNI env after the runtime is shut down do not crash.
\ No newline at end of file
diff --git a/test/136-daemon-jni-shutdown/src/Main.java b/test/136-daemon-jni-shutdown/src/Main.java
new file mode 100644
index 0000000..6eceb75
--- /dev/null
+++ b/test/136-daemon-jni-shutdown/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test that daemon threads that call into a JNI env after the runtime is shut down do not crash.
+ */
+public class Main {
+
+    public final static int THREAD_COUNT = 4;
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+
+        for (int i = 0; i < THREAD_COUNT; i++) {
+            Thread t = new Thread(new DaemonRunnable());
+            t.setDaemon(true);
+            t.start();
+        }
+        // Give threads time to start and become stuck in waitAndCallIntoJniEnv.
+        Thread.sleep(1000);
+        destroyJavaVMAndExit();
+    }
+
+    static native void waitAndCallIntoJniEnv();
+    static native void destroyJavaVMAndExit();
+
+    private static class DaemonRunnable implements Runnable {
+        public void run() {
+            for (;;) {
+                waitAndCallIntoJniEnv();
+            }
+        }
+    }
+}
diff --git a/test/561-shared-slowpaths/expected.txt b/test/561-shared-slowpaths/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/561-shared-slowpaths/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/561-shared-slowpaths/info.txt b/test/561-shared-slowpaths/info.txt
new file mode 100644
index 0000000..c51e70b
--- /dev/null
+++ b/test/561-shared-slowpaths/info.txt
@@ -0,0 +1 @@
+Test on correctness while possibly sharing slow paths.
diff --git a/test/561-shared-slowpaths/src/Main.java b/test/561-shared-slowpaths/src/Main.java
new file mode 100644
index 0000000..718b875
--- /dev/null
+++ b/test/561-shared-slowpaths/src/Main.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on correctness in situations where slow paths may be shared
+// (actual sharing may vary between different code generators).
+//
+//
+public class Main {
+
+  // A method with two loops that can be optimized with dynamic BCE,
+  // resulting in two deopts on null, two deopts on lower OOB,
+  // and two deopts on upper OOB.
+  private static void init(int[] x, int[] y, int l1, int h1, int l2, int h2) {
+    for (int i = l1; i < h1; i++) {
+      x[i] = i;
+    }
+    for (int i = l2; i < h2; i++) {
+      y[i] = i;
+    }
+  }
+
+  // Test that each of the six possible exception situations for init()
+  // are correctly handled by the deopt instructions.
+  public static void main(String[] args) {
+    int[] x = new int[100];
+    int[] y = new int[100];
+    int z;
+
+    // All is well.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, 0, 100);
+    } catch (Exception e) {
+      z = 1;
+    }
+    expectEquals(z, 0);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], i);
+    }
+
+    // Null deopt on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(null, y, 0, 100, 0, 100);
+    } catch (NullPointerException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], 0);
+      expectEquals(y[i], 0);
+    }
+
+    // Lower out-of-bounds on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, -1, 100, 0, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], 0);
+      expectEquals(y[i], 0);
+    }
+
+    // Upper out-of-bounds on x.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 101, 0, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Null deopt on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, null, 0, 100, 0, 100);
+    } catch (NullPointerException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Lower out-of-bounds on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, -1, 100);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], 0);
+    }
+
+    // Upper out-of-bounds on y.
+    z = 0;
+    reset(x, y);
+    try {
+      init(x, y, 0, 100, 0, 101);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      z = 1;
+    }
+    expectEquals(z, 1);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(x[i], i);
+      expectEquals(y[i], i);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void reset(int[] x, int[] y) {
+    for (int i = 0; i < x.length; i++) x[i] = 0;
+    for (int i = 0; i < y.length; i++) y[i] = 0;
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-bce-preheader/expected.txt b/test/562-bce-preheader/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/562-bce-preheader/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/562-bce-preheader/info.txt b/test/562-bce-preheader/info.txt
new file mode 100644
index 0000000..ae006ac
--- /dev/null
+++ b/test/562-bce-preheader/info.txt
@@ -0,0 +1 @@
+Regression test for correct placement of hoisting/deopting code.
diff --git a/test/562-bce-preheader/src/Main.java b/test/562-bce-preheader/src/Main.java
new file mode 100644
index 0000000..8de0533
--- /dev/null
+++ b/test/562-bce-preheader/src/Main.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /**
+   * Method with an outer countable loop and an inner do-while loop.
+   * Since all work is done in the header of the inner loop, any invariant hoisting
+   * and deopting should be done in its proper loop preheader, not the true-block
+   * of the newly generated taken-test after dynamic BCE.
+   */
+  public static int doit(int[][] x, int j) {
+    float f = 0;
+    int acc = 0;
+    for (int i = 0; i < 2; i++) {
+      // The full body of a do-while loop is the loop header.
+      do {
+        // Some "noise" to avoid hoisting the array reference
+        // before the dynamic BCE phase runs.
+        f++;
+        // The invariant array reference with corresponding bounds check
+        // is a candidate for hoisting when dynamic BCE runs. If it is
+        // not moved to the proper loop preheader, the wrong values
+        // cause the test to fail.
+        acc += x[i][i];
+      } while (++j < i);
+    }
+    return acc;
+  }
+
+  /**
+   * Single countable loop with a clear header and a loop body. In this case,
+   * after dynamic bce, some invariant hoisting and deopting must go to the
+   * proper loop preheader and some must go to the true-block.
+   */
+  public static int foo(int[] x, int[] y, int n) {
+    float f = 0;
+    int acc = 0;
+    int i = 0;
+    while (true) {
+      // This part is the loop header.
+      // Some "noise" to avoid hoisting the array reference
+      // before the dynamic BCE phase runs.
+      f++;
+      // The invariant array reference with corresponding bounds check
+      // is a candidate for hoisting when dynamic BCE runs. If it is
+      // not moved to the proper loop preheader, the wrong values
+      // cause the test to fail.
+      acc += y[0];
+      if (++i > n)
+        break;
+      // From here on, this part is the loop body.
+      // The unit-stride array reference is a candidate for dynamic BCE.
+      // The deopting appears in the true-block.
+      acc += x[i];
+    }
+    return acc;
+  }
+
+  public static void main(String args[]) {
+    int[][] x = new int[2][2];
+    int y;
+
+    x[0][0] = 1;
+    x[1][1] = 2;
+
+    expectEquals(8, doit(x, -6));
+    expectEquals(7, doit(x, -5));
+    expectEquals(6, doit(x, -4));
+    expectEquals(5, doit(x, -3));
+    expectEquals(4, doit(x, -2));
+    expectEquals(3, doit(x, -1));
+    expectEquals(3, doit(x,  0));
+    expectEquals(3, doit(x,  1));
+    expectEquals(3, doit(x, 22));
+
+    int a[] = { 1, 2, 3, 5 };
+    int b[] = { 7 };
+
+    expectEquals(7,  foo(a, b, -1));
+    expectEquals(7,  foo(a, b,  0));
+    expectEquals(16, foo(a, b,  1));
+    expectEquals(26, foo(a, b,  2));
+    expectEquals(38, foo(a, b,  3));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index f74a516..b922b45 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -30,6 +30,7 @@
   051-thread/thread_test.cc \
   117-nopatchoat/nopatchoat.cc \
   1337-gc-coverage/gc_coverage.cc \
+  136-daemon-jni-shutdown/daemon_jni_shutdown.cc \
   137-cfi/cfi.cc \
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index ee6b7aa..c9343d4 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -548,6 +548,11 @@
   484-checker-register-hints \
   537-checker-arraycopy
 
+# Tests that should fail in the read barrier configuration with JIT.
+# 141: Disabled because of intermittent failures on the ART Buildbot (b/25866001).
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := \
+  141-class-unload
+
 ifeq ($(ART_USE_READ_BARRIER),true)
   ifneq (,$(filter default,$(COMPILER_TYPES)))
     ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
@@ -562,10 +567,18 @@
         $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
+
+  ifneq (,$(filter jit,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
 endif
 
 TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS :=
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
 
 # Tests that should fail in the heap poisoning configuration with the default (Quick) compiler.
 # 137: Quick has no support for read barriers and punts to the
@@ -873,20 +886,20 @@
         ifeq ($(9),multiimage)
           test_groups += ART_RUN_TEST_$$(uc_host_or_target)_IMAGE_RULES
           run_test_options += --multi-image
-      		ifeq ($(1),host)
-        		prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
-      		else
-        		prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
-      		endif
+                ifeq ($(1),host)
+                        prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+                else
+                        prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13))
+                endif
         else
           ifeq ($(9),multipicimage)
             test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES
-        		run_test_options += --pic-image --multi-image
-        		ifeq ($(1),host)
-          		prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
-        		else
-          		prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
-        		endif
+                        run_test_options += --pic-image --multi-image
+                        ifeq ($(1),host)
+                        prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+                        else
+                        prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13))
+                        endif
           else
             $$(error found $(9) expected $(IMAGE_TYPES))
           endif
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 3d25d5f..2cb2c50 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -24,10 +24,12 @@
   bug: 26155567
 },
 {
-  description: "TimeoutException on host-x86-concurrent-collector",
+  description: "TimeoutException on host-{x86,x86-64}-concurrent-collector",
   result: EXEC_FAILED,
-  names: ["libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries,
-           libcore.java.util.zip.ZipInputStreamTest#testLongMessage"],
+  modes: [host],
+  names: ["libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled",
+          "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries",
+          "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"],
   bug: 26507762
 }
 ]