Merge "Store class tables in the image"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index dcde5ab..a17da34 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -248,6 +248,7 @@
   compiler/elf_writer_test.cc \
   compiler/image_test.cc \
   compiler/jni/jni_compiler_test.cc \
+  compiler/linker/output_stream_test.cc \
   compiler/oat_test.cc \
   compiler/optimizing/bounds_check_elimination_test.cc \
   compiler/optimizing/dominator_test.cc \
@@ -266,7 +267,6 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
-  compiler/output_stream_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 0ed843b..348eabd 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -58,6 +58,10 @@
 	driver/compiler_driver.cc \
 	driver/compiler_options.cc \
 	driver/dex_compilation_unit.cc \
+	linker/buffered_output_stream.cc \
+	linker/file_output_stream.cc \
+	linker/output_stream.cc \
+	linker/vector_output_stream.cc \
 	linker/relative_patcher.cc \
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
@@ -100,16 +104,12 @@
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
-	buffered_output_stream.cc \
 	compiler.cc \
 	elf_writer.cc \
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
-	file_output_stream.cc \
 	image_writer.cc \
-	oat_writer.cc \
-	output_stream.cc \
-	vector_output_stream.cc
+	oat_writer.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 18ce563..39f8ee8 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1430,4 +1430,8 @@
                                  method_lowering_infos_.data(), count);
 }
 
+bool MIRGraph::SkipCompilationByName(const std::string& methodname) {
+  return cu_->compiler_driver->SkipCompilation(methodname);
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 3191fe9..2da8a98 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -564,6 +564,11 @@
   bool SkipCompilation(std::string* skip_message);
 
   /*
+   * Should we skip the compilation of this method based on its name?
+   */
+  bool SkipCompilationByName(const std::string& methodname);
+
+  /*
    * Parse dex method and add MIR at current insert point.  Returns id (which is
    * actually the index of the method in the m_units_ array).
    */
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 3260a7a..05dde9f 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -780,6 +780,14 @@
   PassDriverMEOpts pass_driver(GetPreOptPassManager(), GetPostOptPassManager(), &cu);
   pass_driver.Launch();
 
+  /* For non-leaf methods, check whether we should skip compilation when the profiler is enabled. */
+  if (cu.compiler_driver->ProfilePresent()
+      && !cu.mir_graph->MethodIsLeaf()
+      && cu.mir_graph->SkipCompilationByName(PrettyMethod(method_idx, dex_file))) {
+    cu.EndTiming();
+    return nullptr;
+  }
+
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
     cu.mir_graph->DumpCheckStats();
   }
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 6d317d8..9d3af16 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -80,9 +80,6 @@
 // given, to all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
 
-// Print additional info during profile guided compilation.
-static constexpr bool kDebugProfileGuidedCompilation = false;
-
 static double Percentage(size_t x, size_t y) {
   return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
 }
@@ -347,7 +344,8 @@
                                const std::string& dump_cfg_file_name, bool dump_cfg_append,
                                CumulativeLogger* timer, int swap_fd,
                                const std::string& profile_file)
-    : compiler_options_(compiler_options),
+    : profile_present_(false),
+      compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
@@ -385,8 +383,12 @@
 
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
-    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
-    LOG(INFO) << "Using profile data from file " << profile_file;
+    profile_present_ = profile_file_.LoadFile(profile_file);
+    if (profile_present_) {
+      LOG(INFO) << "Using profile data form file " << profile_file;
+    } else {
+      LOG(INFO) << "Failed to load profile file " << profile_file;
+    }
   }
 }
 
@@ -567,9 +569,7 @@
         (verified_method->GetEncounteredVerificationFailures() &
             (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
         // Is eligible for compilation by methods-to-compile filter.
-        driver->IsMethodToCompile(method_ref) &&
-        driver->ShouldCompileBasedOnProfile(method_ref);
-
+        driver->IsMethodToCompile(method_ref);
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return null.
       compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
@@ -766,22 +766,6 @@
   return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end();
 }
 
-bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const {
-  if (profile_compilation_info_ == nullptr) {
-    // If we miss profile information it means that we don't do a profile guided compilation.
-    // Return true, and let the other filters decide if the method should be compiled.
-    return true;
-  }
-  bool result = profile_compilation_info_->ContainsMethod(method_ref);
-
-  if (kDebugProfileGuidedCompilation) {
-    LOG(INFO) << "[ProfileGuidedCompilation] "
-        << (result ? "Compiled" : "Skipped") << " method:"
-        << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file, true);
-  }
-  return result;
-}
-
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
   ResolveCatchBlockExceptionsClassVisitor(
@@ -2289,16 +2273,6 @@
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
-  if (profile_compilation_info_ != nullptr) {
-    if (!profile_compilation_info_->Load(dex_files)) {
-      LOG(WARNING) << "Failed to load offline profile info from "
-          << profile_compilation_info_->GetFilename()
-          << ". No methods will be compiled";
-    } else if (kDebugProfileGuidedCompilation) {
-      LOG(INFO) << "[ProfileGuidedCompilation] "
-          << profile_compilation_info_->DumpInfo();
-    }
-  }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
@@ -2536,6 +2510,39 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
+bool CompilerDriver::SkipCompilation(const std::string& method_name) {
+  if (!profile_present_) {
+    return false;
+  }
+  // First find the method in the profile file.
+  ProfileFile::ProfileData data;
+  if (!profile_file_.GetProfileData(&data, method_name)) {
+    // Not in profile, no information can be determined.
+    if (kIsDebugBuild) {
+      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    }
+    return true;
+  }
+
+  // Methods that comprise top_k_threshold % of the total samples will be compiled.
+  // Compare against the start of the topK percentage bucket just in case the threshold
+  // falls inside a bucket.
+  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
+                 <= compiler_options_->GetTopKProfileThreshold();
+  if (kIsDebugBuild) {
+    if (compile) {
+      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
+          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
+          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
+    } else {
+      VLOG(compiler) << "not compiling method " << method_name
+          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
+          << "% samples)";
+    }
+  }
+  return !compile;
+}
+
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   Runtime* const runtime = Runtime::Current();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index a351f6d..1347b37 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -31,11 +31,11 @@
 #include "compiler.h"
 #include "dex_file.h"
 #include "driver/compiled_method_storage.h"
-#include "jit/offline_profiling_info.h"
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -147,6 +147,10 @@
     return compiler_.get();
   }
 
+  bool ProfilePresent() const {
+    return profile_present_;
+  }
+
   // Are we compiling and creating an image file?
   bool IsBootImage() const {
     return boot_image_;
@@ -441,10 +445,6 @@
   // Checks whether the provided method should be compiled, i.e., is in method_to_compile_.
   bool IsMethodToCompile(const MethodReference& method_ref) const;
 
-  // Checks whether profile guided compilation is enabled and if the method should be compiled
-  // according to the profile file.
-  bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const;
-
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       REQUIRES(!compiled_classes_lock_);
 
@@ -454,6 +454,9 @@
                                        uint16_t class_def_idx,
                                        const DexFile& dex_file) const;
 
+  // Checks, based on profile information, whether compilation of this method should be skipped.
+  bool SkipCompilation(const std::string& method_name);
+
   // Get memory usage during compilation.
   std::string GetMemoryUsageString(bool extended) const;
 
@@ -592,6 +595,9 @@
                       ThreadPool* thread_pool, TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
+  ProfileFile profile_file_;
+  bool profile_present_;
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -641,9 +647,6 @@
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
-  // Info for profile guided compilation.
-  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
-
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index 5464ed9..c3a3ca9 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -29,6 +29,7 @@
 #include "common_runtime_test.h"
 #include "elf_builder.h"
 #include "gtest/gtest.h"
+#include "linker/file_output_stream.h"
 #include "os.h"
 
 namespace art {
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index c19bc3d..bb07cc2 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -23,10 +23,9 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/unix_file/fd_file.h"
-#include "buffered_output_stream.h"
 #include "elf_utils.h"
-#include "file_output_stream.h"
 #include "leb128.h"
+#include "linker/error_delaying_output_stream.h"
 #include "utils/array_ref.h"
 
 namespace art {
@@ -121,8 +120,8 @@
       sections.push_back(this);
       // Align file position.
       if (header_.sh_type != SHT_NOBITS) {
-        header_.sh_offset = RoundUp(owner_->Seek(0, kSeekCurrent), header_.sh_addralign);
-        owner_->Seek(header_.sh_offset, kSeekSet);
+        header_.sh_offset = RoundUp(owner_->stream_.Seek(0, kSeekCurrent), header_.sh_addralign);
+        owner_->stream_.Seek(header_.sh_offset, kSeekSet);
       }
       // Align virtual memory address.
       if ((header_.sh_flags & SHF_ALLOC) != 0) {
@@ -140,7 +139,7 @@
         CHECK_GT(header_.sh_size, 0u);
       } else {
         // Use the current file position to determine section size.
-        off_t file_offset = owner_->Seek(0, kSeekCurrent);
+        off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent);
         CHECK_GE(file_offset, (off_t)header_.sh_offset);
         header_.sh_size = file_offset - header_.sh_offset;
       }
@@ -162,7 +161,7 @@
       } else {
         CHECK(started_);
         CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS);
-        return owner_->Seek(0, kSeekCurrent) - header_.sh_offset;
+        return owner_->stream_.Seek(0, kSeekCurrent) - header_.sh_offset;
       }
     }
 
@@ -177,21 +176,20 @@
     bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
       CHECK(started_);
       CHECK(!finished_);
-      owner_->WriteFully(buffer, byte_count);
-      return true;
+      return owner_->stream_.WriteFully(buffer, byte_count);
     }
 
     // This function always succeeds to simplify code.
     // Use builder's Good() to check the actual status.
     off_t Seek(off_t offset, Whence whence) OVERRIDE {
       // Forward the seek as-is and trust the caller to use it reasonably.
-      return owner_->Seek(offset, whence);
+      return owner_->stream_.Seek(offset, whence);
     }
 
     // This function flushes the output and returns whether it succeeded.
     // If there was a previous failure, this does nothing and returns false, i.e. failed.
     bool Flush() OVERRIDE {
-      return owner_->Flush();
+      return owner_->stream_.Flush();
     }
 
     Elf_Word GetSectionIndex() const {
@@ -277,26 +275,24 @@
   };
 
   ElfBuilder(InstructionSet isa, OutputStream* output)
-    : isa_(isa),
-      output_(output),
-      output_good_(true),
-      output_offset_(0),
-      rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
-      bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
-      dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-      hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
-      dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
-      eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
-      strtab_(this, ".strtab", 0, kPageSize),
-      symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
-      debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
-      debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      shstrtab_(this, ".shstrtab", 0, 1),
-      virtual_address_(0) {
+      : isa_(isa),
+        stream_(output),
+        rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+        bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+        dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+        hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+        dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+        eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+        strtab_(this, ".strtab", 0, kPageSize),
+        symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+        debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+        debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        shstrtab_(this, ".shstrtab", 0, 1),
+        virtual_address_(0) {
     text_.phdr_flags_ = PF_R | PF_X;
     bss_.phdr_flags_ = PF_R | PF_W;
     dynamic_.phdr_flags_ = PF_R | PF_W;
@@ -353,7 +349,7 @@
     // We do not know the number of headers until later, so
     // it is easiest to just reserve a fixed amount of space.
     int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Seek(size, kSeekSet);
+    stream_.Seek(size, kSeekSet);
     virtual_address_ += size;
   }
 
@@ -377,9 +373,14 @@
       shdrs.push_back(section->header_);
     }
     Elf_Off section_headers_offset;
-    section_headers_offset = RoundUp(Seek(0, kSeekCurrent), sizeof(Elf_Off));
-    Seek(section_headers_offset, kSeekSet);
-    WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+    section_headers_offset = RoundUp(stream_.Seek(0, kSeekCurrent), sizeof(Elf_Off));
+    stream_.Seek(section_headers_offset, kSeekSet);
+    stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+
+    // Flush everything else before writing the program headers. This should prevent
+    // the OS from reordering writes, so that we don't end up with valid headers
+    // and partially written data if we suddenly lose power, for example.
+    stream_.Flush();
 
     // Write the initial file headers.
     std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
@@ -389,10 +390,10 @@
     elf_header.e_phnum = phdrs.size();
     elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
-    Seek(0, kSeekSet);
-    WriteFully(&elf_header, sizeof(elf_header));
-    WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
-    Flush();
+    stream_.Seek(0, kSeekSet);
+    stream_.WriteFully(&elf_header, sizeof(elf_header));
+    stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Flush();
   }
 
   // The running program does not have access to section headers
@@ -470,60 +471,15 @@
 
   // Returns true if all writes and seeks on the output stream succeeded.
   bool Good() {
-    return output_good_;
+    return stream_.Good();
+  }
+
+  // Returns the builder's internal stream.
+  OutputStream* GetStream() {
+    return &stream_;
   }
 
  private:
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  void WriteFully(const void* buffer, size_t byte_count) {
-    if (output_good_) {
-      if (!output_->WriteFully(buffer, byte_count)) {
-        PLOG(ERROR) << "Failed to write " << byte_count
-                    << " bytes to ELF file at offset " << output_offset_;
-        output_good_ = false;
-      }
-    }
-    output_offset_ += byte_count;
-  }
-
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  off_t Seek(off_t offset, Whence whence) {
-    // We keep shadow copy of the offset so that we return
-    // the expected value even if the output stream failed.
-    off_t new_offset;
-    switch (whence) {
-      case kSeekSet:
-        new_offset = offset;
-        break;
-      case kSeekCurrent:
-        new_offset = output_offset_ + offset;
-        break;
-      default:
-        LOG(FATAL) << "Unsupported seek type: " << whence;
-        UNREACHABLE();
-    }
-    if (output_good_) {
-      off_t actual_offset = output_->Seek(offset, whence);
-      if (actual_offset == (off_t)-1) {
-        PLOG(ERROR) << "Failed to seek in ELF file. Offset=" << offset
-                    << " whence=" << whence << " new_offset=" << new_offset;
-        output_good_ = false;
-      }
-      DCHECK_EQ(actual_offset, new_offset);
-    }
-    output_offset_ = new_offset;
-    return new_offset;
-  }
-
-  bool Flush() {
-    if (output_good_) {
-      output_good_ = output_->Flush();
-    }
-    return output_good_;
-  }
-
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
     Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
@@ -675,9 +631,7 @@
 
   InstructionSet isa_;
 
-  OutputStream* output_;
-  bool output_good_;  // True if all writes to output succeeded.
-  off_t output_offset_;  // Keep track of the current position in the stream.
+  ErrorDelayingOutputStream stream_;
 
   Section rodata_;
   Section text_;
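
The flush added in ElfBuilder::End() above orders the on-disk writes: section contents and section headers are pushed out before the ELF/program headers that make them reachable are written back at offset 0. The standalone sketch below mirrors only that ordering with <cstdio>; the file name and the two-word "header" layout are made up and are not the real Elf_Ehdr/Elf_Phdr structures.

    #include <cstdint>
    #include <cstdio>

    // Standalone sketch: write the payload first, flush it, and only then go
    // back and write the header that points at it.
    int main() {
      std::FILE* f = std::fopen("image.bin", "w+b");
      if (f == nullptr) return 1;

      const long kHeaderSize = 16;             // space reserved for the header
      std::fseek(f, kHeaderSize, SEEK_SET);    // skip it for now, like Start()

      const char payload[] = "section contents";
      std::fwrite(payload, 1, sizeof(payload) - 1, f);
      std::fflush(f);                          // payload reaches the OS first

      // Now write the header describing the payload at the start of the file.
      std::uint64_t header[2] = {static_cast<std::uint64_t>(kHeaderSize),
                                 sizeof(payload) - 1};
      std::fseek(f, 0, SEEK_SET);
      std::fwrite(header, 1, sizeof(header), f);
      std::fflush(f);

      std::fclose(f);
      return 0;
    }
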
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 357d5f6..c5a0fd5 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -62,6 +62,11 @@
   virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
   virtual bool End() = 0;
 
+  // Get the ELF writer's stream. This stream can be used for writing data directly
+  // to a section after the section has been finished. When that's done, the user
+  // should Seek() back to the position where the stream was before this operation.
+  virtual OutputStream* GetStream() = 0;
+
  protected:
   ElfWriter() = default;
 };
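
The GetStream() contract documented above (write directly at some earlier position, then Seek() back) is the usual patch-in-place protocol for a seekable stream. A minimal standalone analogue using a plain FILE* instead of ART's OutputStream; the file name and the patched bytes are arbitrary.

    #include <cstdio>

    // Standalone analogue of "write into a finished section, then Seek() back".
    int main() {
      std::FILE* f = std::fopen("demo.bin", "w+b");
      if (f == nullptr) return 1;

      // Write a placeholder "section" body, then some trailing data.
      unsigned char zeros[8] = {};
      std::fwrite(zeros, 1, sizeof(zeros), f);        // patched later
      const char tail[] = "trailing data";
      std::fwrite(tail, 1, sizeof(tail) - 1, f);

      // Patch the finished section: remember the position, seek, write, seek back.
      long saved = std::ftell(f);
      std::fseek(f, 0, SEEK_SET);
      const unsigned char patch[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      std::fwrite(patch, 1, sizeof(patch), f);
      std::fseek(f, saved, SEEK_SET);                 // restore the previous position

      std::fclose(f);
      return 0;
    }
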
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 9da2af8..e411496 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -31,6 +31,8 @@
 #include "elf_writer_debug.h"
 #include "globals.h"
 #include "leb128.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "utils.h"
 
 namespace art {
@@ -72,6 +74,8 @@
   void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
   bool End() OVERRIDE;
 
+  virtual OutputStream* GetStream() OVERRIDE;
+
   static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
                                std::vector<uint8_t>* buffer);
 
@@ -191,6 +195,11 @@
 }
 
 template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::GetStream() {
+  return builder_->GetStream();
+}
+
+template <typename ElfTypes>
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
                               const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   bool generated_mapping_symbol = false;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 5f4a922..cda6240 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -34,7 +34,6 @@
 #include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 2125c9a..d001495 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -170,18 +170,6 @@
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
 
-  // Check if the method is already compiled.
-  if (runtime->GetJit()->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
-    VLOG(jit) << "Already compiled " << PrettyMethod(method);
-    return true;
-  }
-
-  // Don't compile the method if we are supposed to be deoptimized.
-  instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
-  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
-    return false;
-  }
-
   // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
   if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
@@ -190,13 +178,13 @@
   }
 
   // Do the compilation.
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
   bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     // If we get a request to compile a proxy method, we pass the actual Java method
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+    JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
   }
 
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index 13754fd..73b0fac 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -17,9 +17,9 @@
 #include "linker/arm/relative_patcher_arm_base.h"
 
 #include "compiled_method.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 57018af..3d4c218 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -20,10 +20,10 @@
 #include "art_method.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
-#include "utils/arm64/assembler_arm64.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
+#include "utils/arm64/assembler_arm64.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/buffered_output_stream.cc b/compiler/linker/buffered_output_stream.cc
similarity index 100%
rename from compiler/buffered_output_stream.cc
rename to compiler/linker/buffered_output_stream.cc
diff --git a/compiler/buffered_output_stream.h b/compiler/linker/buffered_output_stream.h
similarity index 88%
rename from compiler/buffered_output_stream.h
rename to compiler/linker/buffered_output_stream.h
index 1da3a69..a2eefbb 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/linker/buffered_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
-#define ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
 
 #include <memory>
 
@@ -51,4 +51,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
diff --git a/compiler/linker/error_delaying_output_stream.h b/compiler/linker/error_delaying_output_stream.h
new file mode 100644
index 0000000..99410e4
--- /dev/null
+++ b/compiler/linker/error_delaying_output_stream.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+
+#include "output_stream.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+// OutputStream wrapper that delays reporting an error until Flush().
+class ErrorDelayingOutputStream FINAL : public OutputStream {
+ public:
+  explicit ErrorDelayingOutputStream(OutputStream* output)
+      : OutputStream(output->GetLocation()),
+        output_(output),
+        output_good_(true),
+        output_offset_(0) { }
+
+  // This function always succeeds to simplify code.
+  // Use Good() to check the actual status of the output stream.
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to " << GetLocation() << " at offset " << output_offset_;
+        output_good_ = false;
+      }
+    }
+    output_offset_ += byte_count;
+    return true;
+  }
+
+  // This function always succeeds to simplify code.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    // We keep a shadow copy of the offset so that we return
+    // the expected value even if the output stream failed.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == static_cast<off_t>(-1)) {
+        PLOG(ERROR) << "Failed to seek in " << GetLocation() << ". Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
+      DCHECK_EQ(actual_offset, new_offset);
+    }
+    output_offset_ = new_offset;
+    return new_offset;
+  }
+
+  // Flush the output and return whether all operations have succeeded.
+  // Do nothing if we already have a pending error.
+  bool Flush() OVERRIDE {
+    if (output_good_) {
+      output_good_ = output_->Flush();
+    }
+    return output_good_;
+  }
+
+  // Check (without flushing) whether all operations have succeeded so far.
+  bool Good() const {
+    return output_good_;
+  }
+
+ private:
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
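
The new wrapper above lets callers issue a whole sequence of writes and seeks without checking each return value, and then query Good() (or Flush()) once at the end, which is exactly how ElfBuilder now uses it. Below is a standalone sketch of the same error-delaying idea around a trivial sink; the class and function names are illustrative only and are not ART types.

    #include <cstddef>
    #include <cstdio>
    #include <cstring>

    // Minimal stand-in for an output sink whose writes can fail.
    struct Sink {
      std::FILE* file;
      bool Write(const void* buf, std::size_t n) {
        return std::fwrite(buf, 1, n, file) == n;
      }
    };

    // Error-delaying wrapper: every Write() "succeeds"; the first real failure
    // is remembered and reported later by Good().
    class DelayedErrorWriter {
     public:
      explicit DelayedErrorWriter(Sink* sink) : sink_(sink), good_(true), offset_(0) {}

      void Write(const void* buf, std::size_t n) {
        if (good_ && !sink_->Write(buf, n)) {
          good_ = false;                 // remember the failure, keep accepting calls
        }
        offset_ += static_cast<long>(n); // shadow offset stays consistent regardless
      }

      bool Good() const { return good_; }
      long Offset() const { return offset_; }

     private:
      Sink* sink_;
      bool good_;
      long offset_;
    };

    int main() {
      Sink sink{std::fopen("out.bin", "wb")};
      if (sink.file == nullptr) return 1;
      DelayedErrorWriter writer(&sink);
      const char data[] = "payload";
      writer.Write(data, std::strlen(data));
      writer.Write(data, std::strlen(data));
      std::fclose(sink.file);
      return writer.Good() ? 0 : 1;      // single error check for the whole sequence
    }
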
diff --git a/compiler/file_output_stream.cc b/compiler/linker/file_output_stream.cc
similarity index 100%
rename from compiler/file_output_stream.cc
rename to compiler/linker/file_output_stream.cc
diff --git a/compiler/file_output_stream.h b/compiler/linker/file_output_stream.h
similarity index 87%
rename from compiler/file_output_stream.h
rename to compiler/linker/file_output_stream.h
index 6917d83..f2d8453 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/linker/file_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_FILE_OUTPUT_STREAM_H_
-#define ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -43,4 +43,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream.cc b/compiler/linker/output_stream.cc
similarity index 100%
rename from compiler/output_stream.cc
rename to compiler/linker/output_stream.cc
diff --git a/compiler/output_stream.h b/compiler/linker/output_stream.h
similarity index 91%
rename from compiler/output_stream.h
rename to compiler/linker/output_stream.h
index 8f6b6d8..96a5f48 100644
--- a/compiler/output_stream.h
+++ b/compiler/linker/output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OUTPUT_STREAM_H_
-#define ART_COMPILER_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_OUTPUT_STREAM_H_
 
 #include <ostream>
 #include <string>
@@ -61,4 +61,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream_test.cc b/compiler/linker/output_stream_test.cc
similarity index 100%
rename from compiler/output_stream_test.cc
rename to compiler/linker/output_stream_test.cc
diff --git a/compiler/vector_output_stream.cc b/compiler/linker/vector_output_stream.cc
similarity index 94%
rename from compiler/vector_output_stream.cc
rename to compiler/linker/vector_output_stream.cc
index 3d33673..f758005 100644
--- a/compiler/vector_output_stream.cc
+++ b/compiler/linker/vector_output_stream.cc
@@ -21,7 +21,7 @@
 namespace art {
 
 VectorOutputStream::VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector)
-  : OutputStream(location), offset_(vector->size()), vector_(vector) {}
+    : OutputStream(location), offset_(vector->size()), vector_(vector) {}
 
 off_t VectorOutputStream::Seek(off_t offset, Whence whence) {
   CHECK(whence == kSeekSet || whence == kSeekCurrent || whence == kSeekEnd) << whence;
diff --git a/compiler/vector_output_stream.h b/compiler/linker/vector_output_stream.h
similarity index 91%
rename from compiler/vector_output_stream.h
rename to compiler/linker/vector_output_stream.h
index a3c58d0..3210143 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/linker/vector_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
-#define ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -66,4 +66,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index c305b12..b8610d0 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -31,13 +31,13 @@
 #include "elf_writer.h"
 #include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "linker/vector_output_stream.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index a6a49f9..0087a0d 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -36,6 +36,7 @@
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "image_writer.h"
+#include "linker/output_stream.h"
 #include "linker/relative_patcher.h"
 #include "mirror/array.h"
 #include "mirror/class_loader.h"
@@ -43,7 +44,6 @@
 #include "mirror/object-inl.h"
 #include "oat_quick_method_header.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "type_lookup_table.h"
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index a448302..7dbfd7c 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1228,19 +1228,26 @@
     InductionVarRange::Value v2;
     bool needs_finite_test = false;
     induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
-    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-      ValueRange index_range(GetGraph()->GetArena(),
-                             ValueBound(v1.instruction, v1.b_constant),
-                             ValueBound(v2.instruction, v2.b_constant));
-      // If analysis reveals a certain OOB, disable dynamic BCE.
-      *try_dynamic_bce = !index_range.GetLower().LessThan(array_range->GetLower()) &&
-                         !index_range.GetUpper().GreaterThan(array_range->GetUpper());
-      // Use analysis for static bce only if loop is finite.
-      return !needs_finite_test && index_range.FitsIn(array_range);
-    }
+    do {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic BCE.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+          return false;
+        }
+        // Use analysis for static bce only if loop is finite.
+        if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
+        }
+      }
+    } while (induction_range_.RefineOuter(&v1, &v2));
     return false;
   }
 
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 2ac1e15..9d0cde7 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -119,6 +119,17 @@
   }
 }
 
+bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) {
+  Value v1 = RefineOuter(*min_val, /* is_min */ true);
+  Value v2 = RefineOuter(*max_val, /* is_min */ false);
+  if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) {
+    *min_val = v1;
+    *max_val = v2;
+    return true;
+  }
+  return false;
+}
+
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
                                         /*out*/bool* needs_finite_test,
@@ -202,6 +213,8 @@
     } else if (IsIntAndGet(instruction->InputAt(1), &value)) {
       return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value));
     }
+  } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
+    return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
   } else if (is_min) {
     // Special case for finding minimum: minimum of trip-count in loop-body is 1.
     if (trip != nullptr && in_body && instruction == trip->op_a->fetch) {
@@ -404,6 +417,25 @@
   return Value();
 }
 
+InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) {
+  if (v.instruction != nullptr) {
+    HLoopInformation* loop =
+        v.instruction->GetBlock()->GetLoopInformation();  // closest enveloping loop
+    if (loop != nullptr) {
+      // Set up loop information.
+      bool in_body = true;  // use is always in body of outer loop
+      HInductionVarAnalysis::InductionInfo* info =
+          induction_analysis_->LookupInfo(loop, v.instruction);
+      HInductionVarAnalysis::InductionInfo* trip =
+          induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction());
+      // Try to refine "a x instruction + b" with outer loop range information on instruction.
+      return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)),
+                      Value(v.b_constant));
+    }
+  }
+  return v;
+}
+
 bool InductionVarRange::GenerateCode(HInstruction* context,
                                      HInstruction* instruction,
                                      HGraph* graph,
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7984871..71b0b1b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -68,6 +68,9 @@
                          /*out*/Value* max_val,
                          /*out*/bool* needs_finite_test);
 
+  /** Refines the values using the induction of the next outer loop. Returns true on change. */
+  bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val);
+
   /**
    * Returns true if range analysis is able to generate code for the lower and upper
    * bound expressions on the instruction in the given context. The need_finite_test
@@ -149,6 +152,12 @@
   static Value MergeVal(Value v1, Value v2, bool is_min);
 
   /**
+   * Returns the refined value using the induction of the next outer loop, or the input
+   * value if no further refinement is possible.
+   */
+  Value RefineOuter(Value val, bool is_min);
+
+  /**
    * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
    * With values nullptr, the method can be used to determine if code generation
    * would be successful without generating actual code yet.
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index c2ba157..128b5bb 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -473,16 +473,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -498,16 +501,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -527,16 +533,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(parameter, 1, -1), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(parameter, 1, 0), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
@@ -597,16 +606,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(parameter, 1, 1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(parameter, 1, 0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 6d93be3..a4dcb3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -171,13 +171,37 @@
                                   const DexFile& dex_file,
                                   uint32_t referrer_index)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) {
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
     return method->GetDexMethodIndex();
   } else {
     return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index);
   }
 }
 
+static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls->GetDexCache() == nullptr) {
+    DCHECK(cls->IsArrayClass());
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    // TODO: deal with proxy classes.
+    return DexFile::kDexNoIndex;
+  } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    // Update the dex cache to ensure the class is in it; the generated code
+    // will assume it is. We make this safe by updating the dex cache, as other
+    // dex files might also load the class, and there is no guarantee that the
+    // dex cache of the class's own dex file will be updated.
+    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
+      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    }
+    return cls->GetDexTypeIndex();
+  } else {
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  }
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -214,53 +238,176 @@
     return false;
   }
 
-  if (!invoke_instruction->IsInvokeStaticOrDirect()) {
-    resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
-    if (resolved_method == nullptr) {
+  if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    return TryInline(invoke_instruction, resolved_method);
+  }
+
+  // Check if we can statically find the method.
+  ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+  if (actual_method != nullptr) {
+    return TryInline(invoke_instruction, actual_method);
+  }
+
+  // Check if we can use an inline cache.
+  ArtMethod* caller = graph_->GetArtMethod();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  // Under JIT, we should always know the caller.
+  DCHECK(!Runtime::Current()->UseJit() || (caller != nullptr));
+  if (caller != nullptr && caller->GetProfilingInfo(pointer_size) != nullptr) {
+    ProfilingInfo* profiling_info = caller->GetProfilingInfo(pointer_size);
+    const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+    if (ic.IsUnitialized()) {
       VLOG(compiler) << "Interface or virtual call to "
                      << PrettyMethod(method_index, caller_dex_file)
-                     << " could not be statically determined";
+                     << " is not hit and not inlined";
       return false;
-    }
-    // We have found a method, but we need to find where that method is for the caller's
-    // dex file.
-    method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index);
-    if (method_index == DexFile::kDexNoIndex) {
+    } else if (ic.IsMonomorphic()) {
+      MaybeRecordStat(kMonomorphicCall);
+      return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+    } else if (ic.IsPolymorphic()) {
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+    } else {
+      DCHECK(ic.IsMegamorphic());
       VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(resolved_method)
-                     << " cannot be inlined because unaccessible to caller";
+                     << PrettyMethod(method_index, caller_dex_file)
+                     << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
       return false;
     }
   }
 
-  bool same_dex_file =
-      IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile());
+  VLOG(compiler) << "Interface or virtual call to "
+                 << PrettyMethod(method_index, caller_dex_file)
+                 << " could not be statically determined";
+  return false;
+}
 
-  const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  if (class_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because its class is not"
+                   << " accessible to the caller";
+    return false;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
+        resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
+        resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      invoke_instruction->GetDexPc());
+
+  bool is_referrer =
+      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(load_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(field_get, load_class);
+  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed, and eventually propagate the
+  // type of the receiver.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedMonomorphicCall);
+  return true;
+}
+
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic ATTRIBUTE_UNUSED) {
+  // TODO
+  VLOG(compiler) << "Unimplemented polymorphic inlining for "
+                 << PrettyMethod(resolved_method);
+  return false;
+}
+
+bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t method_index = FindMethodIndexIn(
+      method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+  if (method_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to "
+                   << PrettyMethod(method)
+                   << " cannot be inlined because unaccessible to caller";
+    return false;
+  }
+
+  bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
+
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is too big to inline";
     return false;
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because of try block";
     return false;
   }
 
-  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
-    uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
+  if (!method->GetDeclaringClass()->IsVerified()) {
+    uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
     if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
+          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -277,7 +424,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) {
+  if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) {
     return false;
   }
 
@@ -288,7 +435,8 @@
 
 bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
                                  HInvoke* invoke_instruction,
-                                 bool same_dex_file) {
+                                 bool same_dex_file,
+                                 bool do_rtp) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -341,6 +489,7 @@
       invoke_type,
       graph_->IsDebuggable(),
       graph_->GetCurrentInstructionId());
+  callee_graph->SetArtMethod(resolved_method);
 
   OptimizingCompilerStats inline_stats;
   HGraphBuilder builder(callee_graph,
@@ -422,6 +571,7 @@
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
     HInliner inliner(callee_graph,
+                     outermost_graph_,
                      codegen_,
                      outer_compilation_unit_,
                      dex_compilation_unit,
@@ -533,9 +683,9 @@
   HNullConstant* null_constant = graph_->GetNullConstant();
   if (!null_constant->GetReferenceTypeInfo().IsValid()) {
     ReferenceTypeInfo::TypeHandle obj_handle =
-            handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
+        handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
     null_constant->SetReferenceTypeInfo(
-            ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
+        ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
   }
 
   // Check the integrity of reference types and run another type propagation if needed.
@@ -554,14 +704,16 @@
          return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
     }
 
-    // If the return type is a refinement of the declared type run the type propagation again.
-    ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-    ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-    if (invoke_rti.IsStrictSupertypeOf(return_rti)
-        || (return_rti.IsExact() && !invoke_rti.IsExact())
-        || !return_replacement->CanBeNull()) {
-      ReferenceTypePropagation rtp_fixup(graph_, handles_);
-      rtp_fixup.Run();
+    if (do_rtp) {
+      // If the return type is a refinement of the declared type run the type propagation again.
+      ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+      ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+      if (invoke_rti.IsStrictSupertypeOf(return_rti)
+          || (return_rti.IsExact() && !invoke_rti.IsExact())
+          || !return_replacement->CanBeNull()) {
+        ReferenceTypePropagation rtp_fixup(graph_, handles_);
+        rtp_fixup.Run();
+      }
     }
   }
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 0f6a945..7b9fb73 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -27,11 +27,13 @@
 class DexCompilationUnit;
 class HGraph;
 class HInvoke;
+class InlineCache;
 class OptimizingCompilerStats;
 
 class HInliner : public HOptimization {
  public:
   HInliner(HGraph* outer_graph,
+           HGraph* outermost_graph,
            CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
@@ -40,6 +42,7 @@
            OptimizingCompilerStats* stats,
            size_t depth = 0)
       : HOptimization(outer_graph, kInlinerPassName, stats),
+        outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
@@ -54,10 +57,33 @@
 
  private:
   bool TryInline(HInvoke* invoke_instruction);
+
+  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
+  // reference type propagation can run after the inlining.
+  bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline the target of a monomorphic call. If successful, the code
+  // in the graph will look like:
+  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
+  // ... // inlined code
+  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool TryBuildAndInline(ArtMethod* resolved_method,
                          HInvoke* invoke_instruction,
-                         bool same_dex_file);
+                         bool same_dex_file,
+                         bool do_rtp = true);
 
+  HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
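
The comments above describe the guard that TryInlineMonomorphicCall installs in front of the inlined body. The following stand-alone C++ sketch (not ART code; all names are illustrative) shows the control flow that guard produces: the inlined body runs only when the receiver's class matches the single class recorded in the inline cache, otherwise execution falls back to the generic path, which in the real graph is an HDeoptimize back to the interpreter.

#include <cstdio>

// Illustrative stand-ins for the runtime class hierarchy and the recorded inline cache entry.
struct Class {};
struct Object { const Class* klass; };

static const Class kRecordedClass;                         // the single class seen at runtime
static const Class* const kMonomorphicType = &kRecordedClass;  // ic.GetMonomorphicType()

static int GenericPath(const Object&) { return -1; }       // stands for the deoptimization path
static int InlinedBody(const Object&) { return 42; }       // stands for the inlined callee

int Dispatch(const Object& receiver) {
  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
  if (receiver.klass != kMonomorphicType) {
    return GenericPath(receiver);
  }
  // ... // inlined code
  return InlinedBody(receiver);
}

int main() {
  Object o{&kRecordedClass};
  printf("%d\n", Dispatch(o));  // 42: the guard passes and the inlined body runs
  return 0;
}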
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 3268445..9f16462 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -230,13 +230,16 @@
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-static void GenReverseBytes(LocationSummary* locations,
-                            Primitive::Type type,
-                            MipsAssembler* assembler,
-                            bool isR2OrNewer) {
+static void GenReverse(LocationSummary* locations,
+                       Primitive::Type type,
+                       bool isR2OrNewer,
+                       bool isR6,
+                       bool reverseBits,
+                       MipsAssembler* assembler) {
   DCHECK(type == Primitive::kPrimShort ||
          type == Primitive::kPrimInt ||
          type == Primitive::kPrimLong);
+  DCHECK(type != Primitive::kPrimShort || !reverseBits);
 
   if (type == Primitive::kPrimShort) {
     Register in = locations->InAt(0).AsRegister<Register>();
@@ -273,6 +276,30 @@
       __ And(out, out, AT);
       __ Or(out, out, TMP);
     }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out, out);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out, out, 4);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out, out, 2);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out, out, 1);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+      }
+    }
   } else if (type == Primitive::kPrimLong) {
     Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
     Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
@@ -314,6 +341,46 @@
       __ And(out_lo, out_lo, AT);
       __ Or(out_lo, out_lo, TMP);
     }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out_hi, out_hi);
+        __ Bitswap(out_lo, out_lo);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_hi, out_hi, 4);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_lo, out_lo, 4);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_hi, out_hi, 2);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_lo, out_lo, 2);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_hi, out_hi, 1);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_lo, out_lo, 1);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+      }
+    }
   }
 }
 
@@ -323,10 +390,12 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(),
-                  Primitive::kPrimInt,
-                  GetAssembler(),
-                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(),
+             codegen_->GetInstructionSetFeatures().IsR6(),
+             false,
+             GetAssembler());
 }
 
 // long java.lang.Long.reverseBytes(long)
@@ -335,10 +404,12 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(),
-                  Primitive::kPrimLong,
-                  GetAssembler(),
-                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(),
+             codegen_->GetInstructionSetFeatures().IsR6(),
+             false,
+             GetAssembler());
 }
 
 // short java.lang.Short.reverseBytes(short)
@@ -347,10 +418,40 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(),
-                  Primitive::kPrimShort,
-                  GetAssembler(),
-                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimShort,
+             codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(),
+             codegen_->GetInstructionSetFeatures().IsR6(),
+             false,
+             GetAssembler());
+}
+
+// int java.lang.Integer.reverse(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(),
+             codegen_->GetInstructionSetFeatures().IsR6(),
+             true,
+             GetAssembler());
+}
+
+// long java.lang.Long.reverse(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(),
+             codegen_->GetInstructionSetFeatures().IsR6(),
+             true,
+             GetAssembler());
 }
 
 // boolean java.lang.String.equals(Object anObject)
@@ -463,8 +564,6 @@
 void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
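
On pre-R6 cores the sequence added to GenReverse reverses the bits with three shift-and-mask steps (nibbles, bit pairs, single bits) applied after the existing byte reversal; R6 cores get a single Bitswap per word instead. A minimal C++ sketch of the same computation (not ART code; the compiler builtin stands in for the byte-reversal instructions already emitted above) is:

#include <cstdint>
#include <cstdio>

static uint32_t ReverseBits32(uint32_t x) {
  x = __builtin_bswap32(x);                                   // byte reversal done earlier in GenReverse
  x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);    // swap nibbles within each byte
  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);    // swap 2-bit pairs within each nibble
  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);    // swap adjacent bits
  return x;
}

int main() {
  printf("%08x\n", ReverseBits32(0x00000001u));  // prints 80000000
  return 0;
}

The same three mask/shift steps are applied to each half of a long, which is why the 64-bit path above repeats the sequence for out_hi and out_lo.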
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index c38bbe3..02befc0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -121,6 +121,8 @@
           // phi in it.
           if (instruction->NeedsEnvironment()) {
             UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info);
+          } else {
+            DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
         } else if (instruction->CanThrow()) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 19614f1..9d3c88c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -371,6 +371,9 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  ArtMethod* GetArtMethod() const { return art_method_; }
+  void SetArtMethod(ArtMethod* method) { art_method_ = method; }
+
   // Returns an instruction with the opposite boolean value from 'cond'.
   // The instruction has been inserted into the graph, either as a constant, or
   // before cursor.
@@ -479,6 +482,11 @@
 
   HCurrentMethod* cached_current_method_;
 
+  // The ArtMethod this graph is for. Note that for AOT, it may be null,
+  // for example for methods whose declaring class could not be resolved
+  // (such as when the superclass could not be found).
+  ArtMethod* art_method_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -2462,11 +2470,15 @@
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
  public:
-  explicit HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
     SetRawInputAt(0, cond);
   }
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index cae2d3f..3495603 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -427,7 +427,7 @@
     return;
   }
   HInliner* inliner = new (graph->GetArena()) HInliner(
-    graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+      graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -763,8 +763,8 @@
     ArtMethod* art_method = compiler_driver->ResolveMethod(
         soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
     // We may not get a method, for example if its class is erroneous.
-    // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile.
     if (art_method != nullptr) {
+      graph->SetArtMethod(art_method);
       interpreter_metadata = art_method->GetQuickenedInfo();
     }
   }
@@ -948,6 +948,7 @@
   if (stack_map_data == nullptr) {
     return false;
   }
+  MaybeRecordStat(MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
   const void* code = code_cache->CommitCode(
       self,
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index e5ea0f5..6296eed 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -49,6 +49,10 @@
   kNotCompiledUnsupportedIsa,
   kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
+  kInlinedMonomorphicCall,
+  kMonomorphicCall,
+  kPolymorphicCall,
+  kMegamorphicCall,
   kLastStat
 };
 
@@ -111,6 +115,10 @@
       case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
       case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
       case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
+      case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kMonomorphicCall: name = "MonomorphicCall"; break;
+      case kPolymorphicCall: name = "PolymorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index fc7ac70..86e5762 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -314,6 +314,11 @@
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20);
 }
 
+void MipsAssembler::Bitswap(Register rd, Register rt) {
+  CHECK(IsR6());
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20);
+}
+
 void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
   CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 1ef0992..6a37cc9 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -136,6 +136,7 @@
   void Seb(Register rd, Register rt);  // R2+
   void Seh(Register rd, Register rt);  // R2+
   void Wsbh(Register rd, Register rt);  // R2+
+  void Bitswap(Register rd, Register rt);  // R6
 
   void Sll(Register rd, Register rt, int shamt);
   void Srl(Register rd, Register rt, int shamt);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 77211ce..a1485e4 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -74,7 +74,6 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 0d6a8c9..a926ca5 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,6 +36,11 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+// from kernel <include/linux/threads.h>
+#define PID_MAX_LIMIT (4*1024*1024)  // Upper bound. Most kernel configs will have smaller max pid.
+
+static const pid_t kImgDiagGuaranteedBadPid = (PID_MAX_LIMIT + 1);
+
 class ImgDiagTest : public CommonRuntimeTest {
  protected:
   virtual void SetUp() {
@@ -132,7 +137,8 @@
 
   // Run imgdiag --image-diff-pid=some_bad_pid and wait until it's done with a 0 exit code.
   std::string error_msg;
-  ASSERT_FALSE(ExecDefaultBootImage(-12345, &error_msg)) << "Incorrectly executed";
+  ASSERT_FALSE(ExecDefaultBootImage(kImgDiagGuaranteedBadPid,
+                                    &error_msg)) << "Incorrectly executed";
   UNUSED(error_msg);
 }
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 6199808..5833129 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -42,6 +42,8 @@
 #include "gc/space/space-inl.h"
 #include "image.h"
 #include "indenter.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "mapping_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
@@ -52,7 +54,6 @@
 #include "oat_file-inl.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 47f2569..238d9f3 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -298,7 +298,9 @@
         ShadowFrame* shadow_frame =
             self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
         mirror::Throwable* pending_exception = nullptr;
-        self->PopDeoptimizationContext(result, &pending_exception);
+        bool from_code = false;
+        self->PopDeoptimizationContext(result, &pending_exception, &from_code);
+        CHECK(!from_code);
         self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
 
@@ -307,7 +309,7 @@
         if (pending_exception != nullptr) {
           self->SetException(pending_exception);
         }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
+        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index dfd9fcd..c019cae 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -52,7 +52,7 @@
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
-  self->PushDeoptimizationContext(return_value, false, self->GetException());
+  self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
 
   QuickExceptionHandler exception_handler(self, true);
   exception_handler.DeoptimizeSingleFrame();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index c41ee45..2c8ed88 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -685,7 +685,9 @@
     }
 
     mirror::Throwable* pending_exception = nullptr;
-    self->PopDeoptimizationContext(&result, &pending_exception);
+    bool from_code = false;
+    self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
+    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -712,7 +714,7 @@
     if (pending_exception != nullptr) {
       self->SetException(pending_exception);
     }
-    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, from_code, &result);
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
@@ -754,7 +756,8 @@
   if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
-    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+    self->PushDeoptimizationContext(
+        result, shorty[0] == 'L', /* from_code */ false, self->GetException());
 
     // Set special exception to cause deoptimization.
     self->SetException(Thread::GetDeoptimizationException());
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 9c8e4df..7d00094 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1526,10 +1526,9 @@
   }
 }
 
+// The function below may be called by the mutator itself just before thread termination.
 size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
-  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  ReaderMutexLock wmu(self, bulk_free_lock_);
   size_t free_bytes = 0U;
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
@@ -1544,10 +1543,17 @@
       // Count the number of free slots left.
       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
       free_bytes += num_free_slots * bracketSizes[idx];
+      // The bracket index lock above guards the thread-local free list against a race with
+      // the GC thread, which merges the bulk free list into the thread-local free list in
+      // BulkFree. While the run is marked thread-local, the GC thread keeps the thread-local
+      // free list up to date in BulkFree, and that list is merged into the free list either
+      // when the run becomes full or when the run is revoked here, so the free list ends up
+      // updated. Once the run is no longer thread-local, the GC thread merges the bulk free
+      // list in the next BulkFree.
+      // Thus there is no need to merge the bulk free list into the free list again here.
       bool dont_care;
       thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care);
       thread_local_run->SetIsThreadLocal(false);
-      thread_local_run->MergeBulkFreeListToFreeList();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
       RevokeRun(self, idx, thread_local_run);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index bc2c197..264cd2c 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1062,7 +1062,9 @@
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+    self->PushDeoptimizationContext(return_value,
+                                    return_shorty == 'L',
+                                    false /* from_code */,
                                     nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d686f74..871fad7 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -397,7 +397,10 @@
   self->PopShadowFrame();
 }
 
-void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, JValue* ret_val)
+void EnterInterpreterFromDeoptimize(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    bool from_code,
+                                    JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue value;
   // Set value to last known result in case the shadow frame chain is empty.
@@ -408,7 +411,7 @@
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
-    uint32_t new_dex_pc;
+    uint32_t new_dex_pc = dex_pc;
     if (UNLIKELY(self->IsExceptionPending())) {
       // If we deoptimize from the QuickExceptionHandler, we already reported the exception to
       // the instrumentation. To prevent from reporting it a second time, we simply pass a
@@ -419,11 +422,16 @@
                                                                     instrumentation);
       new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
                                   // or DexFile::kDexNoIndex if there is none.
-    } else {
-      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-      // For an invoke, use the dex pc of the next instruction.
+    } else if (!from_code) {
+      // For the debugger and full deoptimization stack, we must go past the invoke
+      // instruction, as it already executed.
       // TODO: should be tested more once b/17586779 is fixed.
-      new_dex_pc = dex_pc + (instr->IsInvoke() ? instr->SizeInCodeUnits() : 0);
+      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+      DCHECK(instr->IsInvoke());
+      new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+    } else {
+      // Nothing to do, the dex_pc is the one at which the code requested
+      // the deoptimization.
     }
     if (new_dex_pc != DexFile::kDexNoIndex) {
       shadow_frame->SetDexPC(new_dex_pc);
@@ -432,6 +440,8 @@
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     ShadowFrame::DeleteDeoptimizedFrame(old_frame);
+    // Deoptimization of the remaining shadow frames must go past the invoke instruction.
+    from_code = false;
     first = false;
   }
   ret_val->SetJ(value.GetJ());
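
The new from_code path above changes where interpretation resumes after a deoptimization: an explicit deoptimization from compiled code resumes at the requesting dex pc, while a debugger-driven deoptimization of a frame stopped at an invoke must resume after that invoke, since it has already executed. A small sketch of that adjustment (not ART code; the instruction size below is illustrative) is:

#include <cstdint>
#include <cstdio>

static uint32_t NextDexPc(uint32_t dex_pc, uint32_t invoke_size_in_code_units, bool from_code) {
  if (from_code) {
    return dex_pc;                             // re-execute from the dex pc that requested deopt
  }
  return dex_pc + invoke_size_in_code_units;   // step over the already-executed invoke
}

int main() {
  printf("%u %u\n", NextDexPc(10, 3, /* from_code */ true),
                    NextDexPc(10, 3, /* from_code */ false));  // prints "10 13"
  return 0;
}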
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index b21ea84..8e7f3da 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -37,7 +37,8 @@
                                        mirror::Object* receiver, uint32_t* args, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame,
+// 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
+extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, bool from_code,
                                            JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 1b30862..92aa86e 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -142,11 +142,24 @@
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
   DCHECK(!method->IsRuntimeMethod());
+  // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
     return false;
   }
-  return jit_compile_method_(jit_compiler_handle_, method, self);
+
+  // Don't compile the method if we are supposed to be deoptimized.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
+    return false;
+  }
+
+  if (!code_cache_->NotifyCompilationOf(method, self)) {
+    return false;
+  }
+  bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+  code_cache_->DoneCompiling(method, self);
+  return success;
 }
 
 void Jit::CreateThreadPool() {
@@ -175,7 +188,7 @@
 
   uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
   if (offline_profile_info_->NeedsSaving(last_update_ns)) {
-    VLOG(profiler) << "Iniate save profiling information to: " << filename;
+    VLOG(profiler) << "Initiate save profiling information to: " << filename;
     std::set<ArtMethod*> methods;
     {
       ScopedObjectAccess soa(Thread::Current());
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 804d69f..3342e92 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -536,7 +536,9 @@
       instrumentation->UpdateMethodsCode(it.second, GetQuickToInterpreterBridge());
     }
     for (ProfilingInfo* info : profiling_infos_) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
+      if (!info->IsMethodBeingCompiled()) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+      }
     }
   }
 
@@ -577,12 +579,17 @@
       }
     }
 
-    // Free all profiling info.
-    for (ProfilingInfo* info : profiling_infos_) {
-      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
-    }
-    profiling_infos_.clear();
+    void* data_mspace = data_mspace_;
+    // Free all profiling infos of methods that were not being compiled.
+    auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
+      [data_mspace] (ProfilingInfo* info) {
+        if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
+          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          return true;
+        }
+        return false;
+      });
+    profiling_infos_.erase(profiling_kept_end, profiling_infos_.end());
 
     live_bitmap_.reset(nullptr);
     has_done_one_collection_ = true;
@@ -643,7 +650,7 @@
                                                       ArtMethod* method,
                                                       const std::vector<uint32_t>& entries) {
   size_t profile_info_size = RoundUp(
-      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size(),
       sizeof(void*));
   ScopedThreadSuspension sts(self, kSuspended);
   MutexLock mu(self, lock_);
@@ -694,5 +701,25 @@
   MutexLock mu(Thread::Current(), lock_);
   return last_update_time_ns_;
 }
+
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
+  if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info == nullptr || info->IsMethodBeingCompiled()) {
+    return false;
+  }
+  info->SetIsMethodBeingCompiled(true);
+  return true;
+}
+
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  DCHECK(info->IsMethodBeingCompiled());
+  info->SetIsMethodBeingCompiled(false);
+}
+
 }  // namespace jit
 }  // namespace art
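
NotifyCompilationOf and DoneCompiling bracket a JIT compilation with the new is_method_being_compiled_ flag, which is also what keeps the code cache collector from freeing the ProfilingInfo of a method that is currently being compiled. A simplified sketch of that protocol (not ART code; the struct and locking are elided for illustration) is:

#include <cstdio>

// Illustrative stand-in for ProfilingInfo with the new flag.
struct ProfilingInfoSketch {
  bool is_method_being_compiled = false;
};

// Returns true if compilation may start; mirrors the early-out when the method has no
// profiling info or is already being compiled.
bool NotifyCompilationOf(ProfilingInfoSketch* info) {
  if (info == nullptr || info->is_method_being_compiled) {
    return false;
  }
  info->is_method_being_compiled = true;
  return true;
}

void DoneCompiling(ProfilingInfoSketch* info) {
  info->is_method_being_compiled = false;
}

int main() {
  ProfilingInfoSketch info;
  bool started = NotifyCompilationOf(&info);   // true: compilation may proceed
  bool again = NotifyCompilationOf(&info);     // false: already in flight
  DoneCompiling(&info);
  printf("%d %d\n", started, again);           // prints "1 0"
  return 0;
}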
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index acd7c62..4032c7b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -66,6 +66,14 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  void DoneCompiling(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index 7615870..4450653 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -68,6 +68,7 @@
   }
 }
 
+
 void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
   DCHECK(method != nullptr);
   const DexFile* dex_file = method->GetDexFile();
@@ -79,25 +80,11 @@
   info_it->second.insert(method->GetDexMethodIndex());
 }
 
-enum OpenMode {
-  READ,
-  READ_WRITE
-};
-
-static int OpenFile(const std::string& filename, OpenMode open_mode) {
-  int fd = -1;
-  switch (open_mode) {
-    case READ:
-      fd = open(filename.c_str(), O_RDONLY);
-      break;
-    case READ_WRITE:
-      // TODO(calin) allow the shared uid of the app to access the file.
-      fd = open(filename.c_str(),
-                    O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
-                    S_IRUSR | S_IWUSR);
-      break;
-  }
-
+static int OpenOrCreateFile(const std::string& filename) {
+  // TODO(calin) allow the shared uid of the app to access the file.
+  int fd = open(filename.c_str(),
+                O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
+                S_IRUSR | S_IWUSR);
   if (fd < 0) {
     PLOG(WARNING) << "Failed to open profile file " << filename;
     return -1;
@@ -109,6 +96,7 @@
     PLOG(WARNING) << "Failed to lock profile file " << filename;
     return -1;
   }
+
   return fd;
 }
 
@@ -141,8 +129,8 @@
   } while (length > 0);
 }
 
-static constexpr const char kFieldSeparator = ',';
-static constexpr const char kLineSeparator = '\n';
+static constexpr char kFieldSeparator = ',';
+static constexpr char kLineSeparator = '\n';
 
 /**
  * Serialization format:
@@ -154,7 +142,7 @@
  **/
 bool OfflineProfilingInfo::Serialize(const std::string& filename,
                                      const DexFileToMethodsMap& info) const {
-  int fd = OpenFile(filename, READ_WRITE);
+  int fd = OpenOrCreateFile(filename);
   if (fd == -1) {
     return false;
   }
@@ -180,212 +168,4 @@
 
   return CloseDescriptorForFile(fd, filename);
 }
-
-// TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
-// is the separator. Merge the fix into Utils::Split once verified that it doesn't break its users.
-static void SplitString(const std::string& s, char separator, std::vector<std::string>* result) {
-  const char* p = s.data();
-  const char* end = p + s.size();
-  // Check if the first character is the separator.
-  if (p != end && *p ==separator) {
-    result->push_back("");
-    ++p;
-  }
-  // Process the rest of the characters.
-  while (p != end) {
-    if (*p == separator) {
-      ++p;
-    } else {
-      const char* start = p;
-      while (++p != end && *p != separator) {
-        // Skip to the next occurrence of the separator.
-      }
-      result->push_back(std::string(start, p - start));
-    }
-  }
-}
-
-bool ProfileCompilationInfo::ProcessLine(const std::string& line,
-                                         const std::vector<const DexFile*>& dex_files) {
-  std::vector<std::string> parts;
-  SplitString(line, kFieldSeparator, &parts);
-  if (parts.size() < 3) {
-    LOG(WARNING) << "Invalid line: " << line;
-    return false;
-  }
-
-  const std::string& multidex_suffix = parts[0];
-  uint32_t checksum;
-  if (!ParseInt(parts[1].c_str(), &checksum)) {
-    return false;
-  }
-
-  const DexFile* current_dex_file = nullptr;
-  for (auto dex_file : dex_files) {
-    if (DexFile::GetMultiDexSuffix(dex_file->GetLocation()) == multidex_suffix) {
-      if (checksum != dex_file->GetLocationChecksum()) {
-        LOG(WARNING) << "Checksum mismatch for "
-            << dex_file->GetLocation() << " when parsing " << filename_;
-        return false;
-      }
-      current_dex_file = dex_file;
-      break;
-    }
-  }
-  if (current_dex_file == nullptr) {
-    return true;
-  }
-
-  for (size_t i = 2; i < parts.size(); i++) {
-    uint32_t method_idx;
-    if (!ParseInt(parts[i].c_str(), &method_idx)) {
-      LOG(WARNING) << "Cannot parse method_idx " << parts[i];
-      return false;
-    }
-    uint16_t class_idx = current_dex_file->GetMethodId(method_idx).class_idx_;
-    auto info_it = info_.find(current_dex_file);
-    if (info_it == info_.end()) {
-      info_it = info_.Put(current_dex_file, ClassToMethodsMap());
-    }
-    ClassToMethodsMap& class_map = info_it->second;
-    auto class_it = class_map.find(class_idx);
-    if (class_it == class_map.end()) {
-      class_it = class_map.Put(class_idx, std::set<uint32_t>());
-    }
-    class_it->second.insert(method_idx);
-  }
-  return true;
-}
-
-// Parses the buffer (of length n) starting from start_from and identify new lines
-// based on kLineSeparator marker.
-// Returns the first position after kLineSeparator in the buffer (starting from start_from),
-// or -1 if the marker doesn't appear.
-// The processed characters are appended to the given line.
-static int GetLineFromBuffer(char* buffer, int n, int start_from, std::string& line) {
-  if (start_from >= n) {
-    return -1;
-  }
-  int new_line_pos = -1;
-  for (int i = start_from; i < n; i++) {
-    if (buffer[i] == kLineSeparator) {
-      new_line_pos = i;
-      break;
-    }
-  }
-  int append_limit = new_line_pos == -1 ? n : new_line_pos;
-  line.append(buffer + start_from, append_limit - start_from);
-  // Jump over kLineSeparator and return the position of the next character.
-  return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
-}
-
-bool ProfileCompilationInfo::Load(const std::vector<const DexFile*>& dex_files) {
-  if (dex_files.empty()) {
-    return true;
-  }
-  if (kIsDebugBuild) {
-    // In debug builds verify that the multidex suffixes are unique.
-    std::set<std::string> suffixes;
-    for (auto dex_file : dex_files) {
-      std::string multidex_suffix = DexFile::GetMultiDexSuffix(dex_file->GetLocation());
-      DCHECK(suffixes.find(multidex_suffix) == suffixes.end())
-          << "DexFiles appear to belong to different apks."
-          << " There are multiple dex files with the same multidex suffix: "
-          << multidex_suffix;
-      suffixes.insert(multidex_suffix);
-    }
-  }
-  info_.clear();
-
-  int fd = OpenFile(filename_, READ);
-  if (fd == -1) {
-    return false;
-  }
-
-  std::string current_line;
-  const int kBufferSize = 1024;
-  char buffer[kBufferSize];
-  bool success = true;
-
-  while (success) {
-    int n = read(fd, buffer, kBufferSize);
-    if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file " << filename_;
-      success = false;
-      break;
-    } else if (n == 0) {
-      break;
-    }
-    // Detect the new lines from the buffer. If we manage to complete a line,
-    // process it. Otherwise append to the current line.
-    int current_start_pos = 0;
-    while (current_start_pos < n) {
-      current_start_pos = GetLineFromBuffer(buffer, n, current_start_pos, current_line);
-      if (current_start_pos == -1) {
-        break;
-      }
-      if (!ProcessLine(current_line, dex_files)) {
-        success = false;
-        break;
-      }
-      // Reset the current line (we just processed it).
-      current_line.clear();
-    }
-  }
-  if (!success) {
-    info_.clear();
-  }
-  return CloseDescriptorForFile(fd, filename_) && success;
-}
-
-bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
-  auto info_it = info_.find(method_ref.dex_file);
-  if (info_it != info_.end()) {
-    uint16_t class_idx = method_ref.dex_file->GetMethodId(method_ref.dex_method_index).class_idx_;
-    const ClassToMethodsMap& class_map = info_it->second;
-    auto class_it = class_map.find(class_idx);
-    if (class_it != class_map.end()) {
-      const std::set<uint32_t>& methods = class_it->second;
-      return methods.find(method_ref.dex_method_index) != methods.end();
-    }
-    return false;
-  }
-  return false;
-}
-
-std::string ProfileCompilationInfo::DumpInfo(bool print_full_dex_location) const {
-  std::ostringstream os;
-  if (info_.empty()) {
-    return "ProfileInfo: empty";
-  }
-
-  os << "ProfileInfo:";
-
-  // Use an additional map to achieve a predefined order based on the dex locations.
-  SafeMap<const std::string, const DexFile*> dex_locations_map;
-  for (auto info_it : info_) {
-    dex_locations_map.Put(info_it.first->GetLocation(), info_it.first);
-  }
-
-  const std::string kFirstDexFileKeySubstitute = ":classes.dex";
-  for (auto dex_file_it : dex_locations_map) {
-    os << "\n";
-    const std::string& location = dex_file_it.first;
-    const DexFile* dex_file = dex_file_it.second;
-    if (print_full_dex_location) {
-      os << location;
-    } else {
-      // Replace the (empty) multidex suffix of the first key with a substitute for easier reading.
-      std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
-      os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
-    }
-    for (auto class_it : info_.find(dex_file)->second) {
-      for (auto method_it : class_it.second) {
-        os << "\n  " << PrettyMethod(method_it, *dex_file, true);
-      }
-    }
-  }
-  return os.str();
-}
-
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 90bda60..e3117eb 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -21,7 +21,6 @@
 
 #include "atomic.h"
 #include "dex_file.h"
-#include "method_reference.h"
 #include "safe_map.h"
 
 namespace art {
@@ -51,47 +50,10 @@
   bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
 
   // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a
-  // singe thread?)
+  // single thread?)
   Atomic<uint64_t> last_update_time_ns_;
 };
 
-/**
- * Profile information in a format suitable to be queried by the compiler and performing
- * profile guided compilation.
- */
-class ProfileCompilationInfo {
- public:
-  // Constructs a ProfileCompilationInfo backed by the provided file.
-  explicit ProfileCompilationInfo(const std::string& filename) : filename_(filename) {}
-
-  // Loads profile information corresponding to the provided dex files.
-  // The dex files' multidex suffixes must be unique.
-  // This resets the state of the profiling information
-  // (i.e. all previously loaded info are cleared).
-  bool Load(const std::vector<const DexFile*>& dex_files);
-
-  // Returns true if the method reference is present in the profiling info.
-  bool ContainsMethod(const MethodReference& method_ref) const;
-
-  const std::string& GetFilename() const { return filename_; }
-
-  // Dumps all the loaded profile info into a string and returns it.
-  // This is intended for testing and debugging.
-  std::string DumpInfo(bool print_full_dex_location = true) const;
-
- private:
-  bool ProcessLine(const std::string& line,
-                   const std::vector<const DexFile*>& dex_files);
-
-  using ClassToMethodsMap = SafeMap<uint32_t, std::set<uint32_t>>;
-  // Map identifying the location of the profiled methods.
-  // dex_file -> class_index -> [dex_method_index]+
-  using DexFileToProfileInfoMap = SafeMap<const DexFile*, ClassToMethodsMap>;
-
-  const std::string filename_;
-  DexFileToProfileInfoMap info_;
-};
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 2e52b1b..dcb346c 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -54,28 +54,29 @@
     code_ptr += instruction.SizeInCodeUnits();
   }
 
-  // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
-  // object, it will never be filled.
-  if (entries.empty()) {
-    return true;
-  }
+  // We always create a `ProfilingInfo` object, even if there is no instruction we are
+  // interested in. The JIT code cache internally uses it.
 
   // Allocate the `ProfilingInfo` object in the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
   return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+InlineCache* ProfilingInfo::GetInlineCache(uint32_t dex_pc) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-    if (cache_[i].dex_pc == dex_pc) {
+    if (cache_[i].dex_pc_ == dex_pc) {
       cache = &cache_[i];
       break;
     }
   }
   DCHECK(cache != nullptr);
+  return cache;
+}
 
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = GetInlineCache(dex_pc);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index b13a315..ddaf02f 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -25,6 +25,7 @@
 namespace art {
 
 class ArtMethod;
+class ProfilingInfo;
 
 namespace jit {
 class JitCodeCache;
@@ -34,6 +35,49 @@
 class Class;
 }
 
+// Structure to store the classes seen at runtime for a specific instruction.
+// Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+class InlineCache {
+ public:
+  bool IsMonomorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 2);
+    return !classes_[0].IsNull() && classes_[1].IsNull();
+  }
+
+  bool IsMegamorphic() const {
+    for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+      if (classes_[i].IsNull()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  mirror::Class* GetMonomorphicType() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Note that we cannot ensure the inline cache is actually monomorphic
+    // at this point, as other threads may have updated it.
+    return classes_[0].Read();
+  }
+
+  bool IsUnitialized() const {
+    return classes_[0].IsNull();
+  }
+
+  bool IsPolymorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 3);
+    return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+  }
+
+ private:
+  static constexpr uint16_t kIndividualCacheSize = 5;
+  uint32_t dex_pc_;
+  GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+
+  friend class ProfilingInfo;
+
+  DISALLOW_COPY_AND_ASSIGN(InlineCache);
+};
+
 /**
  * Profiling info for a method, created and filled by the interpreter once the
  * method is warm, and used by the compiler to drive optimizations.
@@ -67,44 +111,24 @@
     return method_;
   }
 
+  InlineCache* GetInlineCache(uint32_t dex_pc);
+
+  bool IsMethodBeingCompiled() const {
+    return is_method_being_compiled_;
+  }
+
+  void SetIsMethodBeingCompiled(bool value) {
+    is_method_being_compiled_ = value;
+  }
+
  private:
-  // Structure to store the classes seen at runtime for a specific instruction.
-  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
-  struct InlineCache {
-    bool IsMonomorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 2);
-      return !classes_[0].IsNull() && classes_[1].IsNull();
-    }
-
-    bool IsMegamorphic() const {
-      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
-        if (classes_[i].IsNull()) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    bool IsUnitialized() const {
-      return classes_[0].IsNull();
-    }
-
-    bool IsPolymorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 3);
-      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
-    }
-
-    static constexpr uint16_t kIndividualCacheSize = 5;
-    uint32_t dex_pc;
-    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
-  };
-
   ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
       : number_of_inline_caches_(entries.size()),
-        method_(method) {
+        method_(method),
+        is_method_being_compiled_(false) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc = entries[i];
+      cache_[i].dex_pc_ = entries[i];
     }
   }
 
@@ -114,6 +138,11 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Whether the ArtMethod is currently being compiled. This flag
+  // is implicitly guarded by the JIT code cache lock.
+  // TODO: Make the JIT code cache lock global.
+  bool is_method_being_compiled_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
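
The predicates of the now top-level InlineCache classify a call site by how many slots of the fixed-size classes_ array have been filled: none means uninitialized, one means monomorphic, several mean polymorphic, and a full array means megamorphic. A compact sketch of that classification (not ART code; plain pointers stand in for GcRoot slots) is:

#include <cstddef>
#include <cstdio>

struct InlineCacheSketch {
  static constexpr size_t kSize = 5;          // mirrors kIndividualCacheSize
  const void* classes[kSize] = {};            // stands for GcRoot<mirror::Class> slots

  bool IsUninitialized() const { return classes[0] == nullptr; }
  bool IsMonomorphic() const { return classes[0] != nullptr && classes[1] == nullptr; }
  bool IsPolymorphic() const { return classes[1] != nullptr && classes[kSize - 1] == nullptr; }
  bool IsMegamorphic() const {
    for (size_t i = 0; i < kSize; ++i) {
      if (classes[i] == nullptr) return false;
    }
    return true;
  }
};

int main() {
  InlineCacheSketch ic;
  int a = 0, b = 0;
  ic.classes[0] = &a;
  printf("%d\n", ic.IsMonomorphic());  // 1: one recorded class
  ic.classes[1] = &b;
  printf("%d\n", ic.IsPolymorphic());  // 1: more than one class, array not yet full
  return 0;
}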
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 649df5f..d1687d7 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -2210,4 +2210,55 @@
   check_jni_abort_catcher.Check("Still holding a locked object on JNI end");
 }
 
+static bool IsLocked(JNIEnv* env, jobject jobj) {
+  ScopedObjectAccess soa(env);
+  LockWord lock_word = soa.Decode<mirror::Object*>(jobj)->GetLockWord(true);
+  switch (lock_word.GetState()) {
+    case LockWord::kHashCode:
+    case LockWord::kUnlocked:
+      return false;
+    case LockWord::kThinLocked:
+      return true;
+    case LockWord::kFatLocked:
+      return lock_word.FatLockMonitor()->IsLocked();
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      UNREACHABLE();
+    }
+  }
+}
+
+TEST_F(JniInternalTest, DetachThreadUnlockJNIMonitors) {
+  // We need to lock an object, detach, reattach, and check the locks.
+  //
+  // As re-attaching will create a different thread, we need to use a global
+  // ref to keep the object around.
+
+  // Create an object to torture.
+  jobject global_ref;
+  {
+    jclass object_class = env_->FindClass("java/lang/Object");
+    ASSERT_NE(object_class, nullptr);
+    jobject object = env_->AllocObject(object_class);
+    ASSERT_NE(object, nullptr);
+    global_ref = env_->NewGlobalRef(object);
+  }
+
+  // Lock it.
+  env_->MonitorEnter(global_ref);
+  ASSERT_TRUE(IsLocked(env_, global_ref));
+
+  // Detach and re-attach.
+  jint detach_result = vm_->DetachCurrentThread();
+  ASSERT_EQ(detach_result, JNI_OK);
+  jint attach_result = vm_->AttachCurrentThread(&env_, nullptr);
+  ASSERT_EQ(attach_result, JNI_OK);
+
+  // Look at the global ref, check whether it's still locked.
+  ASSERT_FALSE(IsLocked(env_, global_ref));
+
+  // Delete the global ref.
+  env_->DeleteGlobalRef(global_ref);
+}
+
 }  // namespace art
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 1552318..9cb37ee 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -283,7 +283,12 @@
         prev_shadow_frame_(nullptr),
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
-        single_frame_done_(false) {
+        single_frame_done_(false),
+        single_frame_deopt_method_(nullptr) {
+  }
+
+  ArtMethod* GetSingleFrameDeoptMethod() const {
+    return single_frame_deopt_method_;
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -356,6 +361,7 @@
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
+        single_frame_deopt_method_ = method;
       }
       return true;
     }
@@ -586,6 +592,7 @@
   bool stacked_shadow_frame_pushed_;
   const bool single_frame_deopt_;
   bool single_frame_done_;
+  ArtMethod* single_frame_deopt_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -614,6 +621,14 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
+  // Compiled code made an explicit deoptimization. Switch the method's entry point back to
+  // the interpreter and clear its counter so it can be JIT compiled again.
+  ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
+  DCHECK(deopt_method != nullptr);
+  deopt_method->ClearCounter();
+  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+      deopt_method, GetQuickToInterpreterBridge());
+
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
   #ifdef __LP64__
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 3e7b26d..fe8eb0d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -219,6 +219,8 @@
     UnloadNativeBridge();
   }
 
+  MaybeSaveJitProfilingInfo();
+
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 63e6326..90539b4 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -164,14 +164,20 @@
 
 class DeoptimizationContextRecord {
  public:
-  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+  DeoptimizationContextRecord(const JValue& ret_val,
+                              bool is_reference,
+                              bool from_code,
                               mirror::Throwable* pending_exception,
                               DeoptimizationContextRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+      : ret_val_(ret_val),
+        is_reference_(is_reference),
+        from_code_(from_code),
+        pending_exception_(pending_exception),
         link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
+  bool GetFromCode() const { return from_code_; }
   mirror::Throwable* GetPendingException() const { return pending_exception_; }
   DeoptimizationContextRecord* GetLink() const { return link_; }
   mirror::Object** GetReturnValueAsGCRoot() {
@@ -189,6 +195,9 @@
   // Indicates whether the returned value is a reference. If so, the GC will visit it.
   const bool is_reference_;
 
+  // Whether the context was created from an explicit deoptimization in the code.
+  const bool from_code_;
+
   // The exception that was pending before deoptimization (or null if there was no pending
   // exception).
   mirror::Throwable* pending_exception_;
@@ -220,22 +229,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+void Thread::PushDeoptimizationContext(const JValue& return_value,
+                                       bool is_reference,
+                                       bool from_code,
                                        mirror::Throwable* exception) {
   DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
       return_value,
       is_reference,
+      from_code,
       exception,
       tlsPtr_.deoptimization_context_stack);
   tlsPtr_.deoptimization_context_stack = record;
 }
 
-void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+void Thread::PopDeoptimizationContext(JValue* result,
+                                      mirror::Throwable** exception,
+                                      bool* from_code) {
   AssertHasDeoptimizationContext();
   DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
   tlsPtr_.deoptimization_context_stack = record->GetLink();
   result->SetJ(record->GetReturnValue().GetJ());
   *exception = record->GetPendingException();
+  *from_code = record->GetFromCode();
   delete record;
 }
 
@@ -2546,7 +2561,8 @@
     if (is_deoptimization) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
-      PushDeoptimizationContext(JValue(), false, exception);
+      PushDeoptimizationContext(
+          JValue(), /* is_reference */ false, /* from_code */ false, exception);
     }
   }
   // Don't leave exception visible while we try to find the handler, which may cause class
diff --git a/runtime/thread.h b/runtime/thread.h
index 4624f27..3abb3cf 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -849,10 +849,14 @@
   // and execute Java code, so there might be nested deoptimizations happening.
   // We need to save the ongoing deoptimization shadow frames and return
   // values on stacks.
-  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+  // 'from_code' denotes whether the deoptimization was explicitly requested
+  // from compiled code.
+  void PushDeoptimizationContext(const JValue& return_value,
+                                 bool is_reference,
+                                 bool from_code,
                                  mirror::Throwable* exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception, bool* from_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index 34d2f64..3f6e48b 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -415,6 +415,135 @@
     return result;
   }
 
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnTwoArrayLengths(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < b.length; j++) {
+        // Need to know j < b.length < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < b.length for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnOneArrayLength(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnParameter(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < n for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  // Verifier for triangular methods.
+  private static void verifyTriangular(int[] a, int[] b, int m, int n) {
+    expectEquals(n, a.length);
+    for (int i = 0, k = m; i < n; i++) {
+      expectEquals(a[i], k);
+      if (k > 0) k--;
+    }
+    expectEquals(m, b.length);
+    for (int i = 0; i < m; i++) {
+      expectEquals(b[i], 1);
+    }
+  }
+
+  /// CHECK-START: void Main.bubble(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.bubble(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void bubble(int[] a) {
+    for (int i = a.length; --i >= 0;) {
+      for (int j = 0; j < i; j++) {
+        if (a[j] > a[j+1]) {
+          int tmp = a[j];
+          a[j]  = a[j+1];
+          a[j+1] = tmp;
+        }
+      }
+    }
+  }
+
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
@@ -1012,6 +1141,16 @@
     expectEquals(55, linearDoWhileDown());
     expectEquals(55, linearShort());
     expectEquals(55, invariantFromPreLoop(x, 1));
+    linearTriangularOnTwoArrayLengths(10);
+    linearTriangularOnOneArrayLength(10);
+    linearTriangularOnParameter(10);
+
+    // Sorting.
+    int[] sort = { 5, 4, 1, 9, 10, 2, 7, 6, 3, 8 };
+    bubble(sort);
+    for (int i = 0; i < 10; i++) {
+      expectEquals(sort[i], x[i]);
+    }
 
     // Periodic adds (1, 3), one at the time.
     expectEquals(0, periodicIdiom(-1));
diff --git a/test/554-jit-profile-file/expected.txt b/test/554-jit-profile-file/expected.txt
deleted file mode 100644
index cde211e..0000000
--- a/test/554-jit-profile-file/expected.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-JNI_OnLoad called
-ProfileInfo:
-:classes.dex
-  java.lang.String Main.hotMethod()
-  void Main.main(java.lang.String[])
-:classes2.dex
-  java.lang.String OtherDex.hotMethod()
diff --git a/test/554-jit-profile-file/info.txt b/test/554-jit-profile-file/info.txt
deleted file mode 100644
index b1bfe81..0000000
--- a/test/554-jit-profile-file/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Check that saving and restoring profile files works correctly in a JIT environment.
diff --git a/test/554-jit-profile-file/offline_profile.cc b/test/554-jit-profile-file/offline_profile.cc
deleted file mode 100644
index c63073d..0000000
--- a/test/554-jit-profile-file/offline_profile.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dex_file.h"
-
-#include "jit/offline_profiling_info.h"
-#include "jni.h"
-#include "mirror/class-inl.h"
-#include "oat_file_assistant.h"
-#include "oat_file_manager.h"
-#include "scoped_thread_state_change.h"
-#include "thread.h"
-
-namespace art {
-namespace {
-
-extern "C" JNIEXPORT jstring JNICALL Java_Main_getProfileInfoDump(
-    JNIEnv* env, jclass, jstring filename, jclass cls_from_primary, jclass cls_from_secondary) {
-  // Note:
-  // Ideally we would get the dex list from the primary oat file.
-  // e.g.
-  //   oat_file = Runtime::Current()->GetOatFileManager().GetPrimaryOatFile();
-  //   dex_files = OatFileAssistant::LoadDexFiles(*oat_file, dex_location.c_str());
-  // However the ownership of the pointers is complicated since the primary file
-  // already exists and the test crashed sporadically because some data changes under
-  // our feet.
-  // To simplify things get the dex files from the classes passed as arguments.
-  const DexFile* dex_primary;
-  const DexFile* dex_secondary;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    dex_primary = soa.Decode<mirror::Class*>(cls_from_primary)->GetDexCache()->GetDexFile();
-    dex_secondary = soa.Decode<mirror::Class*>(cls_from_secondary)->GetDexCache()->GetDexFile();
-  }
-
-  std::vector<const DexFile*> dex_files;
-  dex_files.push_back(dex_primary);
-  dex_files.push_back(dex_secondary);
-
-  const char* filename_chars = env->GetStringUTFChars(filename, nullptr);
-  ProfileCompilationInfo info(filename_chars);
-  const char* result = info.Load(dex_files)
-      ? info.DumpInfo(/*print_full_dex_location*/false).c_str()
-      : nullptr;
-  env->ReleaseStringUTFChars(filename, filename_chars);
-  return env->NewStringUTF(result);
-}
-
-}  // namespace
-}  // namespace art
diff --git a/test/554-jit-profile-file/run b/test/554-jit-profile-file/run
deleted file mode 100644
index 08dcb38..0000000
--- a/test/554-jit-profile-file/run
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-exec ${RUN} \
-  -Xcompiler-option --compiler-filter=interpret-only \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  --runtime-option -Xjitthreshold:100 \
-  "${@}"
diff --git a/test/554-jit-profile-file/src-multidex/OtherDex.java b/test/554-jit-profile-file/src-multidex/OtherDex.java
deleted file mode 100644
index 51644db..0000000
--- a/test/554-jit-profile-file/src-multidex/OtherDex.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.HashMap;
-
-public class OtherDex {
-  public void coldMethod() {
-    hotMethod();
-  }
-
-  public String hotMethod() {
-    HashMap<String, String> map = new HashMap<String, String>();
-    for (int i = 0; i < 10; i++) {
-      map.put("" + i, "" + i + 1);
-    }
-    return map.get("1");
-  }
-}
diff --git a/test/554-jit-profile-file/src/Main.java b/test/554-jit-profile-file/src/Main.java
deleted file mode 100644
index ba613ae..0000000
--- a/test/554-jit-profile-file/src/Main.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.util.HashMap;
-
-public class Main {
-
-  public void coldMethod() {
-    hotMethod();
-  }
-
-  public String hotMethod() {
-    HashMap<String, String> map = new HashMap<String, String>();
-    for (int i = 0; i < 10; i++) {
-      map.put("" + i, "" + i + 1);
-    }
-    return map.get("1");
-  }
-
-  private static final String PKG_NAME = "test.package";
-  private static final String PROFILE_FILE = PKG_NAME + ".prof";
-  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
-  private static final String TEMP_FILE_NAME_SUFFIX = ".file";
-  private static final int JIT_INVOCATION_COUNT = 101;
-
-  /* needs to match Runtime:: kProfileBackground */
-  private static final int PROFILE_BACKGROUND = 1;
-
-  public static void main(String[] args) throws Exception {
-    System.loadLibrary(args[0]);
-
-    File file = null;
-    File profileDir = null;
-    File profileFile = null;
-    try {
-      // We don't know where we have rights to create the code_cache. So create
-      // a dummy temporary file and get its parent directory. That will serve as
-      // the app directory.
-      file = createTempFile();
-      String appDir = file.getParent();
-      profileDir = new File(appDir, "code_cache");
-      profileDir.mkdir();
-
-      // Registering the app info will set the profile file name.
-      VMRuntime.registerAppInfo(PKG_NAME, appDir);
-
-      // Make sure the hot methods are jitted.
-      Main m = new Main();
-      OtherDex o = new OtherDex();
-      for (int i = 0; i < JIT_INVOCATION_COUNT; i++) {
-        m.hotMethod();
-        o.hotMethod();
-      }
-
-      // Updating the process state to BACKGROUND will trigger profile saving.
-      VMRuntime.updateProcessState(PROFILE_BACKGROUND);
-
-      // Check that the profile file exists.
-      profileFile = new File(profileDir, PROFILE_FILE);
-      if (!profileFile.exists()) {
-        throw new RuntimeException("No profile file found");
-      }
-      // Dump the profile file.
-      // We know what methods are hot and we compare with the golden `expected` output.
-      System.out.println(getProfileInfoDump(profileFile.getPath(), m.getClass(), o.getClass()));
-    } finally {
-      if (file != null) {
-        file.delete();
-      }
-      if (profileFile != null) {
-        profileFile.delete();
-      }
-      if (profileDir != null) {
-        profileDir.delete();
-      }
-    }
-  }
-
-  private static class VMRuntime {
-    private static final Method registerAppInfoMethod;
-    private static final Method updateProcessStateMethod;
-    private static final Method getRuntimeMethod;
-    static {
-      try {
-        Class c = Class.forName("dalvik.system.VMRuntime");
-        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
-            String.class, String.class, String.class);
-        updateProcessStateMethod = c.getDeclaredMethod("updateProcessState", Integer.TYPE);
-        getRuntimeMethod = c.getDeclaredMethod("getRuntime");
-      } catch (Exception e) {
-        throw new RuntimeException(e);
-      }
-    }
-
-    public static void registerAppInfo(String pkgName, String appDir) throws Exception {
-      registerAppInfoMethod.invoke(null, pkgName, appDir, null);
-    }
-    public static void updateProcessState(int state) throws Exception {
-      Object runtime = getRuntimeMethod.invoke(null);
-      updateProcessStateMethod.invoke(runtime, state);
-    }
-  }
-
-  static native String getProfileInfoDump(
-      String filename, Class<?> clsFromPrimary, Class<?> clsFromSecondary);
-
-  private static File createTempFile() throws Exception {
-    try {
-      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-    } catch (IOException e) {
-      System.setProperty("java.io.tmpdir", "/data/local/tmp");
-      try {
-        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-      } catch (IOException e2) {
-        System.setProperty("java.io.tmpdir", "/sdcard");
-        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-      }
-    }
-  }
-}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index f84dfe6..f74a516 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -38,8 +38,7 @@
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
   497-inlining-and-class-loader/clear_dex_cache.cc \
-  543-env-long-ref/env_long_ref.cc \
-  554-jit-profile-file/offline_profile.cc
+  543-env-long-ref/env_long_ref.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 10a4d15..0925d36 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -298,7 +298,6 @@
   412-new-array \
   471-uninitialized-locals \
   506-verify-aput \
-  554-jit-profile-file \
   800-smali
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
@@ -357,15 +356,13 @@
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
-# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
   119-noimage-patchoat \
   137-cfi \
-  138-duplicate-classes-check2 \
-  554-jit-profile-file
+  138-duplicate-classes-check2
 
 # This test fails without an image.
 TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
@@ -416,8 +413,7 @@
 # Known broken tests for the interpreter.
 # CFI unwinding expects managed frames.
 TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \
-  137-cfi \
-  554-jit-profile-file
+  137-cfi
 
 ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -524,8 +520,7 @@
 # Tests that should fail in the heap poisoning configuration with the interpreter.
 # 137: Cannot run this with the interpreter.
 TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS := \
-  137-cfi \
-  554-jit-profile-file
+  137-cfi
 
 ifeq ($(ART_HEAP_POISONING),true)
   ifneq (,$(filter default,$(COMPILER_TYPES)))