diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 1c2d16f..243395a 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -17,6 +17,7 @@
 #include "compiler_backend.h"
 #include "compiler_internals.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "dataflow_iterator-inl.h"
 #include "leb128.h"
 #include "mirror/object.h"
@@ -25,7 +26,7 @@
 #include "backend.h"
 #include "base/logging.h"
 #include "base/timing_logger.h"
-
+#include "driver/compiler_options.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 
 namespace art {
@@ -209,13 +210,26 @@
     cu.mir_graph->EnableOpcodeCounting();
   }
 
+  const CompilerOptions& compiler_options = cu.compiler_driver->GetCompilerOptions();
+  CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
+
+  // Check early if we should skip this compilation if using the profiled filter.
+  if (cu.compiler_driver->ProfilePresent()) {
+    std::string methodname = PrettyMethod(method_idx, dex_file);
+    if (cu.mir_graph->SkipCompilation(methodname)) {
+      return NULL;
+    }
+  }
+
   /* Build the raw MIR graph */
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
   cu.NewTimingSplit("MIROpt:CheckFilters");
-  if (cu.mir_graph->SkipCompilation()) {
-    return NULL;
+  if (compiler_filter != CompilerOptions::kInterpretOnly) {
+    if (cu.mir_graph->SkipCompilation()) {
+      return NULL;
+    }
   }
 
   /* Create the pass driver and launch it */
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 667ee26..5314bb7 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -999,7 +999,6 @@
 
  /*
   * Will eventually want this to be a bit more sophisticated and happen at verification time.
-  * Ultimate goal is to drive with profile data.
   */
 bool MIRGraph::SkipCompilation() {
   const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
@@ -1013,8 +1012,7 @@
     return true;
   }
 
-  if (compiler_filter == CompilerOptions::kInterpretOnly) {
-    LOG(WARNING) << "InterpretOnly should ideally be filtered out prior to parsing.";
+  if (compiler_filter == CompilerOptions::kInterpretOnly || compiler_filter == CompilerOptions::kProfiled) {
     return true;
   }
 
@@ -1170,4 +1168,8 @@
   }
 }
 
+bool MIRGraph::SkipCompilation(const std::string& methodname) {
+  return cu_->compiler_driver->SkipCompilation(methodname);  // Decision is made by the driver.
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 85d6d89..94b3816 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -383,6 +383,11 @@
   bool SkipCompilation();
 
   /*
+   * Should we skip the compilation of this method based on its name?
+   */
+  bool SkipCompilation(const std::string& methodname);
+
+  /*
    * Parse dex method and add MIR at current insert point.  Returns id (which is
    * actually the index of the method in the m_units_ array).
    */
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 947c22d..6b0875c 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -110,7 +110,7 @@
   if (((access_flags & kAccConstructor) != 0) && ((access_flags & kAccStatic) != 0)) {
     return false;
   }
-  return (compiler_options_->GetCompilerFilter() != CompilerOptions::kInterpretOnly);
+  return true;
 }
 
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d3d58c9..a46015d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -21,6 +21,7 @@
 
 #include <vector>
 #include <unistd.h>
+#include <fstream>
 
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
@@ -303,8 +304,9 @@
                                InstructionSet instruction_set,
                                InstructionSetFeatures instruction_set_features,
                                bool image, DescriptorSet* image_classes, size_t thread_count,
-                               bool dump_stats, bool dump_passes, CumulativeLogger* timer)
-    : compiler_options_(compiler_options),
+                               bool dump_stats, bool dump_passes, CumulativeLogger* timer,
+                               std::string profile_file)
+    : profile_ok_(false), compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_backend_(CompilerBackend::Create(compiler_backend_kind)),
@@ -338,6 +340,11 @@
 
   CHECK_PTHREAD_CALL(pthread_key_create, (&tls_key_, NULL), "compiler tls key");
 
+  // Read the profile file if one is provided.
+  if (profile_file != "") {
+    profile_ok_ = ReadProfile(profile_file);
+  }
+
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
 
   compiler_backend_->Init(*this);
@@ -1936,7 +1943,6 @@
   } else {
     MethodReference method_ref(&dex_file, method_idx);
     bool compile = verification_results_->IsCandidateForCompilation(method_ref, access_flags);
-
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return NULL.
       compiled_method = compiler_backend_->Compile(
@@ -2073,4 +2079,86 @@
       LOG(FATAL) << "Unknown instruction set: " << instruction_set;
     }
   }
+
+// Parses |filename| (summary line "a/b/c", then one "method/count/size" line per method) into
+// profile_map_.  Returns false if the file is missing, unreadable or has a bad/empty summary.
+bool CompilerDriver::ReadProfile(const std::string& filename) {
+  VLOG(compiler) << "reading profile file " << filename;
+  struct stat st;
+  int err = stat(filename.c_str(), &st);
+  if (err == -1) {
+    VLOG(compiler) << "not found";
+    return false;
+  }
+  std::ifstream in(filename.c_str());
+  if (!in) {
+    VLOG(compiler) << "profile file " << filename << " exists but can't be opened";
+    VLOG(compiler) << "file owner: " << st.st_uid << ":" << st.st_gid;
+    VLOG(compiler) << "me: " << getuid() << ":" << getgid();
+    VLOG(compiler) << "file permissions: " << std::oct << st.st_mode;
+    VLOG(compiler) << "errno: " << errno;
+    return false;
+  }
+  // The first line contains summary information.
+  std::string line;
+  if (!std::getline(in, line)) {
+    return false;
+  }
+  std::vector<std::string> summary_info;
+  Split(line, '/', summary_info);
+  if (summary_info.size() != 3) {
+    // Bad summary info.  It should be count/total/bootpath
+    return false;
+  }
+  // This is the number of hits in all methods: the sum of the three summary fields.
+  uint32_t total_count = 0;
+  for (int i = 0 ; i < 3; ++i) {
+    total_count += atoi(summary_info[i].c_str());
+  }
+  if (total_count == 0) {  // No hits at all: per-method percentages would be undefined.
+    return false;
+  }
+
+  // Now read each line until getline() fails.  Each line consists of 3 fields separated by /
+  while (std::getline(in, line)) {
+    std::vector<std::string> info;
+    Split(line, '/', info);
+    if (info.size() != 3) {
+      // Malformed.
+      break;
+    }
+    const std::string& methodname = info[0];
+    uint32_t count = atoi(info[1].c_str());
+    uint32_t size = atoi(info[2].c_str());
+    double percent = (count * 100.0) / total_count;
+    // Add it to the profile map
+    profile_map_[methodname] = ProfileData(methodname, count, size, percent);
+  }
+  return true;
+}
+
+// Returns true when |method_name| should NOT be compiled according to the loaded profile.
+bool CompilerDriver::SkipCompilation(const std::string& method_name) {
+  // Methods whose share of the profile reaches this percentage get compiled.
+  constexpr double kThresholdPercent = 2.0;
+  if (!profile_ok_) {
+    return true;
+  }
+
+  ProfileMap::iterator entry = profile_map_.find(method_name);
+  if (entry == profile_map_.end()) {
+    // The profile has no entry for this method, so there is nothing to base a decision on.
+    VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    return true;
+  }
+  const ProfileData& usage = entry->second;
+  if (!usage.IsAbove(kThresholdPercent)) {
+    VLOG(compiler) << "not compiling method " << method_name << " because usage is too low ("
+        << usage.GetPercent() << "%)";
+    return true;
+  }
+  LOG(INFO) << "compiling method " << method_name << " because its usage is " <<
+      usage.GetPercent() << "%";
+  return false;
+}
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index ac70e5a..12463a9 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -105,7 +105,8 @@
                           InstructionSetFeatures instruction_set_features,
                           bool image, DescriptorSet* image_classes,
                           size_t thread_count, bool dump_stats, bool dump_passes,
-                          CumulativeLogger* timer);
+                          CumulativeLogger* timer,
+                          std::string profile_file = "");
 
   ~CompilerDriver();
 
@@ -141,6 +142,10 @@
     return compiler_backend_.get();
   }
 
+  bool ProfilePresent() const {
+    return profile_ok_;  // True once the constructor's ReadProfile() call succeeded.
+  }
+
   // Are we compiling and creating an image file?
   bool IsImage() const {
     return image_;
@@ -554,6 +559,37 @@
     return cfi_info_.get();
   }
 
+  // Profile data.  This is generated from previous runs of the program and stored
+  // in a file.  It is used to determine whether to compile a particular method or not.
+  class ProfileData {
+   public:
+    ProfileData() : count_(0), method_size_(0), percent_(0) {}
+    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
+                double percent)
+        : method_name_(method_name), count_(count), method_size_(method_size), percent_(percent) {}
+
+    bool IsAbove(double v) const { return percent_ >= v; }  // Inclusive: equal counts as above.
+    double GetPercent() const { return percent_; }
+
+   private:
+    std::string method_name_;   // Method name.
+    uint32_t count_;            // Number of hits recorded for the method.
+    uint32_t method_size_;      // Size of the method in dex code units.
+    double percent_;            // count_ as a percentage of all hits in the profile.
+  };
+
+  // Profile data is stored in a map, indexed by the full method name.
+  typedef std::map<const std::string, ProfileData> ProfileMap;
+  ProfileMap profile_map_;
+  bool profile_ok_;
+
+  // Read the profile data from the given file.  Calculates the percentage for each method.
+  // Returns false if there was no profile file or it was malformed.
+  bool ReadProfile(const std::string& filename);
+
+  // Should the compiler run on this method given profile information?
+  bool SkipCompilation(const std::string& method_name);
+
  private:
   // Compute constant code and method pointers when possible
   void GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType sharp_type,
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 39738ab..0cca1e9 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -23,6 +23,7 @@
  public:
   enum CompilerFilter {
     kInterpretOnly,       // Compile nothing.
+    kProfiled,            // Compile based on profile.
     kSpace,               // Maximize space savings.
     kBalanced,            // Try to get the best performance return on compilation investment.
     kSpeed,               // Maximize runtime performance.
@@ -30,7 +31,11 @@
   };
 
   // Guide heuristics to determine whether to compile method if profile data not available.
+#if ART_SMALL_MODE
+  static const CompilerFilter kDefaultCompilerFilter = kProfiled;
+#else
   static const CompilerFilter kDefaultCompilerFilter = kSpeed;
+#endif
   static const size_t kDefaultHugeMethodThreshold = 10000;
   static const size_t kDefaultLargeMethodThreshold = 600;
   static const size_t kDefaultSmallMethodThreshold = 60;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 7c81ffb..cc78816 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -200,6 +200,8 @@
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
   UsageError("      Example: --runtime-arg -Xms256m");
+    UsageError("");
+    UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
   UsageError("");
   std::cerr << "See log for usage error information\n";
   exit(EXIT_FAILURE);
@@ -310,7 +312,8 @@
                                       bool dump_stats,
                                       bool dump_passes,
                                       TimingLogger& timings,
-                                      CumulativeLogger& compiler_phases_timings) {
+                                      CumulativeLogger& compiler_phases_timings,
+                                      std::string profile_file) {
     // SirtRef and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = NULL;
     Thread* self = Thread::Current();
@@ -340,7 +343,8 @@
                                                         thread_count_,
                                                         dump_stats,
                                                         dump_passes,
-                                                        &compiler_phases_timings));
+                                                        &compiler_phases_timings,
+                                                        profile_file));
 
     driver->GetCompilerBackend()->SetBitcodeFileName(*driver.get(), bitcode_filename);
 
@@ -742,6 +746,8 @@
   InstructionSet instruction_set = kNone;
 #endif
 
+  // Profile file to use
+  std::string profile_file;
 
   bool is_host = false;
   bool dump_stats = false;
@@ -896,6 +902,12 @@
       dump_passes = true;
     } else if (option == "--dump-stats") {
       dump_stats = true;
+    } else if (option.starts_with("--profile-file=")) {
+      profile_file = option.substr(strlen("--profile-file=")).data();
+      VLOG(compiler) << "dex2oat: profile file is " << profile_file;
+    } else if (option == "--no-profile-file") {
+      LOG(INFO) << "dex2oat: no profile file supplied (explictly)";
+      // No profile
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -1204,7 +1216,8 @@
                                                                   dump_stats,
                                                                   dump_passes,
                                                                   timings,
-                                                                  compiler_phases_timings));
+                                                                  compiler_phases_timings,
+                                                                  profile_file));
 
   if (compiler.get() == NULL) {
     LOG(ERROR) << "Failed to create oat file: " << oat_location;
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 8a96d79..bab0604 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -15,6 +15,7 @@
  */
 
 #include <unistd.h>
+#include <fcntl.h>
 
 #include "base/logging.h"
 #include "class_linker.h"
@@ -36,6 +37,10 @@
 #include "toStringArray.h"
 #include "zip_archive.h"
 
+#ifdef HAVE_ANDROID_OS
+#include "cutils/properties.h"
+#endif
+
 namespace art {
 
 // A smart pointer that provides read-only access to a Java string's UTF chars.
@@ -193,7 +198,40 @@
   return toStringArray(env, class_names);
 }
 
-static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
+// Copies the profile at |oldfile| to |newfile|, creating/truncating the copy with mode 0600.
+// Best effort: every failure is logged and the function simply returns.
+static void CopyProfileFile(const char* oldfile, const char* newfile) {
+  int fd = open(oldfile, O_RDONLY);
+  if (fd < 0) {
+    // Show the uid:gid of this process to allow diagnosis of the problem.
+    LOG(ERROR) << "Failed to open profile file " << oldfile<< ".  My uid:gid is "
+      << getuid() << ":" << getgid();
+    return;
+  }
+
+  // Create the copy with rw------- (only accessible by system)
+  int fd2 = open(newfile, O_WRONLY|O_CREAT|O_TRUNC, 0600);
+  if (fd2 < 0) {
+    LOG(ERROR) << "Failed to create/write prev profile file " << newfile << ".  My uid:gid is "
+      << getuid() << ":" << getgid();
+    close(fd);  // Don't leak the source descriptor on this error path.
+    return;
+  }
+  char buf[4096];
+  ssize_t n;
+  while ((n = read(fd, buf, sizeof(buf))) > 0) {
+    // A short or failed write would silently leave a truncated copy behind; report it.
+    if (write(fd2, buf, n) != n) {
+      LOG(ERROR) << "Failed to write prev profile file " << newfile;
+      break;
+    }
+  }
+  close(fd);
+  close(fd2);
+}
+
+static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
+    jstring javaPkgname, jboolean defer) {
   const bool kVerboseLogging = false;  // Spammy logging.
   const bool kDebugLogging = true;  // Logging useful for debugging.
 
@@ -221,6 +259,97 @@
     }
   }
 
+  // Check the profile file.  We need to rerun dex2oat if the profile has changed significantly
+  // since the last time, or it's new.
+  // If the 'defer' argument is true then this will be retried later.  In this case we
+  // need to make sure that the profile file copy is not made so that we will get the
+  // same result second time.
+  if (javaPkgname != NULL) {
+    ScopedUtfChars pkgname(env, javaPkgname);
+    std::string profile_file = GetDalvikCacheOrDie(GetAndroidData()) + std::string("/profiles/") +
+    pkgname.c_str();
+
+    std::string profile_cache_dir = GetDalvikCacheOrDie(GetAndroidData()) + "/profile-cache";
+
+    // Make the profile cache if it doesn't exist.
+    mkdir(profile_cache_dir.c_str(), 0700);
+
+    // The previous profile file (a copy of the profile the last time this was run) is
+    // in the dalvik-cache directory because this is owned by system.  The profiles
+    // directory is owned by install so system cannot write files in there.
+    std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname.c_str();
+
+    struct stat profstat, prevstat;
+    int e1 = stat(profile_file.c_str(), &profstat);
+    int e2 = stat(prev_profile_file.c_str(), &prevstat);
+
+    if (e1 < 0) {
+      // No profile file, need to run dex2oat
+      if (kDebugLogging) {
+        LOG(INFO) << "DexFile_isDexOptNeeded profile file " << profile_file << " doesn't exist";
+      }
+      return JNI_TRUE;
+    }
+    if (e2 == 0) {
+      // There is a previous profile file.  Use file size as a proxy for a significant change:
+      // run dex2oat if the new size differs from the old by more than the threshold percentage.
+      // NOTE(review): the watermarks below use threshold/10.0, not /100.0 -- confirm intended.
+      double newsize = profstat.st_size;
+      double oldsize = prevstat.st_size;
+      bool need_profile = false;
+
+      double ratio = 0;     // If the old file was empty and the new one not
+      if (oldsize > 0 && newsize > 0) {
+        ratio = newsize / oldsize;
+      } else if (oldsize == 0 && newsize > 0) {
+        need_profile = true;
+      } else if (oldsize > 0 && newsize == 0) {
+        // Unlikely to happen, but cover all the bases.
+        need_profile = true;
+      }
+
+      double significant_difference = 10.0;
+#ifdef HAVE_ANDROID_OS
+      // Switch off profiler if the dalvik.vm.profiler property has value 0.
+      char buf[PROP_VALUE_MAX];
+      property_get("dalvik.vm.profiler.dex2oat.threshold", buf, "10.0");
+      significant_difference = strtod(buf, nullptr);
+
+      // Something reasonable?
+      if (significant_difference < 1.0 || significant_difference > 90.0) {
+        significant_difference = 10.0;
+      }
+#endif      // The percentage difference that we consider as being significant.
+      double diff_hwm = 1.0 + significant_difference/10.0;
+      double diff_lwm = 1.0 - significant_difference/10.0;
+
+      if (ratio > diff_hwm || ratio < diff_lwm) {
+        need_profile = true;
+      }
+
+      if (need_profile) {
+        if (kDebugLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded size of new profile file " << profile_file <<
+          " is significantly different from old profile file " << prev_profile_file << " (new: " <<
+          newsize << ", old: " << oldsize << ", ratio: " << ratio << ")";
+        }
+        if (!defer) {
+          CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str());
+        }
+        return JNI_TRUE;
+      }
+    } else {
+      // Previous profile does not exist.  Make a copy of the current one.
+      if (kDebugLogging) {
+        LOG(INFO) << "DexFile_isDexOptNeeded previous profile doesn't exist: " << prev_profile_file;
+      }
+      if (!defer) {
+        CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str());
+      }
+      return JNI_TRUE;
+    }
+  }
+
   // Check if we have an odex file next to the dex file.
   std::string odex_filename(OatFile::DexFilenameToOdexFilename(filename.c_str()));
   std::string error_msg;
@@ -329,11 +458,18 @@
   return JNI_FALSE;
 }
 
+// JNI entry point for DexFile.isDexOptNeeded(String): no package name, so no profile check.
+static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass clazz, jstring javaFilename) {
+  return DexFile_isDexOptNeededInternal(env, clazz, javaFilename, nullptr, JNI_FALSE);
+}
+
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexFile, closeDexFile, "(J)V"),
   NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;J)Ljava/lang/Class;"),
   NATIVE_METHOD(DexFile, getClassNameList, "(J)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
+  NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Z)Z"),
   NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)J"),
 };
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 4aa1d10..0e2d921 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -203,6 +203,7 @@
 
 static void VMRuntime_updateProcessState(JNIEnv* env, jobject, jint process_state) {
   Runtime::Current()->GetHeap()->UpdateProcessState(static_cast<gc::ProcessState>(process_state));
+  Runtime::Current()->UpdateProfilerState(process_state);
 }
 
 static void VMRuntime_trimHeap(JNIEnv*, jobject) {
@@ -511,13 +512,16 @@
  * process name.  We use this information to start up the sampling profiler for
  * for ART.
  */
-static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring appDir, jstring procName) {
+static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName, jstring appDir, jstring procName) {
+  const char *pkgNameChars = env->GetStringUTFChars(pkgName, NULL);
   const char *appDirChars = env->GetStringUTFChars(appDir, NULL);
   const char *procNameChars = env->GetStringUTFChars(procName, NULL);
-  std::string profileFile = std::string(appDirChars) + "/art-profile-" + std::string(procNameChars);
-  Runtime::Current()->StartProfiler(profileFile.c_str());
+
+  std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars);
+  Runtime::Current()->StartProfiler(profileFile.c_str(), procNameChars);
   env->ReleaseStringUTFChars(appDir, appDirChars);
   env->ReleaseStringUTFChars(procName, procNameChars);
+  env->ReleaseStringUTFChars(pkgName, pkgNameChars);
 }
 
 static JNINativeMethod gMethods[] = {
@@ -542,7 +546,7 @@
   NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
-  NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;)V"),
+  NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"),
 };
 
 void register_dalvik_system_VMRuntime(JNIEnv* env) {
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 20e08b8..da98938 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -17,6 +17,7 @@
 #include "profiler.h"
 
 #include <sys/uio.h>
+#include <sys/file.h>
 
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
@@ -170,6 +171,7 @@
 
     SampleCheckpoint check_point(profiler);
 
+    size_t valid_samples = 0;
     while (now_us < end_us) {
       if (ShuttingDown(self)) {
         break;
@@ -180,7 +182,15 @@
       ThreadList* thread_list = runtime->GetThreadList();
 
       profiler->profiler_barrier_->Init(self, 0);
-      size_t barrier_count = thread_list->RunCheckpoint(&check_point);
+      size_t barrier_count = thread_list->RunCheckpointOnRunnableThreads(&check_point);
+
+      // All threads are suspended, nothing to do.
+      if (barrier_count == 0) {
+        now_us = MicroTime();
+        continue;
+      }
+
+      valid_samples += barrier_count;
 
       ThreadState old_state = self->SetState(kWaitingForCheckPointsToRun);
 
@@ -206,7 +216,7 @@
       now_us = MicroTime();
     }
 
-    if (!ShuttingDown(self)) {
+    if (valid_samples > 0 && !ShuttingDown(self)) {
       // After the profile has been taken, write it out.
       ScopedObjectAccess soa(self);   // Acquire the mutator lock.
       uint32_t size = profiler->WriteProfile();
@@ -221,39 +231,65 @@
 
 // Write out the profile file if we are generating a profile.
 uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
-  UniquePtr<File> profile_file;
-  Runtime* runtime = Runtime::Current();
-  std::string classpath = runtime->GetClassPathString();
-  size_t colon = classpath.find(':');
-  if (colon != std::string::npos) {
-    // More than one file in the classpath.  Possible?
-    classpath = classpath.substr(0, colon);
-  }
-
-  std::replace(classpath.begin(), classpath.end(), '/', '@');
   std::string full_name = profile_file_name_;
-  if (classpath != "") {
-    full_name = StringPrintf("%s-%s", profile_file_name_.c_str(), classpath.c_str());
-  }
   LOG(DEBUG) << "Saving profile to " << full_name;
 
-  profile_file.reset(OS::CreateEmptyFile(full_name.c_str()));
-  if (profile_file.get() == nullptr) {
-    // Failed to open the profile file, ignore.
-    LOG(INFO) << "Failed to op file";
+  int fd = open(full_name.c_str(), O_RDWR);
+  if (fd < 0) {
+    // Open failed.
+    LOG(ERROR) << "Failed to open profile file " << full_name;
     return 0;
   }
+
+  // Lock the file for exclusive access.  This will block if another process is using
+  // the file.
+  int err = flock(fd, LOCK_EX);
+  if (err < 0) {
+    LOG(ERROR) << "Failed to lock profile file " << full_name;
+    return 0;
+  }
+
+  // Read the previous profile.
+  profile_table_.ReadPrevious(fd);
+
+  // Move back to the start of the file.
+  lseek(fd, 0, SEEK_SET);
+
+  // Format the profile output and write to the file.
   std::ostringstream os;
   uint32_t num_methods = DumpProfile(os);
   std::string data(os.str());
-  profile_file->WriteFully(data.c_str(), data.length());
-  profile_file->Close();
+  const char *p = data.c_str();
+  size_t length = data.length();
+  size_t full_length = length;
+  do {
+    int n = ::write(fd, p, length);
+    p += n;
+    length -= n;
+  } while (length > 0);
+
+  // Truncate the file to the new length.
+  ftruncate(fd, full_length);
+
+  // Now unlock the file, allowing another process in.
+  err = flock(fd, LOCK_UN);
+  if (err < 0) {
+    LOG(ERROR) << "Failed to unlock profile file " << full_name;
+  }
+
+  // Done, close the file.
+  ::close(fd);
+
+  // Clean the profile for the next time.
+  CleanProfile();
+
   return num_methods;
 }
 
 // Start a profile thread with the user-supplied arguments.
 void BackgroundMethodSamplingProfiler::Start(int period, int duration,
-                  std::string profile_file_name, int interval_us,
+                  const std::string& profile_file_name, const std::string& procName,
+                  int interval_us,
                   double backoff_coefficient, bool startImmediately) {
   Thread* self = Thread::Current();
   {
@@ -266,12 +302,14 @@
 
   // Only on target...
 #ifdef HAVE_ANDROID_OS
-  // Switch off profiler if the dalvik.vm.profiler property has value 0.
-  char buf[PROP_VALUE_MAX];
-  property_get("dalvik.vm.profiler", buf, "0");
-  if (strcmp(buf, "0") == 0) {
-    LOG(INFO) << "Profiler disabled.  To enable setprop dalvik.vm.profiler 1";
-    return;
+  if (!startImmediately) {
+    // Switch off profiler if the dalvik.vm.profiler property has value 0.
+    char buf[PROP_VALUE_MAX];
+    property_get("dalvik.vm.profiler", buf, "0");
+    if (strcmp(buf, "0") == 0) {
+      LOG(INFO) << "Profiler disabled.  To enable setprop dalvik.vm.profiler 1";
+      return;
+    }
   }
 #endif
 
@@ -281,6 +319,7 @@
   {
     MutexLock mu(self, *Locks::profiler_lock_);
     profiler_ = new BackgroundMethodSamplingProfiler(period, duration, profile_file_name,
+                                      procName,
                                       backoff_coefficient,
                                       interval_us, startImmediately);
 
@@ -323,9 +362,10 @@
 }
 
 BackgroundMethodSamplingProfiler::BackgroundMethodSamplingProfiler(int period, int duration,
-                   std::string profile_file_name,
+                   const std::string& profile_file_name,
+                   const std::string& process_name,
                    double backoff_coefficient, int interval_us, bool startImmediately)
-    : profile_file_name_(profile_file_name),
+    : profile_file_name_(profile_file_name), process_name_(process_name),
       period_s_(period), start_immediately_(startImmediately),
       interval_us_(interval_us), backoff_factor_(1.0),
       backoff_coefficient_(backoff_coefficient), duration_s_(duration),
@@ -423,9 +463,13 @@
   lock_.Unlock(Thread::Current());
 }
 
-// Write the profile table to the output stream.
+// Write the profile table to the output stream.  Also merge with the previous profile.
 uint32_t ProfileSampleResults::Write(std::ostream &os) {
   ScopedObjectAccess soa(Thread::Current());
+  num_samples_ += previous_num_samples_;
+  num_null_methods_ += previous_num_null_methods_;
+  num_boot_methods_ += previous_num_boot_methods_;
+
   LOG(DEBUG) << "Profile: " << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
   os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
   uint32_t num_methods = 0;
@@ -433,14 +477,35 @@
     Map *map = table[i];
     if (map != nullptr) {
       for (const auto &meth_iter : *map) {
-         mirror::ArtMethod *method = meth_iter.first;
-         std::string method_name = PrettyMethod(method);
-         uint32_t method_size = method->GetCodeSize();
-         os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), meth_iter.second, method_size);
-         ++num_methods;
-       }
+        mirror::ArtMethod *method = meth_iter.first;
+        std::string method_name = PrettyMethod(method);
+
+        MethodHelper mh(method);
+        const DexFile::CodeItem* codeitem = mh.GetCodeItem();
+        uint32_t method_size = 0;
+        if (codeitem != nullptr) {
+          method_size = codeitem->insns_size_in_code_units_;
+        }
+        uint32_t count = meth_iter.second;
+
+        // Merge this profile entry with one from a previous run (if present).  Also
+        // remove the previous entry.
+        PreviousProfile::iterator pi = previous_.find(method_name);
+        if (pi != previous_.end()) {
+          count += pi->second.count_;
+          previous_.erase(pi);
+        }
+        os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
+        ++num_methods;
+      }
     }
   }
+
+  // Now we write out the remaining previous methods.
+  for (PreviousProfile::iterator pi = previous_.begin(); pi != previous_.end(); ++pi) {
+    os << StringPrintf("%s/%u/%u\n",  pi->first.c_str(), pi->second.count_, pi->second.method_size_);
+    ++num_methods;
+  }
   return num_methods;
 }
 
@@ -452,11 +517,67 @@
      delete table[i];
      table[i] = nullptr;
   }
+  previous_.clear();
 }
 
 uint32_t ProfileSampleResults::Hash(mirror::ArtMethod* method) {
   return (PointerToLowMemUInt32(method) >> 3) % kHashSize;
 }
 
+// Reads one '\n'-terminated line from fd into |line|; the newline is not stored.
+// Returns true once a complete line has been read.  Returns false if EOF or a
+// read error occurs before a newline is seen; |line| then holds any partial
+// data consumed so far.
+static bool ReadProfileLine(int fd, std::string& line) {
+  line.clear();
+  char ch = '\0';
+  // One byte per read() call keeps the fd positioned exactly after the newline.
+  // TODO: could speed this up but is it worth it?
+  while (read(fd, &ch, 1) == 1) {
+    if (ch == '\n') {
+      return true;
+    }
+    line += ch;
+  }
+  return false;
+}
+
+// Loads the profile written by a previous run from fd so that Write() can merge it.
+// The layout is the one Write() emits: a "samples/null/boot" summary line followed by
+// one "name/count/size" line per method.  Parsing stops at EOF or the first malformed line.
+void ProfileSampleResults::ReadPrevious(int fd) {
+  // Reset counters.
+  previous_num_samples_ = previous_num_null_methods_ = previous_num_boot_methods_ = 0;
+
+  // Counters are uint32_t: strtoul avoids atoi()'s int range and its overflow UB.
+  auto parse_u32 = [](const std::string& field) {
+    return static_cast<uint32_t>(strtoul(field.c_str(), nullptr, 10));
+  };
+
+  // The first line contains summary information.
+  std::string line;
+  if (!ReadProfileLine(fd, line)) {
+    return;
+  }
+  std::vector<std::string> summary_info;
+  Split(line, '/', summary_info);
+  if (summary_info.size() != 3) {
+    // Bad summary info.  It should be count/nullcount/bootcount
+    return;
+  }
+  previous_num_samples_ = parse_u32(summary_info[0]);
+  previous_num_null_methods_ = parse_u32(summary_info[1]);
+  previous_num_boot_methods_ = parse_u32(summary_info[2]);
+
+  // Now read each line until the end of file.  Each line consists of 3 fields separated by /
+  while (ReadProfileLine(fd, line)) {
+    std::vector<std::string> info;
+    Split(line, '/', info);
+    if (info.size() != 3) {
+      break;  // Malformed.
+    }
+    previous_[info[0]] = PreviousValue(parse_u32(info[1]), parse_u32(info[2]));
+  }
+}
 }  // namespace art
 
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 6ea6c84..b03b170 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -54,10 +54,12 @@
 
   void Put(mirror::ArtMethod* method);
   uint32_t Write(std::ostream &os);
+  void ReadPrevious(int fd);
   void Clear();
   uint32_t GetNumSamples() { return num_samples_; }
   void NullMethod() { ++num_null_methods_; }
   void BootMethod() { ++num_boot_methods_; }
+
  private:
   uint32_t Hash(mirror::ArtMethod* method);
   static constexpr int kHashSize = 17;
@@ -68,6 +70,19 @@
 
   typedef std::map<mirror::ArtMethod*, uint32_t> Map;   // Map of method vs its count.
   Map *table[kHashSize];
+
+  struct PreviousValue {  // One method's entry loaded from a previously written profile.
+    PreviousValue() : count_(0), method_size_(0) {}
+    PreviousValue(uint32_t count, uint32_t method_size) : count_(count), method_size_(method_size) {}
+    uint32_t count_;         // Sample count read from the previous profile.
+    uint32_t method_size_;   // Method size read from the previous profile.
+  };
+
+  typedef std::map<std::string, PreviousValue> PreviousProfile;
+  PreviousProfile previous_;               // Entries from the previous profile, keyed by method name.
+  uint32_t previous_num_samples_;          // Sample total from the previous profile's summary line.
+  uint32_t previous_num_null_methods_;     // Number of samples where we don't know the method.
+  uint32_t previous_num_boot_methods_;     // Number of samples in the boot path.
 };
 
 //
@@ -87,7 +102,8 @@
 
 class BackgroundMethodSamplingProfiler {
  public:
-  static void Start(int period, int duration, std::string profile_filename, int interval_us,
+  static void Start(int period, int duration, const std::string& profile_filename,
+                    const std::string& procName, int interval_us,
                     double backoff_coefficient, bool startImmediately)
   LOCKS_EXCLUDED(Locks::mutator_lock_,
                  Locks::thread_list_lock_,
@@ -104,8 +120,10 @@
   }
 
  private:
-  explicit BackgroundMethodSamplingProfiler(int period, int duration, std::string profile_filename,
-                 double backoff_coefficient, int interval_us, bool startImmediately);
+  explicit BackgroundMethodSamplingProfiler(int period, int duration,
+                                            const std::string& profile_filename,
+                                            const std::string& process_name,
+                                            double backoff_coefficient, int interval_us, bool startImmediately);
 
   // The sampling interval in microseconds is passed as an argument.
   static void* RunProfilerThread(void* arg) LOCKS_EXCLUDED(Locks::profiler_lock_);
@@ -130,6 +148,9 @@
   // File to write profile data out to.  Cannot be empty if we are profiling.
   std::string profile_file_name_;
 
+  // Process name.
+  std::string process_name_;
+
   // Number of seconds between profile runs.
   uint32_t period_s_;
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index fdbf245..d1c8370 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -27,6 +27,7 @@
 #include <cstdlib>
 #include <limits>
 #include <vector>
+#include <fcntl.h>
 
 #include "arch/arm/registers_arm.h"
 #include "arch/mips/registers_mips.h"
@@ -69,6 +70,10 @@
 
 #include "JniConstants.h"  // Last to avoid LOG redefinition in ics-mr1-plus-art.
 
+#ifdef HAVE_ANDROID_OS
+#include "cutils/properties.h"
+#endif
+
 namespace art {
 
 Runtime* Runtime::instance_ = NULL;
@@ -370,7 +375,12 @@
 
   if (profile_) {
     // User has asked for a profile using -Xprofile
-    StartProfiler(profile_output_filename_.c_str(), true);
+    // Create the profile file if it doesn't exist.
+    int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
+    if (fd >= 0) {
+      close(fd);
+    }
+    StartProfiler(profile_output_filename_.c_str(), "", true);
   }
 
   return true;
@@ -1055,10 +1065,10 @@
   method_verifiers_.erase(it);
 }
 
-void Runtime::StartProfiler(const char *appDir, bool startImmediately) {
+void Runtime::StartProfiler(const char* appDir, const char* procName, bool startImmediately) {
   BackgroundMethodSamplingProfiler::Start(profile_period_s_, profile_duration_s_, appDir,
-                                          profile_interval_us_, profile_backoff_coefficient_,
-                                          startImmediately);
+      procName, profile_interval_us_,
+      profile_backoff_coefficient_, startImmediately);
 }
 
 // Transaction support.
@@ -1136,4 +1146,7 @@
   fault_message_ = message;
 }
 
+void Runtime::UpdateProfilerState(int state) {  // For now this only logs the new state.
+  LOG(DEBUG) << "Profiler state updated to " << state;
+}
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 65d296a..109f031 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -373,7 +373,8 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
-  void StartProfiler(const char *appDir, bool startImmediately = false);
+  void StartProfiler(const char* appDir, const char* procName, bool startImmediately = false);
+  void UpdateProfilerState(int state);
 
   // Transaction support.
   bool IsActiveTransaction() const;
@@ -419,6 +420,12 @@
   void StartDaemonThreads();
   void StartSignalCatcher();
 
+  // NOTE: these must match the gc::ProcessState values as they come directly
+  // from the framework.
+  // TODO: kProfileForground/kProfileBackgrouud are misspelled; rename with all users.
+  static constexpr int kProfileForground = 0;
+  static constexpr int kProfileBackgrouud = 1;
+
   // A pointer to the active runtime or NULL.
   static Runtime* instance_;
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index bddebbd..ac5750b 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -269,6 +269,36 @@
   return count + suspended_count_modified_threads.size() + 1;
 }
 
+// Request that a checkpoint function be run on all active (non-suspended)
+// threads other than the caller.  Returns the number of requests accepted,
+// i.e. the number of threads expected to run the function.
+size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
+  Thread* const self = Thread::Current();
+  if (kIsDebugBuild) {
+    // The caller must hold none of these locks and must not be runnable.
+    Locks::mutator_lock_->AssertNotExclusiveHeld(self);
+    Locks::thread_list_lock_->AssertNotHeld(self);
+    Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+    CHECK_NE(self->GetState(), kRunnable);
+  }
+
+  size_t accepted = 0;
+  {
+    // Both locks are held while the requests are issued.
+    MutexLock list_lock(self, *Locks::thread_list_lock_);
+    MutexLock suspend_count_lock(self, *Locks::thread_suspend_count_lock_);
+    for (const auto& other : list_) {
+      // Skip ourselves; a thread that accepts the request will run its
+      // checkpoint some time in the near future.
+      if (other != self && other->RequestCheckpoint(checkpoint_function)) {
+        ++accepted;
+      }
+    }
+  }
+
+  return accepted;
+}
+
 void ThreadList::SuspendAll() {
   Thread* self = Thread::Current();
   DCHECK(self != nullptr);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 1a76705..58bd92a 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -90,6 +90,10 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
+  // Requests a checkpoint on the other threads; returns the number of accepted requests.
+  size_t RunCheckpointOnRunnableThreads(Closure* checkpoint_function)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_);
+
   // Suspends all threads
   void SuspendAllForDebugger()
       LOCKS_EXCLUDED(Locks::mutator_lock_,
