Merge "metricsd: Persist the metrics to disk periodically."
am: 818f757383

* commit '818f757383c056d5970c8a205700c390ccc4292e':
  metricsd: Persist the metrics to disk periodically.
diff --git a/metricsd/constants.h b/metricsd/constants.h
index 4815888..b702737 100644
--- a/metricsd/constants.h
+++ b/metricsd/constants.h
@@ -26,6 +26,7 @@
 static const char kMetricsServer[] = "https://clients4.google.com/uma/v2";
 static const char kConsentFileName[] = "enabled";
 static const char kStagedLogName[] = "staged_log";
+static const char kSavedLogName[] = "saved_log";
 static const char kFailedUploadCountName[] = "failed_upload_count";
 static const char kDefaultVersion[] = "0.0.0.0";
 
diff --git a/metricsd/metricsd_main.cc b/metricsd/metricsd_main.cc
index ae441d0..0178342 100644
--- a/metricsd/metricsd_main.cc
+++ b/metricsd/metricsd_main.cc
@@ -33,10 +33,13 @@
 
   // Upload Service flags.
   DEFINE_int32(upload_interval_secs, 1800,
-               "Interval at which metrics_daemon sends the metrics. (needs "
-               "-uploader)");
+               "Interval at which metricsd uploads the metrics.");
+  DEFINE_int32(disk_persistence_interval_secs, 300,
+               "Interval at which metricsd saves the aggregated metrics to "
+               "disk to avoid losing them if metricsd stops in between "
+               "two uploads.");
   DEFINE_string(server, metrics::kMetricsServer,
-                "Server to upload the metrics to. (needs -uploader)");
+                "Server to upload the metrics to.");
   DEFINE_string(private_directory, metrics::kMetricsdDirectory,
                 "Path to the private directory used by metricsd "
                 "(testing only)");
@@ -72,6 +75,7 @@
 
   UploadService upload_service(
       FLAGS_server, base::TimeDelta::FromSeconds(FLAGS_upload_interval_secs),
+      base::TimeDelta::FromSeconds(FLAGS_disk_persistence_interval_secs),
       base::FilePath(FLAGS_private_directory),
       base::FilePath(FLAGS_shared_directory));
 
diff --git a/metricsd/uploader/metrics_log.cc b/metricsd/uploader/metrics_log.cc
index a01b5da..39655e6 100644
--- a/metricsd/uploader/metrics_log.cc
+++ b/metricsd/uploader/metrics_log.cc
@@ -18,6 +18,8 @@
 
 #include <string>
 
+#include <base/files/file_util.h>
+
 #include "uploader/proto/system_profile.pb.h"
 #include "uploader/system_profile_setter.h"
 
@@ -27,6 +29,40 @@
     : MetricsLogBase("", 0, metrics::MetricsLogBase::ONGOING_LOG, "") {
 }
 
+bool MetricsLog::LoadFromFile(const base::FilePath& saved_log) {
+  // Pull the whole backup into memory; bail out if it cannot be read.
+  std::string contents;
+  const bool was_read = base::ReadFileToString(saved_log, &contents);
+  if (!was_read) {
+    LOG(ERROR) << "Failed to read the metrics log backup from "
+               << saved_log.value();
+    return false;
+  }
+  // A backup that fails to parse gets a DeleteFile call and a cleared proto.
+  if (!uma_proto()->ParseFromString(contents)) {
+    LOG(ERROR) << "Failed to parse log from " << saved_log.value()
+               << ", deleting the log";
+    base::DeleteFile(saved_log, false);
+    uma_proto()->Clear();
+    return false;
+  }
+  VLOG(1) << uma_proto()->histogram_event_size() << " histograms loaded from "
+          << saved_log.value();
+  return true;
+}
+
+bool MetricsLog::SaveToFile(const base::FilePath& path) {
+  // Encode this log into a string, then write that string to |path|.
+  std::string bytes;
+  GetEncodedLog(&bytes);
+  const int expected = static_cast<int>(bytes.size());
+  const int written = base::WriteFile(path, bytes.data(), bytes.size());
+  // Only a write of exactly the encoded size counts as success.
+  if (written == expected) return true;
+  LOG(ERROR) << "Failed to persist the current log to " << path.value();
+  return false;
+}
+
 void MetricsLog::IncrementUserCrashCount(unsigned int count) {
   metrics::SystemProfileProto::Stability* stability(
       uma_proto()->mutable_system_profile()->mutable_stability());
diff --git a/metricsd/uploader/metrics_log.h b/metricsd/uploader/metrics_log.h
index b76cd72..9e60b97 100644
--- a/metricsd/uploader/metrics_log.h
+++ b/metricsd/uploader/metrics_log.h
@@ -19,6 +19,7 @@
 
 #include <string>
 
+#include <base/files/file_path.h>
 #include <base/macros.h>
 
 #include "uploader/metrics_log_base.h"
@@ -44,8 +45,15 @@
   // Populate the system profile with system information using setter.
   bool PopulateSystemProfile(SystemProfileSetter* setter);
 
+  // Load the log from |path|. Returns false if reading or parsing fails.
+  bool LoadFromFile(const base::FilePath& path);
+
+  // Save this log to |path|. Returns false if the write is incomplete.
+  bool SaveToFile(const base::FilePath& path);
+
  private:
   friend class UploadServiceTest;
+  FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
   FRIEND_TEST(UploadServiceTest, LogContainsAggregatedValues);
   FRIEND_TEST(UploadServiceTest, LogContainsCrashCounts);
   FRIEND_TEST(UploadServiceTest, LogKernelCrash);
diff --git a/metricsd/uploader/upload_service.cc b/metricsd/uploader/upload_service.cc
index 0d7aacc..ab44b28 100644
--- a/metricsd/uploader/upload_service.cc
+++ b/metricsd/uploader/upload_service.cc
@@ -42,6 +42,7 @@
 
 UploadService::UploadService(const std::string& server,
                              const base::TimeDelta& upload_interval,
+                             const base::TimeDelta& disk_persistence_interval,
                              const base::FilePath& private_metrics_directory,
                              const base::FilePath& shared_metrics_directory)
     : brillo::Daemon(),
@@ -51,11 +52,19 @@
                            private_metrics_directory),
       counters_(new CrashCounters),
       upload_interval_(upload_interval),
+      disk_persistence_interval_(disk_persistence_interval),
       metricsd_service_runner_(counters_) {
   staged_log_path_ = private_metrics_directory.Append(metrics::kStagedLogName);
+  saved_log_path_ = private_metrics_directory.Append(metrics::kSavedLogName);
   consent_file_ = shared_metrics_directory.Append(metrics::kConsentFileName);
 }
 
+void UploadService::LoadSavedLog() {
+  // With no backup on disk there is nothing to restore.
+  if (!base::PathExists(saved_log_path_)) return;
+  GetOrCreateCurrentLog()->LoadFromFile(saved_log_path_);
+}
+
 int UploadService::OnInit() {
   brillo::Daemon::OnInit();
 
@@ -64,12 +73,18 @@
 
   system_profile_setter_.reset(new SystemProfileCache());
 
-  base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
-      base::Bind(&UploadService::UploadEventCallback,
-                 base::Unretained(this),
-                 upload_interval_),
+  base::MessageLoop::current()->PostDelayedTask(
+      FROM_HERE,
+      base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
       upload_interval_);
 
+  base::MessageLoop::current()->PostDelayedTask(
+      FROM_HERE,
+      base::Bind(&UploadService::PersistEventCallback, base::Unretained(this)),
+      disk_persistence_interval_);
+
+  LoadSavedLog();
+
   return EX_OK;
 }
 
@@ -78,24 +93,37 @@
 }
 
 void UploadService::InitForTest(SystemProfileSetter* setter) {
+  LoadSavedLog();
   system_profile_setter_.reset(setter);
 }
 
 void UploadService::StartNewLog() {
-  CHECK(!HasStagedLog()) << "the staged log should be discarded before "
-                         << "starting a new metrics log";
-  MetricsLog* log = new MetricsLog();
-  current_log_.reset(log);
+  current_log_.reset(new MetricsLog());
 }
 
-void UploadService::UploadEventCallback(const base::TimeDelta& interval) {
+void UploadService::UploadEventCallback() {
   UploadEvent();
 
-  base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
-      base::Bind(&UploadService::UploadEventCallback,
-                 base::Unretained(this),
-                 interval),
-      interval);
+  base::MessageLoop::current()->PostDelayedTask(
+      FROM_HERE,
+      base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
+      upload_interval_);
+}
+
+void UploadService::PersistEventCallback() {
+  // Save first, then post this callback again with the persistence delay.
+  PersistToDisk();
+  const auto rearm = base::Bind(&UploadService::PersistEventCallback,
+                                base::Unretained(this));
+  base::MessageLoop::current()->PostDelayedTask(FROM_HERE, rearm,
+                                                disk_persistence_interval_);
+}
+
+void UploadService::PersistToDisk() {
+  GatherHistograms();
+  // Without a current log there is nothing to write; skip the save.
+  if (!current_log_) return;
+  current_log_->SaveToFile(saved_log_path_);
+}
 
 void UploadService::UploadEvent() {
@@ -189,14 +217,16 @@
                  << "log.";
     return;
   }
-  std::string encoded_log;
-  staged_log->GetEncodedLog(&encoded_log);
+
+  if (!base::DeleteFile(saved_log_path_, false)) {
+    // There is a chance that we will upload the same metrics twice but, if we
+    // are lucky, the backup will be overwritten before that. When in doubt,
+    // try not to lose any metrics.
+    LOG(ERROR) << "failed to delete the last backup of the current log.";
+  }
 
   failed_upload_count_.Set(0);
-  if (static_cast<int>(encoded_log.size()) != base::WriteFile(
-      staged_log_path_, encoded_log.data(), encoded_log.size())) {
-    LOG(ERROR) << "failed to persist to " << staged_log_path_.value();
-  }
+  staged_log->SaveToFile(staged_log_path_);
 }
 
 MetricsLog* UploadService::GetOrCreateCurrentLog() {
diff --git a/metricsd/uploader/upload_service.h b/metricsd/uploader/upload_service.h
index b84179f..a1d9d3b 100644
--- a/metricsd/uploader/upload_service.h
+++ b/metricsd/uploader/upload_service.h
@@ -66,6 +66,7 @@
  public:
   UploadService(const std::string& server,
                 const base::TimeDelta& upload_interval,
+                const base::TimeDelta& disk_persistence_interval,
                 const base::FilePath& private_metrics_directory,
                 const base::FilePath& shared_metrics_directory);
 
@@ -79,8 +80,8 @@
   // launch as it is destroyed when staging the log.
   void StartNewLog();
 
-  // Event callback for handling MessageLoop events.
-  void UploadEventCallback(const base::TimeDelta& interval);
+  // Saves the current metrics to a file.
+  void PersistToDisk();
 
   // Triggers an upload event.
   void UploadEvent();
@@ -100,6 +101,8 @@
   friend class UploadServiceTest;
 
   FRIEND_TEST(UploadServiceTest, CanSendMultipleTimes);
+  FRIEND_TEST(UploadServiceTest, CorruptedSavedLog);
+  FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
   FRIEND_TEST(UploadServiceTest, DiscardLogsAfterTooManyFailedUpload);
   FRIEND_TEST(UploadServiceTest, EmptyLogsAreNotSent);
   FRIEND_TEST(UploadServiceTest, FailedSendAreRetried);
@@ -111,6 +114,7 @@
   FRIEND_TEST(UploadServiceTest, LogKernelCrash);
   FRIEND_TEST(UploadServiceTest, LogUncleanShutdown);
   FRIEND_TEST(UploadServiceTest, LogUserCrash);
+  FRIEND_TEST(UploadServiceTest, PersistEmptyLog);
   FRIEND_TEST(UploadServiceTest, UnknownCrashIgnored);
   FRIEND_TEST(UploadServiceTest, ValuesInConfigFileAreSent);
 
@@ -121,12 +125,21 @@
   // will be discarded.
   static const int kMaxFailedUpload;
 
+  // Loads the log saved to disk if it exists.
+  void LoadSavedLog();
+
   // Resets the internal state.
   void Reset();
 
   // Returns true iff metrics reporting is enabled.
   bool AreMetricsEnabled();
 
+  // Event callback for handling Upload events.
+  void UploadEventCallback();
+
+  // Event callback for handling Persist events.
+  void PersistEventCallback();
+
   // Aggregates all histogram available in memory and store them in the current
   // log.
   void GatherHistograms();
@@ -156,11 +169,13 @@
   std::shared_ptr<CrashCounters> counters_;
 
   base::TimeDelta upload_interval_;
+  base::TimeDelta disk_persistence_interval_;
 
   MetricsdServiceRunner metricsd_service_runner_;
 
   base::FilePath consent_file_;
   base::FilePath staged_log_path_;
+  base::FilePath saved_log_path_;
 
   bool testing_;
 };
diff --git a/metricsd/uploader/upload_service_test.cc b/metricsd/uploader/upload_service_test.cc
index bd5b39a..70112f4 100644
--- a/metricsd/uploader/upload_service_test.cc
+++ b/metricsd/uploader/upload_service_test.cc
@@ -45,17 +45,17 @@
     ASSERT_FALSE(base::StatisticsRecorder::IsActive());
     base::StatisticsRecorder::Initialize();
 
-    base::FilePath private_dir = dir_.path().Append("private");
-    base::FilePath shared_dir = dir_.path().Append("shared");
+    private_dir_ = dir_.path().Append("private");
+    shared_dir_ = dir_.path().Append("shared");
 
-    EXPECT_TRUE(base::CreateDirectory(private_dir));
-    EXPECT_TRUE(base::CreateDirectory(shared_dir));
+    EXPECT_TRUE(base::CreateDirectory(private_dir_));
+    EXPECT_TRUE(base::CreateDirectory(shared_dir_));
 
-    ASSERT_EQ(0, base::WriteFile(shared_dir.Append(metrics::kConsentFileName),
+    ASSERT_EQ(0, base::WriteFile(shared_dir_.Append(metrics::kConsentFileName),
                                  "", 0));
 
-    upload_service_.reset(
-        new UploadService("", base::TimeDelta(), private_dir, shared_dir));
+    upload_service_.reset(new UploadService(
+        "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
     counters_ = upload_service_->counters_;
 
     upload_service_->sender_.reset(new SenderMock);
@@ -81,15 +81,16 @@
     base::FilePath filepath =
         dir_.path().Append("etc/os-release.d").Append(name);
     ASSERT_TRUE(base::CreateDirectory(filepath.DirName()));
-    ASSERT_EQ(
-        value.size(),
-        base::WriteFile(filepath, value.data(), value.size()));
+    ASSERT_EQ(value.size(),
+              base::WriteFile(filepath, value.data(), value.size()));
   }
 
   const metrics::SystemProfileProto_Stability GetCurrentStability() {
     EXPECT_TRUE(upload_service_->current_log_.get());
 
-    return upload_service_->current_log_->uma_proto()->system_profile().stability();
+    return upload_service_->current_log_->uma_proto()
+        ->system_profile()
+        .stability();
   }
 
   base::ScopedTempDir dir_;
@@ -97,6 +98,8 @@
 
   std::unique_ptr<base::AtExitManager> exit_manager_;
   std::shared_ptr<CrashCounters> counters_;
+  base::FilePath private_dir_;
+  base::FilePath shared_dir_;
 };
 
 TEST_F(UploadServiceTest, FailedSendAreRetried) {
@@ -219,10 +222,8 @@
 }
 
 TEST_F(UploadServiceTest, ExtractChannelFromString) {
-  EXPECT_EQ(
-      SystemProfileCache::ProtoChannelFromString(
-          "developer-build"),
-      metrics::SystemProfileProto::CHANNEL_UNKNOWN);
+  EXPECT_EQ(SystemProfileCache::ProtoChannelFromString("developer-build"),
+            metrics::SystemProfileProto::CHANNEL_UNKNOWN);
 
   EXPECT_EQ(metrics::SystemProfileProto::CHANNEL_DEV,
             SystemProfileCache::ProtoChannelFromString("dev-channel"));
@@ -297,3 +298,38 @@
   SetTestingProperty(metrics::kProductId, "hello");
   ASSERT_TRUE(cache.Initialize());
 }
+
+TEST_F(UploadServiceTest, CurrentLogSavedAndResumed) {
+  // Record one sample and back the current log up to disk.
+  SendHistogram("hello", 10, 0, 100, 10);
+  upload_service_->PersistToDisk();
+  auto* before = upload_service_->current_log_.get();
+  EXPECT_EQ(1, before->uma_proto()->histogram_event().size());
+
+  // A fresh service over the same directories loads that backup on init.
+  upload_service_.reset(new UploadService(
+      "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
+  upload_service_->InitForTest(nullptr);
+  SendHistogram("hello", 10, 0, 100, 10);
+  upload_service_->GatherHistograms();
+  auto* after = upload_service_->GetOrCreateCurrentLog();
+  EXPECT_EQ(2, after->uma_proto()->histogram_event().size());
+}
+
+TEST_F(UploadServiceTest, PersistEmptyLog) {
+  upload_service_->PersistToDisk();  // No histogram is sent in this test body.
+  EXPECT_FALSE(base::PathExists(upload_service_->saved_log_path_));
+}
+
+TEST_F(UploadServiceTest, CorruptedSavedLog) {
+  // Plant a bogus backup where the service looks for its saved log.
+  const base::FilePath backup = upload_service_->saved_log_path_;
+  EXPECT_EQ(5, base::WriteFile(backup, "hello", 5));
+  // Recreate the service over the same directories and load the backup.
+  upload_service_.reset(new UploadService(
+      "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
+  upload_service_->InitForTest(nullptr);
+  // The bad backup must be dropped without preventing a new current log.
+  ASSERT_NE(nullptr, upload_service_->GetOrCreateCurrentLog());
+  ASSERT_FALSE(base::PathExists(upload_service_->saved_log_path_));
+}