metricsd: Persist the metrics to disk periodically.
Every now and then (5 minutes by default), the uploader will persist the
current metrics to disk to avoid losing them in case we exit
unexpectedly (reboot or crash).
When starting up, metricsd will load the previously saved log and resume
the metrics collection from there.
Bug: 25670584
Test: Unit tests.
Test: manual: restart metricsd. The saved log is detected and parsed
correctly.
Test: manual: Send a sample to metricsd, send SIGTERM to metricsd, the
log is saved to disk, metricsd restarts and picks up the log where it
left.
Change-Id: I4cefc62c7ea1fa51333d84d8a7ba0a2e9c7fd58f
diff --git a/metricsd/constants.h b/metricsd/constants.h
index 4815888..b702737 100644
--- a/metricsd/constants.h
+++ b/metricsd/constants.h
@@ -26,6 +26,7 @@
static const char kMetricsServer[] = "https://clients4.google.com/uma/v2";
static const char kConsentFileName[] = "enabled";
static const char kStagedLogName[] = "staged_log";
+static const char kSavedLogName[] = "saved_log";
static const char kFailedUploadCountName[] = "failed_upload_count";
static const char kDefaultVersion[] = "0.0.0.0";
diff --git a/metricsd/metricsd_main.cc b/metricsd/metricsd_main.cc
index ae441d0..0178342 100644
--- a/metricsd/metricsd_main.cc
+++ b/metricsd/metricsd_main.cc
@@ -33,10 +33,13 @@
// Upload Service flags.
DEFINE_int32(upload_interval_secs, 1800,
- "Interval at which metrics_daemon sends the metrics. (needs "
- "-uploader)");
+ "Interval at which metricsd uploads the metrics.");
+ DEFINE_int32(disk_persistence_interval_secs, 300,
+ "Interval at which metricsd saves the aggregated metrics to "
+ "disk to avoid losing them if metricsd stops in between "
+ "two uploads.");
DEFINE_string(server, metrics::kMetricsServer,
- "Server to upload the metrics to. (needs -uploader)");
+ "Server to upload the metrics to.");
DEFINE_string(private_directory, metrics::kMetricsdDirectory,
"Path to the private directory used by metricsd "
"(testing only)");
@@ -72,6 +75,7 @@
UploadService upload_service(
FLAGS_server, base::TimeDelta::FromSeconds(FLAGS_upload_interval_secs),
+ base::TimeDelta::FromSeconds(FLAGS_disk_persistence_interval_secs),
base::FilePath(FLAGS_private_directory),
base::FilePath(FLAGS_shared_directory));
diff --git a/metricsd/uploader/metrics_log.cc b/metricsd/uploader/metrics_log.cc
index a01b5da..39655e6 100644
--- a/metricsd/uploader/metrics_log.cc
+++ b/metricsd/uploader/metrics_log.cc
@@ -18,6 +18,8 @@
#include <string>
+#include <base/files/file_util.h>
+
#include "uploader/proto/system_profile.pb.h"
#include "uploader/system_profile_setter.h"
@@ -27,6 +29,40 @@
: MetricsLogBase("", 0, metrics::MetricsLogBase::ONGOING_LOG, "") {
}
+bool MetricsLog::LoadFromFile(const base::FilePath& saved_log) {
+ std::string encoded_log;
+ if (!base::ReadFileToString(saved_log, &encoded_log)) {
+ LOG(ERROR) << "Failed to read the metrics log backup from "
+ << saved_log.value();
+ return false;
+ }
+
+ if (!uma_proto()->ParseFromString(encoded_log)) {
+ LOG(ERROR) << "Failed to parse log from " << saved_log.value()
+ << ", deleting the log";
+ base::DeleteFile(saved_log, false);
+ uma_proto()->Clear();
+ return false;
+ }
+
+ VLOG(1) << uma_proto()->histogram_event_size() << " histograms loaded from "
+ << saved_log.value();
+
+ return true;
+}
+
+bool MetricsLog::SaveToFile(const base::FilePath& path) {
+ std::string encoded_log;
+ GetEncodedLog(&encoded_log);
+
+ if (static_cast<int>(encoded_log.size()) !=
+ base::WriteFile(path, encoded_log.data(), encoded_log.size())) {
+ LOG(ERROR) << "Failed to persist the current log to " << path.value();
+ return false;
+ }
+ return true;
+}
+
void MetricsLog::IncrementUserCrashCount(unsigned int count) {
metrics::SystemProfileProto::Stability* stability(
uma_proto()->mutable_system_profile()->mutable_stability());
diff --git a/metricsd/uploader/metrics_log.h b/metricsd/uploader/metrics_log.h
index b76cd72..9e60b97 100644
--- a/metricsd/uploader/metrics_log.h
+++ b/metricsd/uploader/metrics_log.h
@@ -19,6 +19,7 @@
#include <string>
+#include <base/files/file_path.h>
#include <base/macros.h>
#include "uploader/metrics_log_base.h"
@@ -44,8 +45,15 @@
// Populate the system profile with system information using setter.
bool PopulateSystemProfile(SystemProfileSetter* setter);
+ // Load the log from |path|.
+ bool LoadFromFile(const base::FilePath& path);
+
+ // Save this log to |path|.
+ bool SaveToFile(const base::FilePath& path);
+
private:
friend class UploadServiceTest;
+ FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
FRIEND_TEST(UploadServiceTest, LogContainsAggregatedValues);
FRIEND_TEST(UploadServiceTest, LogContainsCrashCounts);
FRIEND_TEST(UploadServiceTest, LogKernelCrash);
diff --git a/metricsd/uploader/upload_service.cc b/metricsd/uploader/upload_service.cc
index 0d7aacc..ab44b28 100644
--- a/metricsd/uploader/upload_service.cc
+++ b/metricsd/uploader/upload_service.cc
@@ -42,6 +42,7 @@
UploadService::UploadService(const std::string& server,
const base::TimeDelta& upload_interval,
+ const base::TimeDelta& disk_persistence_interval,
const base::FilePath& private_metrics_directory,
const base::FilePath& shared_metrics_directory)
: brillo::Daemon(),
@@ -51,11 +52,19 @@
private_metrics_directory),
counters_(new CrashCounters),
upload_interval_(upload_interval),
+ disk_persistence_interval_(disk_persistence_interval),
metricsd_service_runner_(counters_) {
staged_log_path_ = private_metrics_directory.Append(metrics::kStagedLogName);
+ saved_log_path_ = private_metrics_directory.Append(metrics::kSavedLogName);
consent_file_ = shared_metrics_directory.Append(metrics::kConsentFileName);
}
+void UploadService::LoadSavedLog() {
+ if (base::PathExists(saved_log_path_)) {
+ GetOrCreateCurrentLog()->LoadFromFile(saved_log_path_);
+ }
+}
+
int UploadService::OnInit() {
brillo::Daemon::OnInit();
@@ -64,12 +73,18 @@
system_profile_setter_.reset(new SystemProfileCache());
- base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
- base::Bind(&UploadService::UploadEventCallback,
- base::Unretained(this),
- upload_interval_),
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
upload_interval_);
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::PersistEventCallback, base::Unretained(this)),
+ disk_persistence_interval_);
+
+ LoadSavedLog();
+
return EX_OK;
}
@@ -78,24 +93,37 @@
}
void UploadService::InitForTest(SystemProfileSetter* setter) {
+ LoadSavedLog();
system_profile_setter_.reset(setter);
}
void UploadService::StartNewLog() {
- CHECK(!HasStagedLog()) << "the staged log should be discarded before "
- << "starting a new metrics log";
- MetricsLog* log = new MetricsLog();
- current_log_.reset(log);
+ current_log_.reset(new MetricsLog());
}
-void UploadService::UploadEventCallback(const base::TimeDelta& interval) {
+void UploadService::UploadEventCallback() {
UploadEvent();
- base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
- base::Bind(&UploadService::UploadEventCallback,
- base::Unretained(this),
- interval),
- interval);
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::UploadEventCallback, base::Unretained(this)),
+ upload_interval_);
+}
+
+void UploadService::PersistEventCallback() {
+ PersistToDisk();
+
+ base::MessageLoop::current()->PostDelayedTask(
+ FROM_HERE,
+ base::Bind(&UploadService::PersistEventCallback, base::Unretained(this)),
+ disk_persistence_interval_);
+}
+
+void UploadService::PersistToDisk() {
+ GatherHistograms();
+ if (current_log_) {
+ current_log_->SaveToFile(saved_log_path_);
+ }
}
void UploadService::UploadEvent() {
@@ -189,14 +217,16 @@
<< "log.";
return;
}
- std::string encoded_log;
- staged_log->GetEncodedLog(&encoded_log);
+
+ if (!base::DeleteFile(saved_log_path_, false)) {
+ // There is a chance that we will upload the same metrics twice but, if we
+ // are lucky, the backup should be overridden before that. In doubt, try not
+ // to lose any metrics.
+ LOG(ERROR) << "failed to delete the last backup of the current log.";
+ }
failed_upload_count_.Set(0);
- if (static_cast<int>(encoded_log.size()) != base::WriteFile(
- staged_log_path_, encoded_log.data(), encoded_log.size())) {
- LOG(ERROR) << "failed to persist to " << staged_log_path_.value();
- }
+ staged_log->SaveToFile(staged_log_path_);
}
MetricsLog* UploadService::GetOrCreateCurrentLog() {
diff --git a/metricsd/uploader/upload_service.h b/metricsd/uploader/upload_service.h
index b84179f..a1d9d3b 100644
--- a/metricsd/uploader/upload_service.h
+++ b/metricsd/uploader/upload_service.h
@@ -66,6 +66,7 @@
public:
UploadService(const std::string& server,
const base::TimeDelta& upload_interval,
+ const base::TimeDelta& disk_persistence_interval,
const base::FilePath& private_metrics_directory,
const base::FilePath& shared_metrics_directory);
@@ -79,8 +80,8 @@
// launch as it is destroyed when staging the log.
void StartNewLog();
- // Event callback for handling MessageLoop events.
- void UploadEventCallback(const base::TimeDelta& interval);
+ // Saves the current metrics to a file.
+ void PersistToDisk();
// Triggers an upload event.
void UploadEvent();
@@ -100,6 +101,8 @@
friend class UploadServiceTest;
FRIEND_TEST(UploadServiceTest, CanSendMultipleTimes);
+ FRIEND_TEST(UploadServiceTest, CorruptedSavedLog);
+ FRIEND_TEST(UploadServiceTest, CurrentLogSavedAndResumed);
FRIEND_TEST(UploadServiceTest, DiscardLogsAfterTooManyFailedUpload);
FRIEND_TEST(UploadServiceTest, EmptyLogsAreNotSent);
FRIEND_TEST(UploadServiceTest, FailedSendAreRetried);
@@ -111,6 +114,7 @@
FRIEND_TEST(UploadServiceTest, LogKernelCrash);
FRIEND_TEST(UploadServiceTest, LogUncleanShutdown);
FRIEND_TEST(UploadServiceTest, LogUserCrash);
+ FRIEND_TEST(UploadServiceTest, PersistEmptyLog);
FRIEND_TEST(UploadServiceTest, UnknownCrashIgnored);
FRIEND_TEST(UploadServiceTest, ValuesInConfigFileAreSent);
@@ -121,12 +125,21 @@
// will be discarded.
static const int kMaxFailedUpload;
+ // Loads the log saved to disk if it exists.
+ void LoadSavedLog();
+
// Resets the internal state.
void Reset();
// Returns true iff metrics reporting is enabled.
bool AreMetricsEnabled();
+ // Event callback for handling Upload events.
+ void UploadEventCallback();
+
+ // Event callback for handling Persist events.
+ void PersistEventCallback();
+
// Aggregates all histogram available in memory and store them in the current
// log.
void GatherHistograms();
@@ -156,11 +169,13 @@
std::shared_ptr<CrashCounters> counters_;
base::TimeDelta upload_interval_;
+ base::TimeDelta disk_persistence_interval_;
MetricsdServiceRunner metricsd_service_runner_;
base::FilePath consent_file_;
base::FilePath staged_log_path_;
+ base::FilePath saved_log_path_;
bool testing_;
};
diff --git a/metricsd/uploader/upload_service_test.cc b/metricsd/uploader/upload_service_test.cc
index bd5b39a..70112f4 100644
--- a/metricsd/uploader/upload_service_test.cc
+++ b/metricsd/uploader/upload_service_test.cc
@@ -45,17 +45,17 @@
ASSERT_FALSE(base::StatisticsRecorder::IsActive());
base::StatisticsRecorder::Initialize();
- base::FilePath private_dir = dir_.path().Append("private");
- base::FilePath shared_dir = dir_.path().Append("shared");
+ private_dir_ = dir_.path().Append("private");
+ shared_dir_ = dir_.path().Append("shared");
- EXPECT_TRUE(base::CreateDirectory(private_dir));
- EXPECT_TRUE(base::CreateDirectory(shared_dir));
+ EXPECT_TRUE(base::CreateDirectory(private_dir_));
+ EXPECT_TRUE(base::CreateDirectory(shared_dir_));
- ASSERT_EQ(0, base::WriteFile(shared_dir.Append(metrics::kConsentFileName),
+ ASSERT_EQ(0, base::WriteFile(shared_dir_.Append(metrics::kConsentFileName),
"", 0));
- upload_service_.reset(
- new UploadService("", base::TimeDelta(), private_dir, shared_dir));
+ upload_service_.reset(new UploadService(
+ "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
counters_ = upload_service_->counters_;
upload_service_->sender_.reset(new SenderMock);
@@ -81,15 +81,16 @@
base::FilePath filepath =
dir_.path().Append("etc/os-release.d").Append(name);
ASSERT_TRUE(base::CreateDirectory(filepath.DirName()));
- ASSERT_EQ(
- value.size(),
- base::WriteFile(filepath, value.data(), value.size()));
+ ASSERT_EQ(value.size(),
+ base::WriteFile(filepath, value.data(), value.size()));
}
const metrics::SystemProfileProto_Stability GetCurrentStability() {
EXPECT_TRUE(upload_service_->current_log_.get());
- return upload_service_->current_log_->uma_proto()->system_profile().stability();
+ return upload_service_->current_log_->uma_proto()
+ ->system_profile()
+ .stability();
}
base::ScopedTempDir dir_;
@@ -97,6 +98,8 @@
std::unique_ptr<base::AtExitManager> exit_manager_;
std::shared_ptr<CrashCounters> counters_;
+ base::FilePath private_dir_;
+ base::FilePath shared_dir_;
};
TEST_F(UploadServiceTest, FailedSendAreRetried) {
@@ -219,10 +222,8 @@
}
TEST_F(UploadServiceTest, ExtractChannelFromString) {
- EXPECT_EQ(
- SystemProfileCache::ProtoChannelFromString(
- "developer-build"),
- metrics::SystemProfileProto::CHANNEL_UNKNOWN);
+ EXPECT_EQ(SystemProfileCache::ProtoChannelFromString("developer-build"),
+ metrics::SystemProfileProto::CHANNEL_UNKNOWN);
EXPECT_EQ(metrics::SystemProfileProto::CHANNEL_DEV,
SystemProfileCache::ProtoChannelFromString("dev-channel"));
@@ -297,3 +298,38 @@
SetTestingProperty(metrics::kProductId, "hello");
ASSERT_TRUE(cache.Initialize());
}
+
+TEST_F(UploadServiceTest, CurrentLogSavedAndResumed) {
+ SendHistogram("hello", 10, 0, 100, 10);
+ upload_service_->PersistToDisk();
+ EXPECT_EQ(
+ 1, upload_service_->current_log_->uma_proto()->histogram_event().size());
+ upload_service_.reset(new UploadService(
+ "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
+ upload_service_->InitForTest(nullptr);
+
+ SendHistogram("hello", 10, 0, 100, 10);
+ upload_service_->GatherHistograms();
+ EXPECT_EQ(2, upload_service_->GetOrCreateCurrentLog()
+ ->uma_proto()
+ ->histogram_event()
+ .size());
+}
+
+TEST_F(UploadServiceTest, PersistEmptyLog) {
+ upload_service_->PersistToDisk();
+ EXPECT_FALSE(base::PathExists(upload_service_->saved_log_path_));
+}
+
+TEST_F(UploadServiceTest, CorruptedSavedLog) {
+ // Write a bogus saved log.
+ EXPECT_EQ(5, base::WriteFile(upload_service_->saved_log_path_, "hello", 5));
+
+ upload_service_.reset(new UploadService(
+ "", base::TimeDelta(), base::TimeDelta(), private_dir_, shared_dir_));
+
+ upload_service_->InitForTest(nullptr);
+ // If the log is unreadable, we drop it and continue execution.
+ ASSERT_NE(nullptr, upload_service_->GetOrCreateCurrentLog());
+ ASSERT_FALSE(base::PathExists(upload_service_->saved_log_path_));
+}