Log active use time between kernel crashes.

Also, initialize the network state from flimflam, just in case -- now
that the metrics daemon process starts a bit late,

BUG=none
TEST=unit tests, ran on the device, emerged arm-generic

Review URL: http://codereview.chromium.org/2864009
diff --git a/metrics/Makefile b/metrics/Makefile
index fa68a40..2986078 100644
--- a/metrics/Makefile
+++ b/metrics/Makefile
@@ -35,7 +35,6 @@
 	c_metrics_library.o \
 	metrics_library.o
 TESTLIB_OBJS = \
-	metrics_library.o \
 	metrics_library_test.o
 
 TESTCOUNTER_LIBS = -lgmock -lgtest -lbase -lrt -lpthread
@@ -71,17 +70,6 @@
 %.o: %.cc
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-# dependencies in addition to those defined by the rules
-
-metrics_daemon.o: \
-	metrics_daemon.h \
-	network_states.h \
-	power_states.h
-metrics_daemon_test.o: \
-	metrics_daemon.h \
-	network_states.h \
-	power_states.h
-
 clean:
 	rm -f $(CLIENT) $(DAEMON) $(LIB) $(SHAREDLIB) *.o
 	rm -f $(COUNTER_TEST) $(DAEMON_TEST) $(LIB_TEST)
diff --git a/metrics/metrics_daemon.cc b/metrics/metrics_daemon.cc
index 6cbe8e8..30d3da8 100644
--- a/metrics/metrics_daemon.cc
+++ b/metrics/metrics_daemon.cc
@@ -6,6 +6,7 @@
 
 #include <dbus/dbus-glib-lowlevel.h>
 
+#include <base/file_util.h>
 #include <base/logging.h>
 
 #include "counter.h"
@@ -13,6 +14,7 @@
 using base::Time;
 using base::TimeDelta;
 using base::TimeTicks;
+using std::string;
 
 #define SAFE_MESSAGE(e) (e.message ? e.message : "unknown error")
 #define DBUS_IFACE_CRASH_REPORTER "org.chromium.CrashReporter"
@@ -20,13 +22,6 @@
 #define DBUS_IFACE_POWER_MANAGER "org.chromium.PowerManager"
 #define DBUS_IFACE_SESSION_MANAGER "org.chromium.SessionManagerInterface"
 
-// File to aggregate daily usage before sending to UMA.
-// TODO(petkov): This file should probably live in a user-specific stateful
-// location, e.g., /home/chronos/user.
-static const char kDailyUseRecordFile[] = "/var/log/metrics/daily-usage";
-static const char kUserCrashIntervalRecordFile[] =
-    "/var/log/metrics/user-crash-interval";
-
 static const int kSecondsPerMinute = 60;
 static const int kMinutesPerHour = 60;
 static const int kHoursPerDay = 24;
@@ -51,6 +46,12 @@
 const int MetricsDaemon::kMetricDailyUseTimeMax = kMinutesPerDay;
 const int MetricsDaemon::kMetricDailyUseTimeBuckets = 50;
 
+const char MetricsDaemon::kMetricKernelCrashIntervalName[] =
+    "Logging.KernelCrashInterval";
+const int MetricsDaemon::kMetricKernelCrashIntervalMin = 1;
+const int MetricsDaemon::kMetricKernelCrashIntervalMax = 4 * kSecondsPerWeek;
+const int MetricsDaemon::kMetricKernelCrashIntervalBuckets = 50;
+
 const char MetricsDaemon::kMetricTimeToNetworkDropName[] =
     "Network.TimeToDrop";
 const int MetricsDaemon::kMetricTimeToNetworkDropMin = 1;
@@ -106,6 +107,53 @@
 #include "session_states.h"
 };
 
+// Invokes a remote method over D-Bus that takes no input arguments
+// and returns a string result. The method call is issued with a 2
+// second blocking timeout. Returns an empty string on failure or
+// timeout.
+static string DBusGetString(DBusConnection* connection,
+                            const string& destination,
+                            const string& path,
+                            const string& interface,
+                            const string& method) {
+  DBusMessage* message =
+      dbus_message_new_method_call(destination.c_str(),
+                                   path.c_str(),
+                                   interface.c_str(),
+                                   method.c_str());
+  if (!message) {
+    DLOG(WARNING) << "DBusGetString: unable to allocate a message";
+    return "";
+  }
+
+  DBusError error;
+  dbus_error_init(&error);
+  const int kTimeout = 2000;  // ms
+  DLOG(INFO) << "DBusGetString: dest=" << destination << " path=" << path
+             << " iface=" << interface << " method=" << method;
+  DBusMessage* reply =
+      dbus_connection_send_with_reply_and_block(connection, message, kTimeout,
+                                                &error);
+  dbus_message_unref(message);
+  if (dbus_error_is_set(&error) || !reply) {
+    DLOG(WARNING) << "DBusGetString: call failed";
+    return "";
+  }
+  DBusMessageIter iter;
+  dbus_message_iter_init(reply, &iter);
+  if (dbus_message_iter_get_arg_type(&iter) != DBUS_TYPE_STRING) {
+    NOTREACHED();
+    dbus_message_unref(reply);
+    return "";
+  }
+  const char* c_result = "";
+  dbus_message_iter_get_basic(&iter, &c_result);
+  string result = c_result;
+  DLOG(INFO) << "DBusGetString: result=" << result;
+  dbus_message_unref(reply);
+  return result;
+}
+
 MetricsDaemon::MetricsDaemon()
     : network_state_(kUnknownNetworkState),
       power_state_(kUnknownPowerState),
@@ -117,21 +165,35 @@
 MetricsDaemon::~MetricsDaemon() {}
 
 void MetricsDaemon::Run(bool run_as_daemon) {
-  if (!run_as_daemon || daemon(0, 0) == 0) {
-    Loop();
-  }
+  if (run_as_daemon && daemon(0, 0) != 0)
+    return;
+
+  static const char kKernelCrashDetectedFile[] = "/tmp/kernel-crash-detected";
+  CheckKernelCrash(kKernelCrashDetectedFile);
+  Loop();
 }
 
 void MetricsDaemon::Init(bool testing, MetricsLibraryInterface* metrics_lib) {
   testing_ = testing;
   DCHECK(metrics_lib != NULL);
   metrics_lib_ = metrics_lib;
+
+  static const char kDailyUseRecordFile[] = "/var/log/metrics/daily-usage";
   daily_use_.reset(new chromeos_metrics::TaggedCounter());
   daily_use_->Init(kDailyUseRecordFile, &DailyUseReporter, this);
+
+  static const char kUserCrashIntervalRecordFile[] =
+      "/var/log/metrics/user-crash-interval";
   user_crash_interval_.reset(new chromeos_metrics::TaggedCounter());
   user_crash_interval_->Init(kUserCrashIntervalRecordFile,
                              &UserCrashIntervalReporter, this);
 
+  static const char kKernelCrashIntervalRecordFile[] =
+      "/var/log/metrics/kernel-crash-interval";
+  kernel_crash_interval_.reset(new chromeos_metrics::TaggedCounter());
+  kernel_crash_interval_->Init(kKernelCrashIntervalRecordFile,
+                               &KernelCrashIntervalReporter, this);
+
   // Don't setup D-Bus and GLib in test mode.
   if (testing)
     return;
@@ -162,6 +224,11 @@
   // the registered D-Bus matches is successful. The daemon is not
   // activated for D-Bus messages that don't match.
   CHECK(dbus_connection_add_filter(connection, MessageFilter, this, NULL));
+
+  // Initializes the current network state by retrieving it from flimflam.
+  string state_name = DBusGetString(connection, "org.chromium.flimflam", "/",
+                                    DBUS_IFACE_FLIMFLAM_MANAGER, "GetState");
+  NetStateChanged(state_name.c_str(), TimeTicks::Now());
 }
 
 void MetricsDaemon::Loop() {
@@ -320,6 +387,7 @@
   int day = since_epoch.InDays();
   daily_use_->Update(day, seconds);
   user_crash_interval_->Update(0, seconds);
+  kernel_crash_interval_->Update(0, seconds);
 
   // Schedules a use monitor on inactive->active transitions and
   // unschedules it on active->inactive transitions.
@@ -342,6 +410,27 @@
   user_crash_interval_->Flush();
 }
 
+void MetricsDaemon::ProcessKernelCrash() {
+  // Counts the active use time up to now.
+  SetUserActiveState(user_active_, Time::Now());
+
+  // Reports the active use time since the last crash and resets it.
+  kernel_crash_interval_->Flush();
+}
+
+void MetricsDaemon::CheckKernelCrash(const std::string& crash_file) {
+  FilePath crash_detected(crash_file);
+  if (!file_util::PathExists(crash_detected))
+    return;
+
+  ProcessKernelCrash();
+
+  // Deletes the crash-detected file so that the daemon doesn't report
+  // another kernel crash in case it's restarted.
+  file_util::Delete(crash_detected,
+                    false);  // recursive
+}
+
 // static
 gboolean MetricsDaemon::UseMonitorStatic(gpointer data) {
   return static_cast<MetricsDaemon*>(data)->UseMonitor() ? TRUE : FALSE;
@@ -425,7 +514,17 @@
                      kMetricUserCrashIntervalBuckets);
 }
 
-void MetricsDaemon::SendMetric(const std::string& name, int sample,
+// static
+void MetricsDaemon::KernelCrashIntervalReporter(void* handle,
+                                                int tag, int count) {
+  MetricsDaemon* daemon = static_cast<MetricsDaemon*>(handle);
+  daemon->SendMetric(kMetricKernelCrashIntervalName, count,
+                     kMetricKernelCrashIntervalMin,
+                     kMetricKernelCrashIntervalMax,
+                     kMetricKernelCrashIntervalBuckets);
+}
+
+void MetricsDaemon::SendMetric(const string& name, int sample,
                                int min, int max, int nbuckets) {
   DLOG(INFO) << "received metric: " << name << " " << sample << " "
              << min << " " << max << " " << nbuckets;
diff --git a/metrics/metrics_daemon.h b/metrics/metrics_daemon.h
index 50958b8..437cafd 100644
--- a/metrics/metrics_daemon.h
+++ b/metrics/metrics_daemon.h
@@ -31,7 +31,9 @@
 
  private:
   friend class MetricsDaemonTest;
+  FRIEND_TEST(MetricsDaemonTest, CheckKernelCrash);
   FRIEND_TEST(MetricsDaemonTest, DailyUseReporter);
+  FRIEND_TEST(MetricsDaemonTest, KernelCrashIntervalReporter);
   FRIEND_TEST(MetricsDaemonTest, LookupNetworkState);
   FRIEND_TEST(MetricsDaemonTest, LookupPowerState);
   FRIEND_TEST(MetricsDaemonTest, LookupScreenSaverState);
@@ -40,6 +42,7 @@
   FRIEND_TEST(MetricsDaemonTest, NetStateChangedSimpleDrop);
   FRIEND_TEST(MetricsDaemonTest, NetStateChangedSuspend);
   FRIEND_TEST(MetricsDaemonTest, PowerStateChanged);
+  FRIEND_TEST(MetricsDaemonTest, ProcessKernelCrash);
   FRIEND_TEST(MetricsDaemonTest, ProcessUserCrash);
   FRIEND_TEST(MetricsDaemonTest, ScreenSaverStateChanged);
   FRIEND_TEST(MetricsDaemonTest, SendMetric);
@@ -85,6 +88,10 @@
   static const int kMetricDailyUseTimeMin;
   static const int kMetricDailyUseTimeMax;
   static const int kMetricDailyUseTimeBuckets;
+  static const char kMetricKernelCrashIntervalName[];
+  static const int kMetricKernelCrashIntervalMin;
+  static const int kMetricKernelCrashIntervalMax;
+  static const int kMetricKernelCrashIntervalBuckets;
   static const char kMetricTimeToNetworkDropName[];
   static const int kMetricTimeToNetworkDropMin;
   static const int kMetricTimeToNetworkDropMax;
@@ -152,6 +159,14 @@
   // process crashes.
   void ProcessUserCrash();
 
+  // Updates the active use time and logs time between kernel crashes.
+  void ProcessKernelCrash();
+
+  // Checks if a kernel crash has been detected and processes if so.
+  // The method assumes that a kernel crash has happened if
+  // |crash_file| exists.
+  void CheckKernelCrash(const std::string& crash_file);
+
   // Callbacks for the daily use monitor. The daily use monitor uses
   // LogDailyUseRecord to aggregate current usage data and send it to
   // UMA, if necessary. It also reschedules itself using an
@@ -187,6 +202,10 @@
   // crashes and send to UMA.
   static void UserCrashIntervalReporter(void* data, int tag, int count);
 
+  // TaggedCounter callback to process time between kernel crashes and
+  // send to UMA.
+  static void KernelCrashIntervalReporter(void* data, int tag, int count);
+
   // Test mode.
   bool testing_;
 
@@ -222,6 +241,9 @@
   // Active use time between user-space process crashes.
   scoped_ptr<chromeos_metrics::TaggedCounterInterface> user_crash_interval_;
 
+  // Active use time between kernel crashes.
+  scoped_ptr<chromeos_metrics::TaggedCounterInterface> kernel_crash_interval_;
+
   // Sleep period until the next daily usage aggregation performed by
   // the daily use monitor (see ScheduleUseMonitor).
   int usemon_interval_;
diff --git a/metrics/metrics_daemon_test.cc b/metrics/metrics_daemon_test.cc
index 1f2c0fa..e75c161 100644
--- a/metrics/metrics_daemon_test.cc
+++ b/metrics/metrics_daemon_test.cc
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <base/file_util.h>
 #include <gtest/gtest.h>
 
 #include "counter_mock.h"
@@ -42,16 +43,20 @@
  protected:
   virtual void SetUp() {
     EXPECT_EQ(NULL, daemon_.daily_use_.get());
+    EXPECT_EQ(NULL, daemon_.kernel_crash_interval_.get());
     EXPECT_EQ(NULL, daemon_.user_crash_interval_.get());
     daemon_.Init(true, &metrics_lib_);
 
     // Tests constructor initialization. Switches to mock counters.
     EXPECT_TRUE(NULL != daemon_.daily_use_.get());
+    EXPECT_TRUE(NULL != daemon_.kernel_crash_interval_.get());
     EXPECT_TRUE(NULL != daemon_.user_crash_interval_.get());
 
     // Allocates mock counter and transfers ownership.
     daily_use_ = new StrictMock<TaggedCounterMock>();
     daemon_.daily_use_.reset(daily_use_);
+    kernel_crash_interval_ = new StrictMock<TaggedCounterMock>();
+    daemon_.kernel_crash_interval_.reset(kernel_crash_interval_);
     user_crash_interval_ = new StrictMock<TaggedCounterMock>();
     daemon_.user_crash_interval_.reset(user_crash_interval_);
 
@@ -71,6 +76,9 @@
     EXPECT_CALL(*daily_use_, Update(daily_tag, count))
         .Times(1)
         .RetiresOnSaturation();
+    EXPECT_CALL(*kernel_crash_interval_, Update(0, count))
+        .Times(1)
+        .RetiresOnSaturation();
     EXPECT_CALL(*user_crash_interval_, Update(0, count))
         .Times(1)
         .RetiresOnSaturation();
@@ -82,6 +90,9 @@
     EXPECT_CALL(*daily_use_, Update(_, _))
         .Times(1)
         .RetiresOnSaturation();
+    EXPECT_CALL(*kernel_crash_interval_, Update(_, _))
+        .Times(1)
+        .RetiresOnSaturation();
     EXPECT_CALL(*user_crash_interval_, Update(_, _))
         .Times(1)
         .RetiresOnSaturation();
@@ -159,9 +170,24 @@
   // update calls are marked as failures. They are pointers so that
   // they can replace the scoped_ptr's allocated by the daemon.
   StrictMock<TaggedCounterMock>* daily_use_;
+  StrictMock<TaggedCounterMock>* kernel_crash_interval_;
   StrictMock<TaggedCounterMock>* user_crash_interval_;
 };
 
+TEST_F(MetricsDaemonTest, CheckKernelCrash) {
+  static const char kKernelCrashDetected[] = "test-kernel-crash-detected";
+  daemon_.CheckKernelCrash(kKernelCrashDetected);
+
+  FilePath crash_detected(kKernelCrashDetected);
+  file_util::WriteFile(crash_detected, "", 0);
+  IgnoreActiveUseUpdate();
+  EXPECT_CALL(*kernel_crash_interval_, Flush())
+      .Times(1)
+      .RetiresOnSaturation();
+  daemon_.CheckKernelCrash(kKernelCrashDetected);
+  file_util::Delete(crash_detected, false);
+}
+
 TEST_F(MetricsDaemonTest, DailyUseReporter) {
   ExpectDailyUseTimeMetric(/* sample */ 2);
   MetricsDaemon::DailyUseReporter(&daemon_, /* tag */ 20, /* count */ 90);
@@ -174,6 +200,14 @@
   MetricsDaemon::DailyUseReporter(&daemon_, /* tag */ 60, /* count */ -5);
 }
 
+TEST_F(MetricsDaemonTest, KernelCrashIntervalReporter) {
+  ExpectMetric(MetricsDaemon::kMetricKernelCrashIntervalName, 50,
+               MetricsDaemon::kMetricKernelCrashIntervalMin,
+               MetricsDaemon::kMetricKernelCrashIntervalMax,
+               MetricsDaemon::kMetricKernelCrashIntervalBuckets);
+  MetricsDaemon::KernelCrashIntervalReporter(&daemon_, 0, 50);
+}
+
 TEST_F(MetricsDaemonTest, LookupNetworkState) {
   EXPECT_EQ(MetricsDaemon::kNetworkStateOnline,
             daemon_.LookupNetworkState("online"));
@@ -340,6 +374,14 @@
   EXPECT_EQ(TestTime(7 * kSecondsPerDay + 185), daemon_.user_active_last_);
 }
 
+TEST_F(MetricsDaemonTest, ProcessKernelCrash) {
+  IgnoreActiveUseUpdate();
+  EXPECT_CALL(*kernel_crash_interval_, Flush())
+      .Times(1)
+      .RetiresOnSaturation();
+  daemon_.ProcessKernelCrash();
+}
+
 TEST_F(MetricsDaemonTest, ProcessUserCrash) {
   IgnoreActiveUseUpdate();
   EXPECT_CALL(*user_crash_interval_, Flush())