Add udev event-based crash reporting

This allows crash_reporter to handle nonfatal errors from the kernel that
are passed through udev events.  A udev .rules file is added to invoke
crash_reporter.  Currently only DRM error info logging is supported.

BUG=chrome-os-partner:6492
TEST=Run:
"crash_reporter --udev=ACTION=change:KERNEL=card0:SUBSYSTEM=drm"
Check that a card0-drm log file has been created under /var/spool/crash

Change-Id: I05a1d508e5446988575b0c1924878b8e36ae46bb
Signed-off-by: Simon Que <sque@chromium.org>
Reviewed-on: https://gerrit.chromium.org/gerrit/10618
diff --git a/crash_reporter/99-crash-reporter.rules b/crash_reporter/99-crash-reporter.rules
new file mode 100644
index 0000000..25a1504
--- /dev/null
+++ b/crash_reporter/99-crash-reporter.rules
@@ -0,0 +1 @@
+ACTION=="change", SUBSYSTEM=="drm", KERNEL=="card0", ENV{ERROR}=="1", RUN+="/sbin/crash_reporter --udev=KERNEL=card0:SUBSYSTEM=drm:ACTION=change"
diff --git a/crash_reporter/crash_collector.cc b/crash_reporter/crash_collector.cc
index fc7a52c..6046dc5 100644
--- a/crash_reporter/crash_collector.cc
+++ b/crash_reporter/crash_collector.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -14,6 +14,7 @@
 #include <inttypes.h>
 
 #include <set>
+#include <vector>
 
 #include "base/eintr_wrapper.h"
 #include "base/file_util.h"
@@ -24,12 +25,16 @@
 
 static const char kCollectChromeFile[] =
     "/mnt/stateful_partition/etc/collect_chrome_crashes";
+static const char kCollectUdevSignature[] = "crash_reporter-udev-collection";
 static const char kCrashTestInProgressPath[] = "/tmp/crash-test-in-progress";
+static const char kDefaultLogConfig[] = "/etc/crash_reporter_logs.conf";
 static const char kDefaultUserName[] = "chronos";
 static const char kLeaveCoreFile[] = "/root/.leave_core";
 static const char kLsbRelease[] = "/etc/lsb-release";
 static const char kShellPath[] = "/bin/sh";
 static const char kSystemCrashPath[] = "/var/spool/crash";
+static const char kUdevExecName[] = "udev";
+static const char kUdevSignatureKey[] = "sig";
 static const char kUserCrashPath[] = "/home/chronos/user/crash";
 
 // Directory mode of the user crash spool directory.
@@ -69,6 +74,59 @@
   is_feedback_allowed_function_ = is_feedback_allowed_function;
 }
 
+bool CrashCollector::HandleUdevCrash(const std::string &udev_event) {
+  // Collects a log + metadata for |udev_event| in the euid-0 crash directory;
+  // returns false if that directory or the log cannot be obtained.  Format:
+  //   "ACTION=[action]:KERNEL=[name]:SUBSYSTEM=[subsystem]"
+  // The key=value pairs may appear in any order.
+
+  // Split into key=value pairs (':' between pairs) and index them by key.
+  std::vector<std::pair<std::string, std::string> > udev_event_keyval;
+  base::SplitStringIntoKeyValuePairs(udev_event, '=', ':', &udev_event_keyval);
+  std::vector<std::pair<std::string, std::string> >::const_iterator iter;
+  std::map<std::string, std::string> udev_event_map;
+  for (iter = udev_event_keyval.begin();
+       iter != udev_event_keyval.end();
+       ++iter) {
+    udev_event_map[iter->first] = iter->second;
+  }
+
+  // Build the log-config key looked up in crash_reporter_logs.conf:
+  //   "crash_reporter-udev-collection-[action]-[name]-[subsystem]"
+  // (|basename| is the same string without the signature prefix.)  A missing
+  // udev field maps to "" (operator[] default-inserts), giving e.g.:
+  //   "crash_reporter-udev-collection-[action]--[subsystem]"
+  std::string basename = udev_event_map["ACTION"] + "-" +
+                         udev_event_map["KERNEL"] + "-" +
+                         udev_event_map["SUBSYSTEM"];
+  std::string udev_log_name = std::string(kCollectUdevSignature) + '-' +
+                              basename;
+
+  // Make sure the crash directory exists, or create it if it doesn't.
+  FilePath crash_directory;
+  if (!GetCreatedCrashDirectoryByEuid(0, &crash_directory, NULL)) {
+    LOG(ERROR) << "Could not get crash directory.";
+    return false;
+  }
+  // Build the destination ".log" path from |basename| and the current time.
+  std::string log_file_name =
+      FormatDumpBasename(basename, time(NULL), 0);
+  FilePath crash_path = GetCrashPath(crash_directory, log_file_name, "log");
+
+  // Gather the logs configured for |udev_log_name| into |crash_path|.
+  bool result = GetLogContents(FilePath(kDefaultLogConfig), udev_log_name,
+                               crash_path);
+  if (!result) {
+    LOG(ERROR) << "Error reading udev log info " << udev_log_name;
+    return false;
+  }
+
+  AddCrashMetaData(kUdevSignatureKey, kCollectUdevSignature);
+  WriteCrashMetaData(GetCrashPath(crash_directory, log_file_name, "meta"),
+                     kUdevExecName, crash_path.value());
+  return true;
+}
+
 int CrashCollector::WriteNewFile(const FilePath &filename,
                                  const char *data,
                                  int size) {
diff --git a/crash_reporter/crash_collector.h b/crash_reporter/crash_collector.h
index 8759fc8..aa00416 100644
--- a/crash_reporter/crash_collector.h
+++ b/crash_reporter/crash_collector.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -28,6 +28,8 @@
   void Initialize(CountCrashFunction count_crash,
                   IsFeedbackAllowedFunction is_metrics_allowed);
 
+  bool HandleUdevCrash(const std::string &udev_event);
+
  protected:
   friend class CrashCollectorTest;
   FRIEND_TEST(CrashCollectorTest, CheckHasCapacityCorrectBasename);
diff --git a/crash_reporter/crash_reporter.cc b/crash_reporter/crash_reporter.cc
index ac3964b..263caf4 100644
--- a/crash_reporter/crash_reporter.cc
+++ b/crash_reporter/crash_reporter.cc
@@ -1,14 +1,16 @@
-// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include <fcntl.h>  // for open
 
 #include <string>
+#include <vector>
 
 #include "base/file_util.h"
 #include "base/command_line.h"
 #include "base/logging.h"
+#include "base/string_split.h"
 #include "base/string_util.h"
 #include "chromeos/syslog_logging.h"
 #include "crash-reporter/kernel_collector.h"
@@ -25,6 +27,7 @@
 DEFINE_bool(crash_test, false, "Crash test");
 DEFINE_string(user, "", "User crash info (pid:signal:exec_name)");
 DEFINE_bool(unclean_check, true, "Check for unclean shutdown");
+DEFINE_string(udev, "", "Udev event (ACTION=x:KERNEL=y:SUBSYSTEM=z)");
 #pragma GCC diagnostic error "-Wstrict-aliasing"
 
 static const char kCrashCounterHistogram[] = "Logging.CrashCounter";
@@ -160,6 +163,19 @@
   return 0;
 }
 
+static int HandleUdevCrash(CrashCollector *udev_collector) {
+  // Handle a udev-reported crash; returns 0 if handled, 1 otherwise.
+  CHECK(!FLAGS_udev.empty()) << "--udev= must be set";
+
+  // Accumulate logs to help in diagnosing failures during udev collection.
+  chromeos::LogToString(true);
+  bool handled = udev_collector->HandleUdevCrash(FLAGS_udev);
+  chromeos::LogToString(false);
+  if (!handled)
+    return 1;
+  return 0;
+}
+
 // Interactive/diagnostics mode for generating kernel crash signatures.
 static int GenerateKernelSignature(KernelCollector *kernel_collector) {
   std::string kcrash_contents;
@@ -236,5 +252,10 @@
     return GenerateKernelSignature(&kernel_collector);
   }
 
+  if (!FLAGS_udev.empty()) {
+    CrashCollector udev_collector;
+    return HandleUdevCrash(&udev_collector);
+  }
+
   return HandleUserCrash(&user_collector);
 }
diff --git a/crash_reporter/crash_reporter_logs.conf b/crash_reporter/crash_reporter_logs.conf
index c91d4d5..5650346 100644
--- a/crash_reporter/crash_reporter_logs.conf
+++ b/crash_reporter/crash_reporter_logs.conf
@@ -1,4 +1,4 @@
-# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can
 # be found in the LICENSE file.
 
@@ -27,6 +27,10 @@
 # the commands' arguments (i.e. "comm" instead of "command").
 crash_reporter-user-collection:echo "===ps output==="; ps axw -o user,pid,%cpu,%mem,vsz,rss,tname,stat,start_time,bsdtime,comm | tail -c 25000; echo "===dmesg output==="; dmesg | tail -c 25000; echo "===meminfo==="; cat /proc/meminfo
 
+# This rule is similar to the crash_reporter-user-collection rule, except it is
+# run for kernel errors reported through udev events.
+crash_reporter-udev-collection-change-card0-drm:for dri in /sys/kernel/debug/dri/*; do echo "===$dri/i915_error_state==="; cat $dri/i915_error_state; done
+
 # The following rules are only for testing purposes.
 crash_log_test:echo hello world
 crash_log_recursion_test:sleep 1 && /usr/local/autotest/tests/crash_log_recursion_test