crash-reporter: add support for device coredump

Copy the device coredump file to /var/spool/crash directory when
udev event "devcoredump" is detected. Also include the last 50 lines
of /var/log/messages and /var/log/net.log in the coredump file for
debugging purpose. Only perform the collection if feedback is allowed
(metrics are enabled) or the device is running a developer image.

By default, crash-reporter will not upload device coredumps to the crash
server. The user can enable device coredumps upload via a to-be-added
crosh command, which should only be done at the direction of CrOS engineers.

BUG=chromium:464872
TEST=USE="asan clang" FEATURES=test emerge-$BOARD crash-reporter
Manual Test:
1. Trigger wifi firmware error on a Clapper running developer image by
   running following command:
   "echo 1 > /sys/kernel/debug/iwlwifi/0000\:01\:00.0/iwlmvm/fw_restart"
2. Verify there is a device coredump file in "/var/spool/crash/"
   "devcoredump_iwlwifi*.devcore", as well as the additional log file
   ".log" and the meta file ".meta".

Change-Id: Ic4cf67d4b5715a6f422505f409276d1261b7d51f
Reviewed-on: https://chromium-review.googlesource.com/257091
Reviewed-by: Zeping Qiu <zqiu@chromium.org>
Commit-Queue: Zeping Qiu <zqiu@chromium.org>
Tested-by: Zeping Qiu <zqiu@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
diff --git a/crash_reporter/99-crash-reporter.rules b/crash_reporter/99-crash-reporter.rules
index 57a71ca..aea5b1c 100644
--- a/crash_reporter/99-crash-reporter.rules
+++ b/crash_reporter/99-crash-reporter.rules
@@ -3,3 +3,4 @@
 ACTION=="change", SUBSYSTEM=="i2c", DRIVER=="cyapa", ENV{ERROR}=="1", RUN+="/sbin/crash_reporter --udev=SUBSYSTEM=i2c-cyapa:ACTION=change"
 # For detecting Atmel trackpad/touchscreen issue. Passing into crash_reporter SUBSYSTEM=i2c-atmel_mxt_ts since crash_reporter does not handle DRIVER string.
 ACTION=="change", SUBSYSTEM=="i2c", DRIVER=="atmel_mxt_ts", ENV{ERROR}=="1", RUN+="/sbin/crash_reporter --udev=SUBSYSTEM=i2c-atmel_mxt_ts:ACTION=change"
+ACTION=="add", SUBSYSTEM=="devcoredump", RUN+="/sbin/crash_reporter --udev=SUBSYSTEM=devcoredump:ACTION=add:KERNEL_NUMBER=%n"
diff --git a/crash_reporter/crash_reporter_logs.conf b/crash_reporter/crash_reporter_logs.conf
index 1a69032..f5ca80c 100644
--- a/crash_reporter/crash_reporter_logs.conf
+++ b/crash_reporter/crash_reporter_logs.conf
@@ -74,6 +74,15 @@
 # Periodically collect touch event log for debugging (crosbug.com/p/17244)
 crash_reporter-udev-collection---TouchEvent=cat /var/log/touch_event.log
 
+# Collect the last 50 lines of /var/log/messages and /var/log/net.log for
+# intel wifi driver (iwlwifi) for debugging purpose.
+crash_reporter-udev-collection-devcoredump-iwlwifi=\
+  echo "===/var/log/messages==="; \
+  tail -n 50 /var/log/messages; \
+  echo "===/var/log/net.log==="; \
+  tail -n 50 /var/log/net.log; \
+  echo EOF
+
 # Dump the last 50 lines of the last two powerd log files -- if the job has
 # already restarted, we want to see the end of the previous instance's logs.
 powerd=\
diff --git a/crash_reporter/crash_sender b/crash_reporter/crash_sender
index c83a12a..2ad1e46 100755
--- a/crash_reporter/crash_sender
+++ b/crash_reporter/crash_sender
@@ -69,6 +69,10 @@
 # Maximum time to sleep between sends.
 SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600}
 
+# Set this to 1 to allow uploading of device coredumps.
+DEVCOREDUMP_UPLOAD_FLAG_FILE=\
+"/var/lib/crash_reporter/device_coredump_upload_allowed"
+
 # The syslog tag for all logging we emit.
 TAG="$(basename $0)[$$]"
 
@@ -156,6 +160,12 @@
   crossystem "devsw_boot?1"  # exit status will be accurate
 }
 
+# Return 0 if the uploading of device coredumps is allowed.
+is_device_coredump_upload_allowed() {
+  [ -f "${DEVCOREDUMP_UPLOAD_FLAG_FILE}" ] && return 0
+  return 1
+}
+
 # Generate a uniform random number in 0..max-1.
 generate_uniform_random() {
   local max=$1
@@ -508,7 +518,8 @@
     local kind=$(get_kind "${meta_path}")
     if [ "${kind}" != "minidump" ] && \
        [ "${kind}" != "kcrash" ] && \
-       [ "${kind}" != "log" ]; then
+       [ "${kind}" != "log" ] &&
+       [ "${kind}" != "devcore" ]; then
       lecho "Unknown report kind ${kind}.  Removing report."
       remove_report "${meta_path}"
       continue
@@ -527,6 +538,12 @@
       continue
     fi
 
+    # Ignore device coredump if device coredump uploading is not allowed.
+    if [ "${kind}" = "devcore" ] && ! is_device_coredump_upload_allowed; then
+      lecho "Ignoring device coredump. Device coredump upload not allowed."
+      continue
+    fi
+
     if ! is_mock && ! is_official_image; then
       lecho "Not an official OS version.  Removing crash."
       remove_report "${meta_path}"
diff --git a/crash_reporter/udev_collector.cc b/crash_reporter/udev_collector.cc
index 64ec339..908bbc9 100644
--- a/crash_reporter/udev_collector.cc
+++ b/crash_reporter/udev_collector.cc
@@ -8,26 +8,41 @@
 #include <utility>
 #include <vector>
 
+#include <base/files/file_enumerator.h>
 #include <base/files/file_util.h>
 #include <base/logging.h>
+#include <base/strings/string_number_conversions.h>
 #include <base/strings/string_split.h>
 #include <base/strings/string_util.h>
+#include <base/strings/stringprintf.h>
 #include <chromeos/process.h>
 
-static const char kCollectUdevSignature[] = "crash_reporter-udev-collection";
-static const char kGzipPath[] = "/bin/gzip";
-static const char kUdevExecName[] = "udev";
-static const char kUdevSignatureKey[] = "sig";
-
 using base::FilePath;
 
-UdevCollector::UdevCollector() {}
+namespace {
+
+const char kCollectUdevSignature[] = "crash_reporter-udev-collection";
+const char kGzipPath[] = "/bin/gzip";
+const char kUdevExecName[] = "udev";
+const char kUdevSignatureKey[] = "sig";
+const char kUdevSubsystemDevCoredump[] = "devcoredump";
+const char kDefaultDevCoredumpDirectory[] = "/sys/class/devcoredump";
+const char kDevCoredumpFilePrefixFormat[] = "devcoredump_%s";
+
+}  // namespace
+
+UdevCollector::UdevCollector()
+    : dev_coredump_directory_(kDefaultDevCoredumpDirectory) {}
 
 UdevCollector::~UdevCollector() {}
 
 bool UdevCollector::HandleCrash(const std::string &udev_event) {
-  if (!is_feedback_allowed_function_()) {
-    LOG(ERROR) << "No consent given to collect crash info.";
+  if (IsDeveloperImage()) {
+    LOG(INFO) << "developer image - collect udev crash info.";
+  } else if (is_feedback_allowed_function_()) {
+    LOG(INFO) << "Consent given - collect udev crash info.";
+  } else {
+    LOG(INFO) << "Ignoring - Non-developer image and no consent given.";
     return false;
   }
 
@@ -43,24 +58,43 @@
     udev_event_map[iter->first] = iter->second;
   }
 
-  // Construct the basename string for crash_reporter_logs.conf:
-  //   "crash_reporter-udev-collection-[action]-[name]-[subsystem]"
-  // If a udev field is not provided, "" is used in its place, e.g.:
-  //   "crash_reporter-udev-collection-[action]--[subsystem]"
-  // Hence, "" is used as a wildcard name string.
-  // TODO(sque, crosbug.com/32238): Implement wildcard checking.
-  std::string basename = udev_event_map["ACTION"] + "-" +
-                         udev_event_map["KERNEL"] + "-" +
-                         udev_event_map["SUBSYSTEM"];
-  std::string udev_log_name = std::string(kCollectUdevSignature) + '-' +
-                              basename;
-
   // Make sure the crash directory exists, or create it if it doesn't.
   FilePath crash_directory;
   if (!GetCreatedCrashDirectoryByEuid(0, &crash_directory, nullptr)) {
     LOG(ERROR) << "Could not get crash directory.";
     return false;
   }
+
+  if (udev_event_map["SUBSYSTEM"] == kUdevSubsystemDevCoredump) {
+    int instance_number;
+    if (!base::StringToInt(udev_event_map["KERNEL_NUMBER"], &instance_number)) {
+      LOG(ERROR) << "Invalid kernel number: "
+                 << udev_event_map["KERNEL_NUMBER"];
+      return false;
+    }
+    return ProcessDevCoredump(crash_directory, instance_number);
+  }
+
+  return ProcessUdevCrashLogs(crash_directory,
+                              udev_event_map["ACTION"],
+                              udev_event_map["KERNEL"],
+                              udev_event_map["SUBSYSTEM"]);
+}
+
+bool UdevCollector::ProcessUdevCrashLogs(const FilePath& crash_directory,
+                                         const std::string& action,
+                                         const std::string& kernel,
+                                         const std::string& subsystem) {
+  // Construct the basename string for crash_reporter_logs.conf:
+  //   "crash_reporter-udev-collection-[action]-[name]-[subsystem]"
+  // If a udev field is not provided, "" is used in its place, e.g.:
+  //   "crash_reporter-udev-collection-[action]--[subsystem]"
+  // Hence, "" is used as a wildcard name string.
+  // TODO(sque, crosbug.com/32238): Implement wildcard checking.
+  std::string basename = action + "-" + kernel + "-" + subsystem;
+  std::string udev_log_name = std::string(kCollectUdevSignature) + '-' +
+                              basename;
+
   // Create the destination path.
   std::string log_file_name =
       FormatDumpBasename(basename, time(nullptr), 0);
@@ -85,10 +119,114 @@
   else
     crash_path = crash_path_zipped;
 
-  std::string exec_name = std::string(kUdevExecName) + "-" +
-      udev_event_map["SUBSYSTEM"];
+  std::string exec_name = std::string(kUdevExecName) + "-" + subsystem;
   AddCrashMetaData(kUdevSignatureKey, udev_log_name);
   WriteCrashMetaData(GetCrashPath(crash_directory, log_file_name, "meta"),
                      exec_name, crash_path.value());
   return true;
 }
+
+bool UdevCollector::ProcessDevCoredump(const FilePath& crash_directory,
+                                       int instance_number) {
+  FilePath coredump_path =
+      FilePath(base::StringPrintf("%s/devcd%d/data",
+                                  dev_coredump_directory_.c_str(),
+                                  instance_number));
+  if (!base::PathExists(coredump_path)) {
+    LOG(ERROR) << "Device coredump file " << coredump_path.value()
+               << " does not exist";
+    return false;
+  }
+
+  // Add coredump file to the crash directory.
+  if (!AppendDevCoredump(crash_directory, coredump_path, instance_number)) {
+    ClearDevCoredump(coredump_path);
+    return false;
+  }
+
+  // Clear the coredump data to allow generation of future device coredumps
+  // without having to wait for the 5-minutes timeout.
+  return ClearDevCoredump(coredump_path);
+}
+
+bool UdevCollector::AppendDevCoredump(const FilePath& crash_directory,
+                                      const FilePath& coredump_path,
+                                      int instance_number) {
+  // Retrieve the driver name of the failing device.
+  std::string driver_name = GetFailingDeviceDriverName(instance_number);
+  if (driver_name.empty()) {
+    LOG(ERROR) << "Failed to obtain driver name for instance: "
+               << instance_number;
+    return false;
+  }
+
+  std::string coredump_prefix =
+      base::StringPrintf(kDevCoredumpFilePrefixFormat, driver_name.c_str());
+
+  std::string dump_basename = FormatDumpBasename(coredump_prefix,
+                                                 time(nullptr),
+                                                 instance_number);
+  FilePath core_path = GetCrashPath(crash_directory, dump_basename, "devcore");
+  FilePath log_path = GetCrashPath(crash_directory, dump_basename, "log");
+  FilePath meta_path = GetCrashPath(crash_directory, dump_basename, "meta");
+
+  // Collect coredump data.
+  if (!base::CopyFile(coredump_path, core_path)) {
+    LOG(ERROR) << "Failed to copy device coredumpm file from "
+               << coredump_path.value() << " to " << core_path.value();
+    return false;
+  }
+
+  // Collect additional logs if one is specified in the config file.
+  std::string udev_log_name = std::string(kCollectUdevSignature) + '-' +
+      kUdevSubsystemDevCoredump + '-' + driver_name;
+  bool result = GetLogContents(log_config_path_, udev_log_name, log_path);
+  if (result) {
+    AddCrashMetaUploadFile("logs", log_path.value());
+  }
+
+  WriteCrashMetaData(meta_path, coredump_prefix, core_path.value());
+
+  return true;
+}
+
+bool UdevCollector::ClearDevCoredump(const FilePath& coredump_path) {
+  if (!base::WriteFile(coredump_path, "0", 1)) {
+    LOG(ERROR) << "Failed to delete the coredump data file "
+               << coredump_path.value();
+    return false;
+  }
+  return true;
+}
+
+std::string UdevCollector::GetFailingDeviceDriverName(int instance_number) {
+  FilePath failing_uevent_path =
+      FilePath(base::StringPrintf("%s/devcd%d/failing_device/uevent",
+                                  dev_coredump_directory_.c_str(),
+                                  instance_number));
+  if (!base::PathExists(failing_uevent_path)) {
+    LOG(ERROR) << "Failing uevent path " << failing_uevent_path.value()
+               << " does not exist";
+    return "";
+  }
+
+  std::string uevent_content;
+  if (!base::ReadFileToString(failing_uevent_path, &uevent_content)) {
+    LOG(ERROR) << "Failed to read uevent file " << failing_uevent_path.value();
+    return "";
+  }
+
+  // Parse uevent file contents as key-value pairs.
+  std::vector<std::pair<std::string, std::string>> uevent_keyval;
+  base::SplitStringIntoKeyValuePairs(uevent_content, '=', '\n', &uevent_keyval);
+  std::vector<std::pair<std::string, std::string>>::const_iterator iter;
+  for (iter = uevent_keyval.begin();
+       iter != uevent_keyval.end();
+       ++iter) {
+    if (iter->first == "DRIVER") {
+      return iter->second;
+    }
+  }
+
+  return "";
+}
diff --git a/crash_reporter/udev_collector.h b/crash_reporter/udev_collector.h
index 68eb85a..1689dd3 100644
--- a/crash_reporter/udev_collector.h
+++ b/crash_reporter/udev_collector.h
@@ -26,9 +26,33 @@
   // could be omitted, in which case it would be treated as a wildcard (*).
   bool HandleCrash(const std::string& udev_event);
 
+ protected:
+  std::string dev_coredump_directory_;
+
  private:
   friend class UdevCollectorTest;
 
+  // Process udev crash logs, collecting log files according to the config
+  // file (crash_reporter_logs.conf).
+  bool ProcessUdevCrashLogs(const base::FilePath& crash_directory,
+                            const std::string& action,
+                            const std::string& kernel,
+                            const std::string& subsystem);
+  // Process device coredump, collecting device coredump file.
+  // |instance_number| is the kernel number of the virtual device for the device
+  // coredump instance.
+  bool ProcessDevCoredump(const base::FilePath& crash_directory,
+                          int instance_number);
+  // Copy device coredump file to crash directory, and perform necessary
+  // coredump file management.
+  bool AppendDevCoredump(const base::FilePath& crash_directory,
+                         const base::FilePath& coredump_path,
+                         int instance_number);
+  // Clear the device coredump file by performing a dummy write to it.
+  bool ClearDevCoredump(const base::FilePath& coredump_path);
+  // Return the driver name of the device that generates the coredump.
+  std::string GetFailingDeviceDriverName(int instance_number);
+
   // Mutator for unit testing.
   void set_log_config_path(const std::string& path) {
     log_config_path_ = base::FilePath(path);
diff --git a/crash_reporter/udev_collector_test.cc b/crash_reporter/udev_collector_test.cc
index 1705a6d..08d9b2c 100644
--- a/crash_reporter/udev_collector_test.cc
+++ b/crash_reporter/udev_collector_test.cc
@@ -5,6 +5,7 @@
 #include <base/files/file_enumerator.h>
 #include <base/files/file_util.h>
 #include <base/files/scoped_temp_dir.h>
+#include <base/strings/stringprintf.h>
 #include <chromeos/syslog_logging.h>
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -18,12 +19,25 @@
 // Dummy log config file name.
 const char kLogConfigFileName[] = "log_config_file";
 
+// Dummy directory for storing device coredumps.
+const char kDevCoredumpDirectory[] = "devcoredump";
+
 // A bunch of random rules to put into the dummy log config file.
 const char kLogConfigFileContents[] =
     "crash_reporter-udev-collection-change-card0-drm=echo change card0 drm\n"
     "crash_reporter-udev-collection-add-state0-cpu=echo change state0 cpu\n"
+    "crash_reporter-udev-collection-devcoredump-iwlwifi=echo devcoredump\n"
     "cros_installer=echo not for udev";
 
+const char kCrashLogFilePattern[] = "*.log.gz";
+const char kDevCoredumpFilePattern[] = "*.devcore";
+
+// Dummy content for device coredump data file.
+const char kDevCoredumpDataContents[] = "coredump";
+
+// Content for failing device's uevent file.
+const char kFailingDeviceUeventContents[] = "DRIVER=iwlwifi\n";
+
 void CountCrash() {}
 
 bool s_consent_given = true;
@@ -32,10 +46,11 @@
   return s_consent_given;
 }
 
-// Returns the number of compressed crash log files found in the given path.
-int GetNumLogFiles(const FilePath& path) {
+// Returns the number of files found in the given path that matches the
+// specified file name pattern.
+int GetNumFiles(const FilePath& path, const std::string& file_pattern) {
   base::FileEnumerator enumerator(path, false, base::FileEnumerator::FILES,
-                                  "*.log.gz");
+                                  file_pattern);
   int num_files = 0;
   for (FilePath file_path = enumerator.Next();
        !file_path.value().empty();
@@ -60,6 +75,35 @@
     collector_.HandleCrash(udev_event);
   }
 
+  void GenerateDevCoredump(const std::string& device_name) {
+    // Generate coredump data file.
+    ASSERT_TRUE(CreateDirectory(
+        FilePath(base::StringPrintf("%s/%s",
+                                    collector_.dev_coredump_directory_.c_str(),
+                                    device_name.c_str()))));
+    FilePath data_path =
+        FilePath(base::StringPrintf("%s/%s/data",
+                                    collector_.dev_coredump_directory_.c_str(),
+                                    device_name.c_str()));
+    ASSERT_EQ(strlen(kDevCoredumpDataContents),
+              base::WriteFile(data_path,
+                              kDevCoredumpDataContents,
+                              strlen(kDevCoredumpDataContents)));
+    // Generate uevent file for failing device.
+    ASSERT_TRUE(CreateDirectory(
+        FilePath(base::StringPrintf("%s/%s/failing_device",
+                                    collector_.dev_coredump_directory_.c_str(),
+                                    device_name.c_str()))));
+    FilePath uevent_path =
+        FilePath(base::StringPrintf("%s/%s/failing_device/uevent",
+                                    collector_.dev_coredump_directory_.c_str(),
+                                    device_name.c_str()));
+    ASSERT_EQ(strlen(kFailingDeviceUeventContents),
+              base::WriteFile(uevent_path,
+                              kFailingDeviceUeventContents,
+                              strlen(kFailingDeviceUeventContents)));
+  }
+
  private:
   void SetUp() override {
     s_consent_given = true;
@@ -75,6 +119,10 @@
     collector_.log_config_path_ = log_config_path;
     collector_.ForceCrashDirectory(temp_dir_generator_.path());
 
+    FilePath dev_coredump_path =
+        temp_dir_generator_.path().Append(kDevCoredumpDirectory);
+    collector_.dev_coredump_directory_ = dev_coredump_path.value();
+
     // Write to a dummy log config file.
     ASSERT_EQ(strlen(kLogConfigFileContents),
               base::WriteFile(log_config_path,
@@ -90,22 +138,33 @@
 TEST_F(UdevCollectorTest, TestNoConsent) {
   s_consent_given = false;
   HandleCrash("ACTION=change:KERNEL=card0:SUBSYSTEM=drm");
-  EXPECT_EQ(0, GetNumLogFiles(temp_dir_generator_.path()));
+  EXPECT_EQ(0, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern));
 }
 
 TEST_F(UdevCollectorTest, TestNoMatch) {
   // No rule should match this.
   HandleCrash("ACTION=change:KERNEL=foo:SUBSYSTEM=bar");
-  EXPECT_EQ(0, GetNumLogFiles(temp_dir_generator_.path()));
+  EXPECT_EQ(0, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern));
 }
 
 TEST_F(UdevCollectorTest, TestMatches) {
   // Try multiple udev events in sequence.  The number of log files generated
   // should increase.
   HandleCrash("ACTION=change:KERNEL=card0:SUBSYSTEM=drm");
-  EXPECT_EQ(1, GetNumLogFiles(temp_dir_generator_.path()));
+  EXPECT_EQ(1, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern));
   HandleCrash("ACTION=add:KERNEL=state0:SUBSYSTEM=cpu");
-  EXPECT_EQ(2, GetNumLogFiles(temp_dir_generator_.path()));
+  EXPECT_EQ(2, GetNumFiles(temp_dir_generator_.path(), kCrashLogFilePattern));
+}
+
+TEST_F(UdevCollectorTest, TestDevCoredump) {
+  GenerateDevCoredump("devcd0");
+  HandleCrash("ACTION=add:KERNEL_NUMBER=0:SUBSYSTEM=devcoredump");
+  EXPECT_EQ(1, GetNumFiles(temp_dir_generator_.path(),
+                           kDevCoredumpFilePattern));
+  GenerateDevCoredump("devcd1");
+  HandleCrash("ACTION=add:KERNEL_NUMBER=1:SUBSYSTEM=devcoredump");
+  EXPECT_EQ(2, GetNumFiles(temp_dir_generator_.path(),
+                           kDevCoredumpFilePattern));
 }
 
 // TODO(sque, crosbug.com/32238) - test wildcard cases, multiple identical udev