Handle Chrome crashes

Chrome will be modified to pass crashes to crash_reporter via a specially
formatted file, a TLV-style designed to be easily parsed, instead of
multipart/form-data. Extra attributes are added into the .meta file.

BUG=chromium:187808
TEST=new unit tests created

Change-Id: I0d65d118e0e348329d14bb4004c8f7bad5a44a97
Reviewed-on: https://gerrit.chromium.org/gerrit/49972
Reviewed-by: Chris Masone <cmasone@chromium.org>
Commit-Queue: Albert Chaulk <achaulk@chromium.org>
Tested-by: Albert Chaulk <achaulk@chromium.org>
diff --git a/crash_reporter/Makefile b/crash_reporter/Makefile
index 4a0f488..c784e6e 100644
--- a/crash_reporter/Makefile
+++ b/crash_reporter/Makefile
@@ -10,6 +10,7 @@
 PKG_CONFIG ?= pkg-config
 
 CRASH_OBJS = \
+	chrome_collector.o \
 	crash_collector.o \
 	kernel_collector.o \
 	kernel_warning_collector.o \
@@ -60,6 +61,7 @@
 # -- Unit Tests --
 
 # Uncomment these to just run specific test(s).
+#TEST_BINS += chrome_collector_test
 #TEST_BINS += crash_collector_test
 #TEST_BINS += kernel_collector_test
 #TEST_BINS += udev_collector_test
diff --git a/crash_reporter/chrome_collector.cc b/crash_reporter/chrome_collector.cc
new file mode 100644
index 0000000..20170bd
--- /dev/null
+++ b/crash_reporter/chrome_collector.cc
@@ -0,0 +1,175 @@
+// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "crash-reporter/chrome_collector.h"
+
+#include <pcrecpp.h>
+#include <string>
+#include <vector>
+
+#include <base/file_util.h>
+#include <base/logging.h>
+#include <base/string_number_conversions.h>
+#include <base/string_util.h>
+#include "chromeos/process.h"
+#include "chromeos/syslog_logging.h"
+
+const char kDefaultMinidumpName[] = "upload_file_minidump";
+
+namespace {
+
+// Extract a string delimited by the given character, from the given offset
+// into a source string. Returns false if the string is zero-sized or no
+// delimiter was found.
+bool GetDelimitedString(const std::string &str, char ch, size_t offset,
+                        std::string *substr) {
+  size_t at = str.find_first_of(ch, offset);
+  if (at == std::string::npos || at == offset)
+    return false;
+  *substr = str.substr(offset, at - offset);
+  return true;
+}
+
+} //namespace
+
+
+ChromeCollector::ChromeCollector() {}
+
+ChromeCollector::~ChromeCollector() {}
+
+bool ChromeCollector::HandleCrash(const std::string &file_path,
+                                  const std::string &pid_string,
+                                  const std::string &uid_string) {
+  if (!is_feedback_allowed_function_())
+    return true;
+
+
+  FilePath dir;
+  uid_t uid = atoi(uid_string.c_str());
+  pid_t pid = atoi(pid_string.c_str());
+  if (!GetCreatedCrashDirectoryByEuid(uid, &dir, NULL)) {
+    LOG(ERROR) << "Can't create crash directory for uid " << uid;
+    return false;
+  }
+
+  std::string exec;
+  if (!GetExecutableBaseNameFromPid(pid, &exec)) {
+    LOG(ERROR) << "Can't get executable name for pid " << pid;
+    return false;
+  }
+
+  std::string dump_basename = FormatDumpBasename(exec, time(NULL), pid);
+  FilePath meta_path = GetCrashPath(dir, dump_basename, "meta");
+  FilePath minidump_path = GetCrashPath(dir, dump_basename, "dmp");
+  FilePath log_path = GetCrashPath(dir, dump_basename, "log");
+
+  std::string data;
+  if (!file_util::ReadFileToString(FilePath(file_path), &data)) {
+    LOG(ERROR) << "Can't read crash log: " << file_path.c_str();
+    return false;
+  }
+
+  if (!ParseCrashLog(data, dir, minidump_path)) {
+    LOG(ERROR) << "Failed to parse Chrome's crash log";
+    return false;
+  }
+
+  if (GetLogContents(FilePath(log_config_path_), exec, log_path))
+    AddCrashMetaData("log", log_path.value());
+
+  // We're done.
+  WriteCrashMetaData(meta_path, exec, minidump_path.value());
+
+  return true;
+}
+
+bool ChromeCollector::ParseCrashLog(const std::string &data,
+                                    const FilePath &dir,
+                                    const FilePath &minidump) {
+  size_t at = 0;
+  while (at < data.size()) {
+    // Look for a : followed by a decimal number, followed by another :
+    // followed by N bytes of data.
+    std::string name, size_string;
+    if (!GetDelimitedString(data, ':', at, &name))
+      break;
+    at += name.size() + 1; // Skip the name & : delimiter.
+
+    if (!GetDelimitedString(data, ':', at, &size_string))
+      break;
+    at += size_string.size() + 1; // Skip the size & : delimiter.
+
+    size_t size;
+    if (!base::StringToSizeT(size_string, &size))
+      break;
+
+    // Data would run past the end, did we get a truncated file?
+    if (at + size > data.size())
+      break;
+
+    if (name.find("filename") != std::string::npos) {
+      // File.
+      // Name will be in a semi-MIME format of
+      // <descriptive name>"; filename="<name>"
+      // Descriptive name will be upload_file_minidump for the dump.
+      std::string desc, filename;
+      pcrecpp::RE re("(.*)\" *; *filename=\"(.*)\"");
+      if (!re.FullMatch(name.c_str(), &desc, &filename))
+        break;
+
+      if (filename.compare(kDefaultMinidumpName) == 0) {
+        // The minidump.
+        WriteNewFile(minidump, data.c_str() + at, size);
+      } else {
+        // Some other file.
+        FilePath path = GetCrashPath(dir, filename, "other");
+        if (WriteNewFile(path, data.c_str() + at, size) >= 0) {
+          std::string value = "@";
+          value.append(path.value());
+          AddCrashMetaData(desc, value);
+        }
+      }
+    } else {
+      // Other attribute.
+      std::string value_str;
+      value_str.reserve(size);
+
+      // Since metadata is one line/value the values must be escaped properly.
+      for (size_t i = at; i < at + size; i++) {
+        switch (data[i]) {
+          case '"':
+          case '\\':
+            value_str.push_back('\\');
+            value_str.push_back(data[i]);
+            break;
+
+          case '\r':
+            value_str += "\\r";
+            break;
+
+          case '\n':
+            value_str += "\\n";
+           break;
+
+          case '\t':
+            value_str += "\\t";
+           break;
+
+          case '\0':
+            value_str += "\\0";
+           break;
+
+          default:
+           value_str.push_back(data[i]);
+           break;
+        }
+      }
+      AddCrashMetaData(name, value_str);
+    }
+
+    at += size;
+  }
+
+  return at == data.size();
+}
diff --git a/crash_reporter/chrome_collector.h b/crash_reporter/chrome_collector.h
new file mode 100644
index 0000000..223bb00
--- /dev/null
+++ b/crash_reporter/chrome_collector.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef _CRASH_REPORTER_CHROME_COLLECTOR_H_
+#define _CRASH_REPORTER_CHROME_COLLECTOR_H_
+
+#include <string>
+
+#include "base/file_path.h"
+#include "crash-reporter/crash_collector.h"
+#include "gtest/gtest_prod.h"  // for FRIEND_TEST
+
+class SystemLogging;
+
+// Chrome crash collector.
+class ChromeCollector : public CrashCollector {
+ public:
+  ChromeCollector();
+  virtual ~ChromeCollector();
+
+  // Handle a specific chrome crash.  Returns true on success.
+  bool HandleCrash(const std::string &file_path, const std::string &pid_string,
+                   const std::string &uid_string);
+
+ private:
+  friend class ChromeCollectorTest;
+  FRIEND_TEST(ChromeCollectorTest, GoodValues);
+  FRIEND_TEST(ChromeCollectorTest, BadValues);
+  FRIEND_TEST(ChromeCollectorTest, Newlines);
+  FRIEND_TEST(ChromeCollectorTest, File);
+
+  // Crashes are expected to be in a TLV-style format of:
+  // <name>:<length>:<value>
+  // Length is encoded as a decimal number. It can be zero, but must consist of
+  // at least one character
+  // For file values, name actually contains both a description and a filename,
+  // in a fixed format of: <description>"; filename="<filename>"
+  bool ParseCrashLog(const std::string &data, const base::FilePath &dir,
+                     const base::FilePath &minidump);
+};
+
+#endif
diff --git a/crash_reporter/chrome_collector_test.cc b/crash_reporter/chrome_collector_test.cc
new file mode 100644
index 0000000..b24a194
--- /dev/null
+++ b/crash_reporter/chrome_collector_test.cc
@@ -0,0 +1,126 @@
+// Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <bits/wordsize.h>
+#include <elf.h>
+#include <unistd.h>
+
+#include "base/file_util.h"
+#include "base/files/scoped_temp_dir.h"
+#include "base/string_split.h"
+#include "chromeos/syslog_logging.h"
+#include "chromeos/test_helpers.h"
+#include "crash-reporter/chrome_collector.h"
+#include "gflags/gflags.h"
+#include "gtest/gtest.h"
+
+using base::FilePath;
+
+static const char kCrashFormatGood[] = "value1:10:abcdefghijvalue2:5:12345";
+static const char kCrashFormatEmbeddedNewline[] =
+    "value1:10:abcd\r\nghijvalue2:5:12\n34";
+static const char kCrashFormatBad1[] = "value1:10:abcdefghijvalue2:6=12345";
+static const char kCrashFormatBad2[] = "value1:10:abcdefghijvalue2:512345";
+static const char kCrashFormatBad3[] = "value1:10::abcdefghijvalue2:5=12345";
+static const char kCrashFormatBad4[] = "value1:10:abcdefghijvalue2:4=12345";
+
+static const char kCrashFormatWithFile[] =
+    "value1:10:abcdefghijvalue2:5:12345"
+    "some_file\"; filename=\"foo.txt\":15:12345\n789\n12345"
+    "value3:2:ok";
+
+void CountCrash() {
+  static int s_crashes = 0;
+  ++s_crashes;
+}
+
+bool IsMetrics() {
+  return false;
+}
+
+class ChromeCollectorTest : public ::testing::Test {
+  void SetUp() {
+    collector_.Initialize(CountCrash, IsMetrics);
+    pid_ = getpid();
+    chromeos::ClearLog();
+  }
+
+ protected:
+  void ExpectFileEquals(const char *golden,
+                        const char *file_path) {
+    std::string contents;
+    EXPECT_TRUE(file_util::ReadFileToString(FilePath(file_path),
+                                            &contents));
+    EXPECT_EQ(golden, contents);
+  }
+
+  std::vector<std::string> SplitLines(const std::string &lines) const {
+    std::vector<std::string> result;
+    base::SplitString(lines, '\n', &result);
+    return result;
+  }
+
+  ChromeCollector collector_;
+  pid_t pid_;
+};
+
+TEST_F(ChromeCollectorTest, GoodValues) {
+  FilePath dir(".");
+  EXPECT_TRUE(collector_.ParseCrashLog(kCrashFormatGood,
+                                       dir, dir.Append("minidump.dmp")));
+
+  // Check to see if the values made it in properly.
+  std::string meta = collector_.extra_metadata_;
+  EXPECT_TRUE(meta.find("value1=abcdefghij") != std::string::npos);
+  EXPECT_TRUE(meta.find("value2=12345") != std::string::npos);
+}
+
+TEST_F(ChromeCollectorTest, Newlines) {
+  FilePath dir(".");
+  EXPECT_TRUE(collector_.ParseCrashLog(kCrashFormatEmbeddedNewline,
+                                       dir, dir.Append("minidump.dmp")));
+
+  // Check to see if the values were escaped.
+  std::string meta = collector_.extra_metadata_;
+  EXPECT_TRUE(meta.find("value1=abcd\\r\\nghij") != std::string::npos);
+  EXPECT_TRUE(meta.find("value2=12\\n34") != std::string::npos);
+}
+
+TEST_F(ChromeCollectorTest, BadValues) {
+  FilePath dir(".");
+  const struct {
+    const char *data;
+  } list[] = {
+    {kCrashFormatBad1, },
+    {kCrashFormatBad2, },
+    {kCrashFormatBad3, },
+    {kCrashFormatBad4, },
+  };
+
+  for (size_t i = 0; i < sizeof(list) / sizeof(list[0]); i++) {
+    chromeos::ClearLog();
+    EXPECT_FALSE(collector_.ParseCrashLog(list[i].data,
+                                          dir, dir.Append("minidump.dmp")));
+  }
+}
+
+TEST_F(ChromeCollectorTest, File) {
+  FilePath dir(".");
+  EXPECT_TRUE(collector_.ParseCrashLog(kCrashFormatWithFile,
+                                       dir, dir.Append("minidump.dmp")));
+
+  // Check to see if the values are still correct and that the file was
+  // written with the right data.
+  std::string meta = collector_.extra_metadata_;
+  EXPECT_TRUE(meta.find("value1=abcdefghij") != std::string::npos);
+  EXPECT_TRUE(meta.find("value2=12345") != std::string::npos);
+  EXPECT_TRUE(meta.find("value3=ok") != std::string::npos);
+  ExpectFileEquals("12345\n789\n12345", "foo.txt.other");
+  file_util::Delete(dir.Append("foo.txt.other"), false);
+}
+
+int main(int argc, char **argv) {
+  SetUpTests(&argc, argv, false);
+  return RUN_ALL_TESTS();
+}
diff --git a/crash_reporter/crash_collector.cc b/crash_reporter/crash_collector.cc
index 990ef03..85c0933 100644
--- a/crash_reporter/crash_collector.cc
+++ b/crash_reporter/crash_collector.cc
@@ -226,6 +226,66 @@
   return true;
 }
 
+FilePath CrashCollector::GetProcessPath(pid_t pid) {
+  return FilePath(StringPrintf("/proc/%d", pid));
+}
+
+bool CrashCollector::GetSymlinkTarget(const FilePath &symlink,
+                                     FilePath *target) {
+  int max_size = 32;
+  scoped_array<char> buffer;
+  while (true) {
+    buffer.reset(new char[max_size + 1]);
+    ssize_t size = readlink(symlink.value().c_str(), buffer.get(), max_size);
+    if (size < 0) {
+      int saved_errno = errno;
+      LOG(ERROR) << "Readlink failed on " << symlink.value() << " with "
+                 << saved_errno;
+      return false;
+    }
+    buffer[size] = 0;
+    if (size == max_size) {
+      // Avoid overflow when doubling.
+      if (max_size * 2 > max_size) {
+        max_size *= 2;
+        continue;
+      } else {
+        return false;
+      }
+    }
+    break;
+  }
+
+  *target = FilePath(buffer.get());
+  return true;
+}
+
+bool CrashCollector::GetExecutableBaseNameFromPid(pid_t pid,
+                                                 std::string *base_name) {
+  FilePath target;
+  FilePath process_path = GetProcessPath(pid);
+  FilePath exe_path = process_path.Append("exe");
+  if (!GetSymlinkTarget(exe_path, &target)) {
+    LOG(INFO) << "GetSymlinkTarget failed - Path " << process_path.value()
+              << " DirectoryExists: "
+              << file_util::DirectoryExists(process_path);
+    // Try to further diagnose exe readlink failure cause.
+    struct stat buf;
+    int stat_result = stat(exe_path.value().c_str(), &buf);
+    int saved_errno = errno;
+    if (stat_result < 0) {
+      LOG(INFO) << "stat " << exe_path.value() << " failed: " << stat_result
+                << " " << saved_errno;
+    } else {
+      LOG(INFO) << "stat " << exe_path.value() << " succeeded: st_mode="
+                << buf.st_mode;
+    }
+    return false;
+  }
+  *base_name = target.BaseName().value();
+  return true;
+}
+
 // Return true if the given crash directory has not already reached
 // maximum capacity.
 bool CrashCollector::CheckHasCapacity(const FilePath &crash_directory) {
diff --git a/crash_reporter/crash_collector.h b/crash_reporter/crash_collector.h
index 3f0a27d..949552a 100644
--- a/crash_reporter/crash_collector.h
+++ b/crash_reporter/crash_collector.h
@@ -102,6 +102,12 @@
                               const std::string &basename,
                               const std::string &extension);
 
+  base::FilePath GetProcessPath(pid_t pid);
+  bool GetSymlinkTarget(const base::FilePath &symlink,
+                        base::FilePath *target);
+  bool GetExecutableBaseNameFromPid(pid_t pid,
+                                    std::string *base_name);
+
   // Check given crash directory still has remaining capacity for another
   // crash.
   bool CheckHasCapacity(const base::FilePath &crash_directory);
diff --git a/crash_reporter/crash_reporter.cc b/crash_reporter/crash_reporter.cc
index d298108..914f711 100644
--- a/crash_reporter/crash_reporter.cc
+++ b/crash_reporter/crash_reporter.cc
@@ -7,13 +7,14 @@
 #include <string>
 #include <vector>
 
-#include "base/file_util.h"
-#include "base/command_line.h"
-#include "base/logging.h"
-#include "base/string_split.h"
-#include "base/string_util.h"
-#include "base/stringprintf.h"
+#include <base/file_util.h>
+#include <base/command_line.h>
+#include <base/logging.h>
+#include <base/string_split.h>
+#include <base/string_util.h>
+#include <base/stringprintf.h>
 #include "chromeos/syslog_logging.h"
+#include "crash-reporter/chrome_collector.h"
 #include "crash-reporter/kernel_collector.h"
 #include "crash-reporter/kernel_warning_collector.h"
 #include "crash-reporter/udev_collector.h"
@@ -32,6 +33,9 @@
 DEFINE_bool(unclean_check, true, "Check for unclean shutdown");
 DEFINE_string(udev, "", "Udev event description (type:device:subsystem)");
 DEFINE_bool(kernel_warning, false, "Report collected kernel warning");
+DEFINE_string(chrome, "", "Chrome crash dump file");
+DEFINE_string(pid, "", "PID of crashing process");
+DEFINE_string(uid, "", "UID of crashing process");
 #pragma GCC diagnostic error "-Wstrict-aliasing"
 
 static const char kCrashCounterHistogram[] = "Logging.CrashCounter";
@@ -109,6 +113,13 @@
   LOG_IF(WARNING, status != 0) << "dbus-send running failed";
 }
 
+static void CountChromeCrash() {
+  // For now, consider chrome crashes the same as user crashes for reporting
+  // purposes.
+  CountUserCrash();
+}
+
+
 static int Initialize(KernelCollector *kernel_collector,
                       UserCollector *user_collector,
                       UncleanShutdownCollector *unclean_shutdown_collector) {
@@ -166,6 +177,20 @@
   return 0;
 }
 
+static int HandleChromeCrash(ChromeCollector *chrome_collector) {
+  CHECK(!FLAGS_chrome.empty()) << "--chrome= must be set";
+  CHECK(!FLAGS_pid.empty()) << "--pid= must be set";
+  CHECK(!FLAGS_uid.empty()) << "--uid= must be set";
+
+  chromeos::LogToString(true);
+  bool handled = chrome_collector->HandleCrash(FLAGS_chrome, FLAGS_pid,
+                                               FLAGS_uid);
+  chromeos::LogToString(false);
+  if (!handled)
+    return 1;
+  return 0;
+}
+
 static int HandleUdevCrash(UdevCollector *udev_collector) {
   // Handle a crash indicated by a udev event.
   CHECK(!FLAGS_udev.empty()) << "--udev= must be set";
@@ -251,6 +276,8 @@
                                         IsFeedbackAllowed);
   UdevCollector udev_collector;
   udev_collector.Initialize(CountUdevCrash, IsFeedbackAllowed);
+  ChromeCollector chrome_collector;
+  chrome_collector.Initialize(CountChromeCrash, IsFeedbackAllowed);
 
   KernelWarningCollector kernel_warning_collector;
   udev_collector.Initialize(CountUdevCrash, IsFeedbackAllowed);
@@ -279,5 +306,9 @@
     return HandleKernelWarning(&kernel_warning_collector);
   }
 
+  if (!FLAGS_chrome.empty()) {
+    return HandleChromeCrash(&chrome_collector);
+  }
+
   return HandleUserCrash(&user_collector);
 }
diff --git a/crash_reporter/user_collector.cc b/crash_reporter/user_collector.cc
index a89ea81..51be283 100644
--- a/crash_reporter/user_collector.cc
+++ b/crash_reporter/user_collector.cc
@@ -134,66 +134,6 @@
   return true;
 }
 
-FilePath UserCollector::GetProcessPath(pid_t pid) {
-  return FilePath(StringPrintf("/proc/%d", pid));
-}
-
-bool UserCollector::GetSymlinkTarget(const FilePath &symlink,
-                                     FilePath *target) {
-  int max_size = 32;
-  scoped_array<char> buffer;
-  while (true) {
-    buffer.reset(new char[max_size + 1]);
-    ssize_t size = readlink(symlink.value().c_str(), buffer.get(), max_size);
-    if (size < 0) {
-      int saved_errno = errno;
-      LOG(ERROR) << "Readlink failed on " << symlink.value() << " with "
-                 << saved_errno;
-      return false;
-    }
-    buffer[size] = 0;
-    if (size == max_size) {
-      // Avoid overflow when doubling.
-      if (max_size * 2 > max_size) {
-        max_size *= 2;
-        continue;
-      } else {
-        return false;
-      }
-    }
-    break;
-  }
-
-  *target = FilePath(buffer.get());
-  return true;
-}
-
-bool UserCollector::GetExecutableBaseNameFromPid(pid_t pid,
-                                                 std::string *base_name) {
-  FilePath target;
-  FilePath process_path = GetProcessPath(pid);
-  FilePath exe_path = process_path.Append("exe");
-  if (!GetSymlinkTarget(exe_path, &target)) {
-    LOG(INFO) << "GetSymlinkTarget failed - Path " << process_path.value()
-              << " DirectoryExists: "
-              << file_util::DirectoryExists(process_path);
-    // Try to further diagnose exe readlink failure cause.
-    struct stat buf;
-    int stat_result = stat(exe_path.value().c_str(), &buf);
-    int saved_errno = errno;
-    if (stat_result < 0) {
-      LOG(INFO) << "stat " << exe_path.value() << " failed: " << stat_result
-                << " " << saved_errno;
-    } else {
-      LOG(INFO) << "stat " << exe_path.value() << " succeeded: st_mode="
-                << buf.st_mode;
-    }
-    return false;
-  }
-  *base_name = target.BaseName().value();
-  return true;
-}
-
 bool UserCollector::GetFirstLineWithPrefix(
     const std::vector<std::string> &lines,
     const char *prefix, std::string *line) {
diff --git a/crash_reporter/user_collector.h b/crash_reporter/user_collector.h
index c7ac0de..237a990 100644
--- a/crash_reporter/user_collector.h
+++ b/crash_reporter/user_collector.h
@@ -101,11 +101,6 @@
   std::string GetPattern(bool enabled) const;
   bool SetUpInternal(bool enabled);
 
-  base::FilePath GetProcessPath(pid_t pid);
-  bool GetSymlinkTarget(const base::FilePath &symlink,
-                        base::FilePath *target);
-  bool GetExecutableBaseNameFromPid(pid_t pid,
-                                    std::string *base_name);
   // Returns, via |line|, the first line in |lines| that starts with |prefix|.
   // Returns true if a line is found, or false otherwise.
   bool GetFirstLineWithPrefix(const std::vector<std::string> &lines,