Strip MAC addresses from dmesg sent w/ kcrash files.
BUG=chromium-os:13579
TEST=Ran the kernel_collector_test
rm -f /build/x86-mario/tmp/portage/chromeos-base/crash-reporter-9999/.tested; cros_workon_make --test --board=x86-mario crash-reporter
[ RUN ] KernelCollectorTest.StripSensitiveDataBasic
[ OK ] KernelCollectorTest.StripSensitiveDataBasic (1 ms)
[ RUN ] KernelCollectorTest.StripSensitiveDataBulk
[ OK ] KernelCollectorTest.StripSensitiveDataBulk (2 ms)
[ RUN ] KernelCollectorTest.StripSensitiveDataSample
[ OK ] KernelCollectorTest.StripSensitiveDataSample (0 ms)
Review URL: http://codereview.chromium.org/6777001
Change-Id: Ie2cd3d007f9ee2fb877d28280cfe62748c108bd2
diff --git a/crash_reporter/kernel_collector.cc b/crash_reporter/kernel_collector.cc
index 56fa301..529e35f 100644
--- a/crash_reporter/kernel_collector.cc
+++ b/crash_reporter/kernel_collector.cc
@@ -68,6 +68,81 @@
return true;
}
+void KernelCollector::StripSensitiveData(std::string *kernel_dump) {
+ // Strip any data that the user might not want sent up to the crash servers.
+ // The dump is read from *kernel_dump and the stripped text is written back
+ // to the same string.
+ // At the moment, the only sensitive data we strip is MAC addresses.
+
+ // Get rid of things that look like MAC addresses, since they could possibly
+ // give information about where someone has been. These are strings of six
+ // colon-separated hex byte pairs, such as: 11:22:33:44:55:66
+ // Complications:
+ // - Within a given kernel_dump, we want to be able to tell when the same MAC
+ // was used more than once. Thus, we consistently replace the first distinct
+ // MAC found with 00:00:00:00:00:01, the second with ...:02, etc.
+ // - ACPI commands look like MAC addresses. We specifically avoid rewriting
+ // those (see acpi_re below).
+ std::ostringstream result;
+ std::string pre_mac_str;
+ std::string mac_str;
+ std::map<std::string, std::string> mac_map;
+ pcrecpp::StringPiece input(*kernel_dump);
+
+ // This RE finds the next MAC address, capturing the text that precedes it
+ // (non-greedy, so the earliest MAC wins) and the MAC itself.
+ pcrecpp::RE mac_re("(.*?)("
+ "[0-9a-fA-F][0-9a-fA-F]:"
+ "[0-9a-fA-F][0-9a-fA-F]:"
+ "[0-9a-fA-F][0-9a-fA-F]:"
+ "[0-9a-fA-F][0-9a-fA-F]:"
+ "[0-9a-fA-F][0-9a-fA-F]:"
+ "[0-9a-fA-F][0-9a-fA-F])",
+ pcrecpp::RE_Options()
+ .set_multiline(true)
+ .set_dotall(true));
+
+ // This RE detects "ACPI cmd ef/" at the end of a line of 'pre_mac_str', in
+ // which case the "MAC" is really an ACPI cmd. The full string looks like:
+ // ata1.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES) filtered out
+ pcrecpp::RE acpi_re("ACPI cmd ef/$",
+ pcrecpp::RE_Options()
+ .set_multiline(true)
+ .set_dotall(true));
+
+ // Consume one match at a time from the front of input, appending to result.
+ while (mac_re.Consume(&input, &pre_mac_str, &mac_str)) {
+ if (acpi_re.PartialMatch(pre_mac_str)) {
+ // Really an ACPI command; copy it to result unmodified.
+ result << pre_mac_str << mac_str;
+ } else {
+ // Found a MAC; look it up (operator[] adds an empty entry if it is new).
+ std::string replacement_mac = mac_map[mac_str];
+ if (replacement_mac == "") {
+ // New MAC: size() already counts the entry just added, so ids start at 1.
+ int mac_id = mac_map.size();
+
+ // The id fills the low four bytes of the fake MAC; overkill, but harmless.
+ replacement_mac = StringPrintf("00:00:%02x:%02x:%02x:%02x",
+ (mac_id & 0xff000000) >> 24,
+ (mac_id & 0x00ff0000) >> 16,
+ (mac_id & 0x0000ff00) >> 8,
+ (mac_id & 0x000000ff));
+ mac_map[mac_str] = replacement_mac;
+ }
+
+ // Append the text before the MAC, then the fake MAC, to result.
+ result << pre_mac_str << replacement_mac;
+ }
+ }
+
+ // Whatever follows the last match is still in input; append it as-is.
+ result << input;
+
+ // Hand the stripped text back to the caller through kernel_dump.
+ *kernel_dump = result.str();
+}
+
bool KernelCollector::Enable() {
if (arch_ == archUnknown || arch_ >= archCount ||
s_pc_regex[arch_] == NULL) {
@@ -300,6 +375,7 @@
if (!LoadPreservedDump(&kernel_dump)) {
return false;
}
+ StripSensitiveData(&kernel_dump);
if (kernel_dump.empty()) {
return false;
}
diff --git a/crash_reporter/kernel_collector.h b/crash_reporter/kernel_collector.h
index 04f3bdd..1c86f40 100644
--- a/crash_reporter/kernel_collector.h
+++ b/crash_reporter/kernel_collector.h
@@ -58,10 +58,14 @@
friend class KernelCollectorTest;
FRIEND_TEST(KernelCollectorTest, ClearPreservedDump);
FRIEND_TEST(KernelCollectorTest, LoadPreservedDump);
+ FRIEND_TEST(KernelCollectorTest, StripSensitiveDataBasic);
+ FRIEND_TEST(KernelCollectorTest, StripSensitiveDataBulk);
+ FRIEND_TEST(KernelCollectorTest, StripSensitiveDataSample);
FRIEND_TEST(KernelCollectorTest, CollectOK);
bool LoadPreservedDump(std::string *contents);
bool ClearPreservedDump();
+ void StripSensitiveData(std::string *kernel_dump);
void ProcessStackTrace(pcrecpp::StringPiece kernel_dump,
bool print_diagnostics,
diff --git a/crash_reporter/kernel_collector_test.cc b/crash_reporter/kernel_collector_test.cc
index 08e3169..bfa2f04 100644
--- a/crash_reporter/kernel_collector_test.cc
+++ b/crash_reporter/kernel_collector_test.cc
@@ -100,6 +100,123 @@
ASSERT_EQ(KernelCollector::kClearingSequence, dump);
}
+TEST_F(KernelCollectorTest, StripSensitiveDataBasic) {
+ // Basic single-input checks of StripSensitiveData.
+
+ // A string w/ no MAC addresses must come back unchanged.
+ const std::string kCrashWithNoMacsOrig =
+ "<7>[111566.131728] PM: Entering mem sleep\n";
+ std::string crash_with_no_macs(kCrashWithNoMacsOrig);
+ collector_.StripSensitiveData(&crash_with_no_macs);
+ EXPECT_EQ(kCrashWithNoMacsOrig, crash_with_no_macs);
+
+ // Make sure that we handle the case where there's nothing before or after
+ // the MAC address.
+ const std::string kJustAMacOrig =
+ "11:22:33:44:55:66";
+ const std::string kJustAMacStripped =
+ "00:00:00:00:00:01";
+ std::string just_a_mac(kJustAMacOrig);
+ collector_.StripSensitiveData(&just_a_mac);
+ EXPECT_EQ(kJustAMacStripped, just_a_mac);
+
+ // Test MAC addresses crammed together to make sure it gets both of them.
+ //
+ // I'm not sure that the code behaves ideally on these two test cases (they
+ // don't look like two MAC addresses to me), but since we don't see them in
+ // practice I think it's OK to behave as shown here.
+ const std::string kCrammedMacs1Orig =
+ "11:22:33:44:55:66:11:22:33:44:55:66";
+ const std::string kCrammedMacs1Stripped =
+ "00:00:00:00:00:01:00:00:00:00:00:01";
+ std::string crammed_macs_1(kCrammedMacs1Orig);
+ collector_.StripSensitiveData(&crammed_macs_1);
+ EXPECT_EQ(kCrammedMacs1Stripped, crammed_macs_1);
+
+ const std::string kCrammedMacs2Orig =
+ "11:22:33:44:55:6611:22:33:44:55:66";
+ const std::string kCrammedMacs2Stripped =
+ "00:00:00:00:00:0100:00:00:00:00:01";
+ std::string crammed_macs_2(kCrammedMacs2Orig);
+ collector_.StripSensitiveData(&crammed_macs_2);
+ EXPECT_EQ(kCrammedMacs2Stripped, crammed_macs_2);
+
+ // Test case-insensitivity: upper- and lower-case hex digits both match.
+ const std::string kCapsMacOrig =
+ "AA:BB:CC:DD:EE:FF";
+ const std::string kCapsMacStripped =
+ "00:00:00:00:00:01";
+ std::string caps_mac(kCapsMacOrig);
+ collector_.StripSensitiveData(&caps_mac);
+ EXPECT_EQ(kCapsMacStripped, caps_mac);
+
+ const std::string kLowerMacOrig =
+ "aa:bb:cc:dd:ee:ff";
+ const std::string kLowerMacStripped =
+ "00:00:00:00:00:01";
+ std::string lower_mac(kLowerMacOrig);
+ collector_.StripSensitiveData(&lower_mac);
+ EXPECT_EQ(kLowerMacStripped, lower_mac);
+}
+
+TEST_F(KernelCollectorTest, StripSensitiveDataBulk) {
+ // Test calling StripSensitiveData w/ lots of MAC addresses in the "log".
+
+ // Test that the stripping code handles more than 256 unique MAC addresses,
+ // since the replacement id then carries over from the last byte.
+ // We generate 258 unique MACs and, since replacement ids start at 1, expect
+ // the i-th to map to id i+1. Sorta cheating since this mirrors the code in
+ // StripSensitiveData(), but it would catch someone changing that later.
+ std::string lotsa_macs_orig;
+ std::string lotsa_macs_stripped;
+ int i;
+ for (i = 0; i < 258; i++) {
+ lotsa_macs_orig += StringPrintf(" 11:11:11:11:%02X:%02x",
+ (i & 0xff00) >> 8, i & 0x00ff);
+ lotsa_macs_stripped += StringPrintf(" 00:00:00:00:%02X:%02x",
+ ((i+1) & 0xff00) >> 8, (i+1) & 0x00ff);
+ }
+ std::string lotsa_macs(lotsa_macs_orig);
+ collector_.StripSensitiveData(&lotsa_macs);
+ EXPECT_EQ(lotsa_macs_stripped, lotsa_macs);
+}
+
+TEST_F(KernelCollectorTest, StripSensitiveDataSample) {
+ // Test StripSensitiveData on lines adapted from a real crash log: two distinct
+ // (made-up) MACs that repeat, plus an ACPI cmd line that must be left alone.
+ const std::string kCrashWithMacsOrig =
+ "<6>[111567.195339] ata1.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES)"
+ " filtered out\n"
+ "<7>[108539.540144] wlan0: authenticate with 11:22:33:44:55:66 (try 1)\n"
+ "<7>[108539.554973] wlan0: associate with 11:22:33:44:55:66 (try 1)\n"
+ "<6>[110136.587583] usb0: register 'QCUSBNet2k' at usb-0000:00:1d.7-2,"
+ " QCUSBNet Ethernet Device, 99:88:77:66:55:44\n"
+ "<7>[110964.314648] wlan0: deauthenticated from 11:22:33:44:55:66"
+ " (Reason: 6)\n"
+ "<7>[110964.325057] phy0: Removed STA 11:22:33:44:55:66\n"
+ "<7>[110964.325115] phy0: Destroyed STA 11:22:33:44:55:66\n"
+ "<6>[110969.219172] usb0: register 'QCUSBNet2k' at usb-0000:00:1d.7-2,"
+ " QCUSBNet Ethernet Device, 99:88:77:66:55:44\n"
+ "<7>[111566.131728] PM: Entering mem sleep\n";
+ const std::string kCrashWithMacsStripped =
+ "<6>[111567.195339] ata1.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES)"
+ " filtered out\n"
+ "<7>[108539.540144] wlan0: authenticate with 00:00:00:00:00:01 (try 1)\n"
+ "<7>[108539.554973] wlan0: associate with 00:00:00:00:00:01 (try 1)\n"
+ "<6>[110136.587583] usb0: register 'QCUSBNet2k' at usb-0000:00:1d.7-2,"
+ " QCUSBNet Ethernet Device, 00:00:00:00:00:02\n"
+ "<7>[110964.314648] wlan0: deauthenticated from 00:00:00:00:00:01"
+ " (Reason: 6)\n"
+ "<7>[110964.325057] phy0: Removed STA 00:00:00:00:00:01\n"
+ "<7>[110964.325115] phy0: Destroyed STA 00:00:00:00:00:01\n"
+ "<6>[110969.219172] usb0: register 'QCUSBNet2k' at usb-0000:00:1d.7-2,"
+ " QCUSBNet Ethernet Device, 00:00:00:00:00:02\n"
+ "<7>[111566.131728] PM: Entering mem sleep\n";
+ std::string crash_with_macs(kCrashWithMacsOrig);
+ collector_.StripSensitiveData(&crash_with_macs);
+ EXPECT_EQ(kCrashWithMacsStripped, crash_with_macs);
+}
+
TEST_F(KernelCollectorTest, CollectPreservedFileMissing) {
ASSERT_FALSE(collector_.Collect());
ASSERT_TRUE(FindLog("Unable to read test/kcrash"));