dumpstate: use tombstoned/debuggerd for java traces.

- debuggerd_dump_traces now supports Java traces as well, so
  use that when dalvik.vm.stack-trace-dir is set. The output
  FD we use for the intercept is currently a regular file
  written by mkostemp, but this can be cleaned up once the
  old way of doing things is removed.

- We're no longer writing traces to a global trace file, so
  add bug report entries for all java traces written in the past
  30 minutes just as we do for native traces.

The minor refactoring that this CL undertakes was done under the
assumption that the old way of doing things will be removed in the
near future.

Bug: 32064548
Test: manual

Change-Id: I8af6f0a644115296dc41affc3b7cd98a2db32c48
diff --git a/cmds/dumpstate/utils.cpp b/cmds/dumpstate/utils.cpp
index 638c868..5698d1b 100644
--- a/cmds/dumpstate/utils.cpp
+++ b/cmds/dumpstate/utils.cpp
@@ -38,6 +38,7 @@
 #include <time.h>
 #include <unistd.h>
 
+#include <memory>
 #include <set>
 #include <string>
 #include <vector>
@@ -46,6 +47,7 @@
 #include <android-base/properties.h>
 #include <android-base/stringprintf.h>
 #include <android-base/strings.h>
+#include <android-base/unique_fd.h>
 #include <android/hidl/manager/1.0/IServiceManager.h>
 #include <cutils/properties.h>
 #include <cutils/sockets.h>
@@ -852,15 +854,143 @@
     return pids; // whether it was okay or not
 }
 
+const char* DumpTraces(const std::string& traces_path);
+const char* DumpTracesTombstoned(const std::string& traces_dir);
+
 /* dump Dalvik and native stack traces, return the trace file location (NULL if none) */
 const char *dump_traces() {
     DurationReporter duration_reporter("DUMP TRACES");
 
-    const char* result = nullptr;
+    const std::string traces_dir = android::base::GetProperty("dalvik.vm.stack-trace-dir", "");
+    if (!traces_dir.empty()) {
+        return DumpTracesTombstoned(traces_dir);
+    }
 
-    std::string traces_path = android::base::GetProperty("dalvik.vm.stack-trace-file", "");
-    if (traces_path.empty()) return nullptr;
+    const std::string traces_file = android::base::GetProperty("dalvik.vm.stack-trace-file", "");
+    if (!traces_file.empty()) {
+        return DumpTraces(traces_file);
+    }
 
+    return nullptr;
+}
+
+static bool IsZygote(int pid) {
+    static const std::string kZygotePrefix = "zygote";
+
+    std::string cmdline;
+    if (!android::base::ReadFileToString(android::base::StringPrintf("/proc/%d/cmdline", pid),
+                                         &cmdline)) {
+        return true;
+    }
+
+    return (cmdline.find(kZygotePrefix) == 0);
+}
+
+const char* DumpTracesTombstoned(const std::string& traces_dir) {
+    const std::string temp_file_pattern = traces_dir + "/dumptrace_XXXXXX";
+
+    const size_t buf_size = temp_file_pattern.length() + 1;
+    std::unique_ptr<char[]> file_name_buf(new char[buf_size]);
+    memcpy(file_name_buf.get(), temp_file_pattern.c_str(), buf_size);
+
+    // Create a new, empty file to receive all trace dumps.
+    //
+    // TODO: This can be simplified once we remove support for the old style
+    // dumps. We can have a file descriptor passed in to dump_traces instead
+    // of creating a file, closing it and then reopening it again.
+    android::base::unique_fd fd(mkostemp(file_name_buf.get(), O_APPEND | O_CLOEXEC));
+    if (fd < 0) {
+        MYLOGE("mkostemp on pattern %s: %s\n", file_name_buf.get(), strerror(errno));
+        return nullptr;
+    }
+
+    // Nobody should have access to this temporary file except dumpstate, but we
+    // temporarily grant 'read' to 'others' here because this file is created
+    // when tombstoned is still running as root, but dumped after dropping. This
+    // can go away once support for old style dumping has.
+    const int chmod_ret = fchmod(fd, 0666);
+    if (chmod_ret < 0) {
+        MYLOGE("fchmod on %s failed: %s\n", file_name_buf.get(), strerror(errno));
+        return nullptr;
+    }
+
+    std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
+    if (proc.get() == nullptr) {
+        MYLOGE("opendir /proc failed: %s\n", strerror(errno));
+        return nullptr;
+    }
+
+    // Number of times process dumping has timed out. If we encounter too many
+    // failures, we'll give up.
+    int timeout_failures = 0;
+    bool dalvik_found = false;
+
+    const std::set<int> hal_pids = get_interesting_hal_pids();
+
+    struct dirent* d;
+    while ((d = readdir(proc.get()))) {
+        int pid = atoi(d->d_name);
+        if (pid <= 0) {
+            continue;
+        }
+
+        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
+        std::string exe;
+        if (!android::base::Readlink(link_name, &exe)) {
+            continue;
+        }
+
+        bool is_java_process;
+        if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") {
+            // Don't bother dumping backtraces for the zygote.
+            if (IsZygote(pid)) {
+                continue;
+            }
+
+            dalvik_found = true;
+            is_java_process = true;
+        } else if (should_dump_native_traces(exe.c_str()) || hal_pids.find(pid) != hal_pids.end()) {
+            is_java_process = false;
+        } else {
+            // Probably a native process we don't care about, continue.
+            continue;
+        }
+
+        // If 3 backtrace dumps fail in a row, consider debuggerd dead.
+        if (timeout_failures == 3) {
+            dprintf(fd, "ERROR: Too many stack dump failures, exiting.\n");
+            break;
+        }
+
+        const uint64_t start = Nanotime();
+        const int ret = dump_backtrace_to_file_timeout(
+            pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace,
+            is_java_process ? 5 : 20, fd);
+
+        if (ret == -1) {
+            dprintf(fd, "dumping failed, likely due to a timeout\n");
+            timeout_failures++;
+            continue;
+        }
+
+        // We've successfully dumped stack traces, reset the failure count
+        // and write a summary of the elapsed time to the file and continue with the
+        // next process.
+        timeout_failures = 0;
+
+        dprintf(fd, "[dump %s stack %d: %.3fs elapsed]\n", is_java_process ? "dalvik" : "native",
+                pid, (float)(Nanotime() - start) / NANOS_PER_SEC);
+    }
+
+    if (!dalvik_found) {
+        MYLOGE("Warning: no Dalvik processes found to dump stacks\n");
+    }
+
+    return file_name_buf.release();
+}
+
+const char* DumpTraces(const std::string& traces_path) {
+    const char* result = NULL;
     /* move the old traces.txt (if any) out of the way temporarily */
     std::string anrtraces_path = traces_path + ".anr";
     if (rename(traces_path.c_str(), anrtraces_path.c_str()) && errno != ENOENT) {