Dumping stack traces to proto.
Bug: 72177715
Test: flash device and check incident.proto output
Change-Id: Id2a15e0fc62b66efe875949af97f0eb651c7e322
(cherry picked from commit 5c804e2b9893c7892900148229cf19fa6268e7dc)
diff --git a/cmds/incidentd/src/Section.cpp b/cmds/incidentd/src/Section.cpp
index 46243c05..6dd76a8 100644
--- a/cmds/incidentd/src/Section.cpp
+++ b/cmds/incidentd/src/Section.cpp
@@ -18,13 +18,19 @@
#include "Section.h"
+#include <dirent.h>
+#include <errno.h>
#include <wait.h>
#include <mutex>
+#include <set>
#include <android-base/file.h>
+#include <android-base/stringprintf.h>
#include <android/util/protobuf.h>
#include <binder/IServiceManager.h>
+#include <debuggerd/client.h>
+#include <dumputils/dump_utils.h>
#include <log/log_event_list.h>
#include <log/log_read.h>
#include <log/logprint.h>
@@ -33,6 +39,7 @@
#include "FdBuffer.h"
#include "Privacy.h"
#include "PrivacyBuffer.h"
+#include "frameworks/base/core/proto/android/os/backtrace.proto.h"
#include "frameworks/base/core/proto/android/os/data.proto.h"
#include "frameworks/base/core/proto/android/util/log.proto.h"
#include "incidentd_util.h"
@@ -95,6 +102,7 @@
return WriteFully(fd, buf, p - buf) ? NO_ERROR : -errno;
}
+// Reads data from FdBuffer and writes it to the requests file descriptor.
static status_t write_report_requests(const int id, const FdBuffer& buffer,
ReportRequestSet* requests) {
status_t err = -EBADF;
@@ -387,6 +395,7 @@
return NO_ERROR;
}
+
// ================================================================================
struct WorkerThreadData : public virtual RefBase {
const WorkerThreadSection* section;
@@ -413,7 +422,8 @@
WorkerThreadData::~WorkerThreadData() {}
// ================================================================================
-WorkerThreadSection::WorkerThreadSection(int id) : Section(id) {}
+// Constructs the section by forwarding id and timeoutMs unchanged to the Section base class.
+WorkerThreadSection::WorkerThreadSection(int id, const int64_t timeoutMs)
+ : Section(id, timeoutMs) {}
WorkerThreadSection::~WorkerThreadSection() {}
@@ -594,7 +604,7 @@
return readStatus;
}
- // TODO: wait for command here has one trade-off: the failed status of command won't be detected
+ // Waiting for the command here has one trade-off: a failed command status won't be detected
// until buffer timeout, but it has advatage on starting the data stream earlier.
status_t cmdStatus = wait_child(cmdPid);
status_t ihStatus = wait_child(ihPid);
@@ -694,7 +704,6 @@
}
status_t LogSection::BlockingCall(int pipeWriteFd) const {
- status_t err = NO_ERROR;
// Open log buffer and getting logs since last retrieved time if any.
unique_ptr<logger_list, void (*)(logger_list*)> loggers(
gLastLogsRetrieved.find(mLogID) == gLastLogsRetrieved.end()
@@ -705,15 +714,16 @@
if (android_logger_open(loggers.get(), mLogID) == NULL) {
ALOGW("LogSection %s: Can't get logger.", this->name.string());
- return err;
+ return NO_ERROR;
}
log_msg msg;
log_time lastTimestamp(0);
+ status_t err = NO_ERROR;
ProtoOutputStream proto;
while (true) { // keeps reading until logd buffer is fully read.
- status_t err = android_logger_list_read(loggers.get(), &msg);
+ err = android_logger_list_read(loggers.get(), &msg);
// err = 0 - no content, unexpected connection drop or EOF.
// err = +ive number - size of retrieved data from logger
// err = -ive number, OS supplied error _except_ for -EAGAIN
@@ -814,3 +824,133 @@
proto.flush(pipeWriteFd);
return err;
}
+
+// ================================================================================
+
+// Constructs a section that dumps stack traces for one category of process.
+// id and timeoutMs are forwarded to WorkerThreadSection; type is stored in mType, which
+// BlockingCall compares against "java", "native" and "hal" to pick the processes to dump.
+// NOTE(review): type is used unchecked, so callers presumably never pass nullptr - confirm.
+TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeoutMs)
+ : WorkerThreadSection(id, timeoutMs), mType(type) {
+ // Extend the section name with "tombstone <type>".
+ name += "tombstone ";
+ name += type;
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+TombstoneSection::~TombstoneSection() = default;
+
+// Walks /proc and, for every process that matches this section's type ("java", "native" or
+// "hal"), forks a helper process that dumps the target's backtrace into a pipe. Each dump is
+// appended to the output as a BackTraceProto TRACES entry (pid, dump bytes, dump duration) and
+// the accumulated proto is flushed to pipeWriteFd before returning.
+//
+// Returns NO_ERROR on success. If /proc cannot be opened, returns -errno without writing
+// anything. Any later failure (pipe setup, fork, reading the dump, closing the pipe) stops the
+// walk; traces collected so far are still flushed and the failure code is returned.
+//
+// NOTE(review): the pipe ends are closed by hand below. If Fpipe's destructor also closes its
+// descriptors this is a double close - confirm against the Fpipe implementation.
+status_t TombstoneSection::BlockingCall(int pipeWriteFd) const {
+    std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
+    if (proc.get() == nullptr) {
+        // Capture errno before logging; the logging call may overwrite it.
+        const int savedErrno = errno;
+        ALOGE("opendir /proc failed: %s\n", strerror(savedErrno));
+        return -savedErrno;
+    }
+
+    const std::set<int> hal_pids = get_interesting_hal_pids();
+
+    ProtoOutputStream proto;
+    struct dirent* d;
+    status_t err = NO_ERROR;
+    while ((d = readdir(proc.get()))) {
+        // Only numeric directory names are processes; everything else parses to <= 0.
+        int pid = atoi(d->d_name);
+        if (pid <= 0) {
+            continue;
+        }
+
+        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
+        std::string exe;
+        if (!android::base::Readlink(link_name, &exe)) {
+            ALOGE("Can't read '%s': %s\n", link_name.c_str(), strerror(errno));
+            continue;
+        }
+
+        // Classify the process and skip it unless it belongs to this section's type.
+        bool is_java_process = false;
+        if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") {
+            if (mType != "java") continue;
+            // Don't bother dumping backtraces for the zygote.
+            if (IsZygote(pid)) {
+                VLOG("Skipping Zygote");
+                continue;
+            }
+
+            is_java_process = true;
+        } else if (should_dump_native_traces(exe.c_str())) {
+            if (mType != "native") continue;
+        } else if (hal_pids.find(pid) != hal_pids.end()) {
+            if (mType != "hal") continue;
+        } else {
+            // Probably a native process we don't care about, continue.
+            VLOG("Skipping %d", pid);
+            continue;
+        }
+
+        Fpipe dumpPipe;
+        if (!dumpPipe.init()) {
+            const int savedErrno = errno;
+            ALOGW("TombstoneSection '%s' failed to setup dump pipe", this->name.string());
+            err = -savedErrno;
+            break;
+        }
+
+        const uint64_t start = Nanotime();
+        const pid_t child = fork();
+        if (child < 0) {
+            // Report the failure to the caller instead of returning NO_ERROR.
+            err = -errno;
+            ALOGE("Failed to fork child process");
+            break;
+        }
+        if (child == 0) {
+            // This is the child process: it only writes the dump into the pipe and exits.
+            close(dumpPipe.readFd());
+            const int ret = dump_backtrace_to_file_timeout(
+                    pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace,
+                    is_java_process ? 5 : 20, dumpPipe.writeFd());
+            if (ret == -1) {
+                if (errno == 0) {
+                    ALOGW("Dumping failed for pid '%d', likely due to a timeout\n", pid);
+                } else {
+                    ALOGE("Dumping failed for pid '%d': %s\n", pid, strerror(errno));
+                }
+            }
+            if (close(dumpPipe.writeFd()) != 0) {
+                ALOGW("TombstoneSection '%s' failed to close dump pipe writeFd: %d",
+                      this->name.string(), errno);
+                _exit(EXIT_FAILURE);
+            }
+
+            _exit(EXIT_SUCCESS);
+        }
+
+        // Parent process.
+        // Drop our copy of the write end so the read below sees EOF once the child is done.
+        close(dumpPipe.writeFd());
+        // Read from the pipe concurrently to avoid blocking the child.
+        FdBuffer buffer;
+        err = buffer.readFully(dumpPipe.readFd());
+        const uint64_t dumpDurationNs = Nanotime() - start;
+
+        // Close the read end before waiting so a child that is still writing cannot block on a
+        // full pipe. Remember the failure code; it is reported further down.
+        const int closeErrno = (close(dumpPipe.readFd()) != 0) ? errno : 0;
+
+        // Wait on the child to avoid it becoming a zombie process.
+        const status_t childStatus = wait_child(child);
+        if (childStatus != NO_ERROR) {
+            ALOGW("TombstoneSection '%s' dump child for pid %d had an issue: %d",
+                  this->name.string(), pid, childStatus);
+        }
+
+        if (err != NO_ERROR) {
+            ALOGW("TombstoneSection '%s' failed to read stack dump: %d", this->name.string(), err);
+            if (closeErrno != 0) {
+                ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %s",
+                      this->name.string(), strerror(closeErrno));
+            }
+            break;
+        }
+
+        // Copy the buffered dump into a contiguous array for the proto writer.
+        auto dump = std::make_unique<char[]>(buffer.size());
+        auto iterator = buffer.data();
+        size_t dumpLen = 0;
+        while (iterator.hasNext()) {
+            dump[dumpLen] = iterator.next();
+            dumpLen++;
+        }
+        long long token = proto.start(android::os::BackTraceProto::TRACES);
+        proto.write(android::os::BackTraceProto::Stack::PID, pid);
+        proto.write(android::os::BackTraceProto::Stack::DUMP, dump.get(), dumpLen);
+        proto.write(android::os::BackTraceProto::Stack::DUMP_DURATION_NS,
+                    static_cast<long long>(dumpDurationNs));
+        proto.end(token);
+
+        if (closeErrno != 0) {
+            ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %d", this->name.string(),
+                  closeErrno);
+            err = -closeErrno;
+            break;
+        }
+    }
+
+    proto.flush(pipeWriteFd);
+    return err;
+}