Dumping stack traces to proto.
Bug: 72177715
Test: flash device and check incident.proto output
Change-Id: Id2a15e0fc62b66efe875949af97f0eb651c7e322
(cherry picked from commit 5c804e2b9893c7892900148229cf19fa6268e7dc)
diff --git a/cmds/incidentd/Android.mk b/cmds/incidentd/Android.mk
index 3a47fe1..008a1bf 100644
--- a/cmds/incidentd/Android.mk
+++ b/cmds/incidentd/Android.mk
@@ -15,8 +15,10 @@
LOCAL_PATH:= $(call my-dir)
# proto files used in incidentd to generate cppstream proto headers.
-PROTO_FILES:= frameworks/base/core/proto/android/util/log.proto \
- frameworks/base/core/proto/android/os/data.proto
+PROTO_FILES:= \
+ frameworks/base/core/proto/android/os/backtrace.proto \
+ frameworks/base/core/proto/android/os/data.proto \
+ frameworks/base/core/proto/android/util/log.proto
# ========= #
# incidentd #
@@ -46,6 +48,8 @@
libbase \
libbinder \
libcutils \
+ libdebuggerd_client \
+ libdumputils \
libincident \
liblog \
libprotobuf-cpp-lite \
@@ -119,6 +123,8 @@
libbase \
libbinder \
libcutils \
+ libdebuggerd_client \
+ libdumputils \
libincident \
liblog \
libprotobuf-cpp-lite \
diff --git a/cmds/incidentd/incidentd.rc b/cmds/incidentd/incidentd.rc
index 6dd8114..9c16a1c 100644
--- a/cmds/incidentd/incidentd.rc
+++ b/cmds/incidentd/incidentd.rc
@@ -16,6 +16,7 @@
class main
user incidentd
group incidentd log readproc
+ capabilities KILL SYS_PTRACE
on post-fs-data
# Create directory for incidentd
diff --git a/cmds/incidentd/src/FdBuffer.cpp b/cmds/incidentd/src/FdBuffer.cpp
index 64da677..3570144 100644
--- a/cmds/incidentd/src/FdBuffer.cpp
+++ b/cmds/incidentd/src/FdBuffer.cpp
@@ -87,6 +87,35 @@
return NO_ERROR;
}
+/**
+ * Drains fd into mBuffer until EOF, a read error, or the buffer size cap is reached.
+ * The start time is recorded on entry; the finish time is recorded only on the NO_ERROR path.
+ */
+status_t FdBuffer::readFully(int fd) {
+    mStartTime = uptimeMillis();
+
+    for (;;) {
+        // Refuse to grow past the cap; flag the data as truncated instead.
+        const bool atCapacity = mBuffer.size() >= MAX_BUFFER_COUNT * BUFFER_SIZE;
+        if (atCapacity) {
+            mTruncated = true;
+            VLOG("Truncating data");
+            break;
+        }
+        if (mBuffer.writeBuffer() == NULL) {
+            return NO_MEMORY;
+        }
+
+        const ssize_t bytesRead =
+                TEMP_FAILURE_RETRY(::read(fd, mBuffer.writeBuffer(), mBuffer.currentToWrite()));
+        if (bytesRead == 0) {
+            // EOF: nothing more to read.
+            VLOG("Done reading %zu bytes", mBuffer.size());
+            break;
+        }
+        if (bytesRead < 0) {
+            VLOG("Fail to read %d: %s", fd, strerror(errno));
+            return -errno;
+        }
+        // Advance the write pointer past the bytes that were just stored.
+        mBuffer.wp()->move(bytesRead);
+    }
+
+    mFinishTime = uptimeMillis();
+    return NO_ERROR;
+}
+
status_t FdBuffer::readProcessedDataInStream(int fd, int toFd, int fromFd, int64_t timeoutMs,
const bool isSysfs) {
struct pollfd pfds[] = {
diff --git a/cmds/incidentd/src/FdBuffer.h b/cmds/incidentd/src/FdBuffer.h
index 66a3de1..34ebcf5 100644
--- a/cmds/incidentd/src/FdBuffer.h
+++ b/cmds/incidentd/src/FdBuffer.h
@@ -41,6 +41,12 @@
status_t read(int fd, int64_t timeoutMs);
/**
+ * Read the data until we hit eof or the buffer size limit. Unlike read(), this takes no
+ * timeout. If the size limit is hit, the data is marked as truncated and reading stops.
+ * Returns NO_ERROR if there were no errors (including the truncated case), NO_MEMORY if no
+ * write buffer is available, or -errno if reading from fd fails.
+ */
+ status_t readFully(int fd);
+
+ /**
* Read processed results by streaming data to a parsing process, e.g. incident helper.
* The parsing process provides IO fds which are 'toFd' and 'fromFd'. The function
* reads original data in 'fd' and writes to parsing process through 'toFd', then it reads
diff --git a/cmds/incidentd/src/Section.cpp b/cmds/incidentd/src/Section.cpp
index 46243c05..6dd76a8 100644
--- a/cmds/incidentd/src/Section.cpp
+++ b/cmds/incidentd/src/Section.cpp
@@ -18,13 +18,19 @@
#include "Section.h"
+#include <dirent.h>
+#include <errno.h>
#include <wait.h>
#include <mutex>
+#include <set>
#include <android-base/file.h>
+#include <android-base/stringprintf.h>
#include <android/util/protobuf.h>
#include <binder/IServiceManager.h>
+#include <debuggerd/client.h>
+#include <dumputils/dump_utils.h>
#include <log/log_event_list.h>
#include <log/log_read.h>
#include <log/logprint.h>
@@ -33,6 +39,7 @@
#include "FdBuffer.h"
#include "Privacy.h"
#include "PrivacyBuffer.h"
+#include "frameworks/base/core/proto/android/os/backtrace.proto.h"
#include "frameworks/base/core/proto/android/os/data.proto.h"
#include "frameworks/base/core/proto/android/util/log.proto.h"
#include "incidentd_util.h"
@@ -95,6 +102,7 @@
return WriteFully(fd, buf, p - buf) ? NO_ERROR : -errno;
}
+// Reads data from FdBuffer and writes it to the requests file descriptor.
static status_t write_report_requests(const int id, const FdBuffer& buffer,
ReportRequestSet* requests) {
status_t err = -EBADF;
@@ -387,6 +395,7 @@
return NO_ERROR;
}
+
// ================================================================================
struct WorkerThreadData : public virtual RefBase {
const WorkerThreadSection* section;
@@ -413,7 +422,8 @@
WorkerThreadData::~WorkerThreadData() {}
// ================================================================================
-WorkerThreadSection::WorkerThreadSection(int id) : Section(id) {}
+// Forwards both the section id and timeoutMs to the Section base-class constructor.
+WorkerThreadSection::WorkerThreadSection(int id, const int64_t timeoutMs)
+ : Section(id, timeoutMs) {}
WorkerThreadSection::~WorkerThreadSection() {}
@@ -594,7 +604,7 @@
return readStatus;
}
- // TODO: wait for command here has one trade-off: the failed status of command won't be detected
+ // Waiting for command here has one trade-off: the failed status of command won't be detected
// until buffer timeout, but it has advatage on starting the data stream earlier.
status_t cmdStatus = wait_child(cmdPid);
status_t ihStatus = wait_child(ihPid);
@@ -694,7 +704,6 @@
}
status_t LogSection::BlockingCall(int pipeWriteFd) const {
- status_t err = NO_ERROR;
// Open log buffer and getting logs since last retrieved time if any.
unique_ptr<logger_list, void (*)(logger_list*)> loggers(
gLastLogsRetrieved.find(mLogID) == gLastLogsRetrieved.end()
@@ -705,15 +714,16 @@
if (android_logger_open(loggers.get(), mLogID) == NULL) {
ALOGW("LogSection %s: Can't get logger.", this->name.string());
- return err;
+ return NO_ERROR;
}
log_msg msg;
log_time lastTimestamp(0);
+ status_t err = NO_ERROR;
ProtoOutputStream proto;
while (true) { // keeps reading until logd buffer is fully read.
- status_t err = android_logger_list_read(loggers.get(), &msg);
+ err = android_logger_list_read(loggers.get(), &msg);
// err = 0 - no content, unexpected connection drop or EOF.
// err = +ive number - size of retrieved data from logger
// err = -ive number, OS supplied error _except_ for -EAGAIN
@@ -814,3 +824,133 @@
proto.flush(pipeWriteFd);
return err;
}
+
+// ================================================================================
+
+// Passes id and timeoutMs to WorkerThreadSection, stores 'type' in mType and appends
+// "tombstone <type>" to the section name.
+TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeoutMs)
+ : WorkerThreadSection(id, timeoutMs), mType(type) {
+ name += "tombstone ";
+ name += type;
+}
+
+// Empty destructor body.
+TombstoneSection::~TombstoneSection() {}
+
+/**
+ * Walks /proc and dumps a backtrace for every running process that matches this section's
+ * type ("java", "native" or "hal"), then flushes the collected BackTraceProto data to
+ * pipeWriteFd.
+ *
+ * Each backtrace is produced by a forked child that writes into a pipe; the parent drains the
+ * pipe and then reaps the child so that it does not stay around as a zombie.
+ *
+ * Returns NO_ERROR on success. A failure to open /proc returns -errno immediately. A failure to
+ * set up the pipe, to fork, to read the dump or to close the pipe stops the walk; whatever was
+ * collected so far is still flushed and the error is returned.
+ */
+status_t TombstoneSection::BlockingCall(int pipeWriteFd) const {
+    std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
+    if (proc.get() == nullptr) {
+        // Save errno before logging: the logging call may overwrite it.
+        const int savedErrno = errno;
+        ALOGE("opendir /proc failed: %s\n", strerror(savedErrno));
+        return -savedErrno;
+    }
+
+    const std::set<int> hal_pids = get_interesting_hal_pids();
+
+    ProtoOutputStream proto;
+    struct dirent* d;
+    status_t err = NO_ERROR;
+    while ((d = readdir(proc.get()))) {
+        // Entries whose name does not parse as a positive number are not process directories.
+        int pid = atoi(d->d_name);
+        if (pid <= 0) {
+            continue;
+        }
+
+        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
+        std::string exe;
+        if (!android::base::Readlink(link_name, &exe)) {
+            ALOGE("Can't read '%s': %s\n", link_name.c_str(), strerror(errno));
+            continue;
+        }
+
+        // Classify the process and skip it unless it matches the type this section dumps.
+        bool is_java_process = false;
+        if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") {
+            if (mType != "java") continue;
+            // Don't bother dumping backtraces for the zygote.
+            if (IsZygote(pid)) {
+                VLOG("Skipping Zygote");
+                continue;
+            }
+
+            is_java_process = true;
+        } else if (should_dump_native_traces(exe.c_str())) {
+            if (mType != "native") continue;
+        } else if (hal_pids.find(pid) != hal_pids.end()) {
+            if (mType != "hal") continue;
+        } else {
+            // Probably a native process we don't care about, continue.
+            VLOG("Skipping %d", pid);
+            continue;
+        }
+
+        Fpipe dumpPipe;
+        if (!dumpPipe.init()) {
+            const int savedErrno = errno;
+            ALOGW("TombstoneSection '%s' failed to setup dump pipe", this->name.string());
+            err = -savedErrno;
+            break;
+        }
+
+        const uint64_t start = Nanotime();
+        pid_t child = fork();
+        if (child < 0) {
+            // Report the failure instead of returning the status left by the previous iteration.
+            const int savedErrno = errno;
+            ALOGE("Failed to fork child process");
+            err = -savedErrno;
+            break;
+        } else if (child == 0) {
+            // This is the child process.
+            close(dumpPipe.readFd());
+            const int ret = dump_backtrace_to_file_timeout(
+                    pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace,
+                    is_java_process ? 5 : 20, dumpPipe.writeFd());
+            if (ret == -1) {
+                if (errno == 0) {
+                    ALOGW("Dumping failed for pid '%d', likely due to a timeout\n", pid);
+                } else {
+                    ALOGE("Dumping failed for pid '%d': %s\n", pid, strerror(errno));
+                }
+            }
+            if (close(dumpPipe.writeFd()) != 0) {
+                ALOGW("TombstoneSection '%s' failed to close dump pipe writeFd: %d",
+                      this->name.string(), errno);
+                _exit(EXIT_FAILURE);
+            }
+
+            _exit(EXIT_SUCCESS);
+        }
+        // Parent process.
+        // Drop our copy of the write end so that reading sees EOF once the child is done.
+        close(dumpPipe.writeFd());
+        // Read from the pipe concurrently to avoid blocking the child.
+        FdBuffer buffer;
+        err = buffer.readFully(dumpPipe.readFd());
+        // Reap the child whether or not the read succeeded, otherwise every dumped process
+        // leaves a zombie behind.
+        // NOTE(review): wait_child() is the helper this file already uses for command children;
+        // confirm its wait/kill behaviour is acceptable when called once per dumped process.
+        const status_t childStatus = wait_child(child);
+        if (err != NO_ERROR) {
+            ALOGW("TombstoneSection '%s' failed to read stack dump: %d", this->name.string(), err);
+            if (close(dumpPipe.readFd()) != 0) {
+                ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %s",
+                      this->name.string(), strerror(errno));
+            }
+            break;
+        }
+        if (childStatus != NO_ERROR) {
+            // The dump that was read is still reported; only note the child's status.
+            ALOGW("TombstoneSection '%s' dump child for pid '%d' reported status %d",
+                  this->name.string(), pid, childStatus);
+        }
+
+        // Copy the buffered dump into one contiguous array so it can be written as one field.
+        auto dump = std::make_unique<char[]>(buffer.size());
+        auto iterator = buffer.data();
+        size_t i = 0;
+        while (iterator.hasNext()) {
+            dump[i] = iterator.next();
+            i++;
+        }
+        long long token = proto.start(android::os::BackTraceProto::TRACES);
+        proto.write(android::os::BackTraceProto::Stack::PID, pid);
+        proto.write(android::os::BackTraceProto::Stack::DUMP, dump.get(), i);
+        proto.write(android::os::BackTraceProto::Stack::DUMP_DURATION_NS,
+                    static_cast<long long>(Nanotime() - start));
+        proto.end(token);
+
+        if (close(dumpPipe.readFd()) != 0) {
+            const int savedErrno = errno;
+            ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %d", this->name.string(),
+                  savedErrno);
+            err = -savedErrno;
+            break;
+        }
+    }
+
+    // Flush whatever was collected, even if the walk stopped early on an error.
+    proto.flush(pipeWriteFd);
+    return err;
+}
diff --git a/cmds/incidentd/src/Section.h b/cmds/incidentd/src/Section.h
index 8294be1..19ef7ee 100644
--- a/cmds/incidentd/src/Section.h
+++ b/cmds/incidentd/src/Section.h
@@ -103,7 +103,7 @@
*/
class WorkerThreadSection : public Section {
public:
- WorkerThreadSection(int id);
+ WorkerThreadSection(int id, const int64_t timeoutMs = REMOTE_CALL_TIMEOUT_MS);
virtual ~WorkerThreadSection();
virtual status_t Execute(ReportRequestSet* requests) const;
@@ -161,4 +161,18 @@
bool mBinary;
};
+/**
+ * Section that gets data from tombstoned.
+ *
+ * BlockingCall() walks /proc and writes one backtrace entry (pid, dump text and dump duration)
+ * per matching process into BackTraceProto. The 'type' passed to the constructor selects which
+ * processes are dumped; BlockingCall() compares it against "java", "native" and "hal".
+ */
+class TombstoneSection : public WorkerThreadSection {
+public:
+ // 'type' is stored in mType and appended to the section name; timeoutMs defaults to 30000.
+ TombstoneSection(int id, const char* type, const int64_t timeoutMs = 30000 /* 30 seconds */);
+ virtual ~TombstoneSection();
+
+ // Collects the backtraces and flushes the resulting proto to pipeWriteFd.
+ virtual status_t BlockingCall(int pipeWriteFd) const;
+
+private:
+ // Process category to dump, as given to the constructor.
+ std::string mType;
+};
+
#endif // SECTIONS_H
diff --git a/cmds/incidentd/src/incidentd_util.cpp b/cmds/incidentd/src/incidentd_util.cpp
index c095f2b..c869c7a 100644
--- a/cmds/incidentd/src/incidentd_util.cpp
+++ b/cmds/incidentd/src/incidentd_util.cpp
@@ -80,6 +80,7 @@
close(output->writeFd());
return pid;
}
+
// ================================================================================
const char** varargs(const char* first, va_list rest) {
va_list copied_rest;
@@ -101,3 +102,11 @@
ret[numOfArgs] = NULL;
return ret;
}
+
+// ================================================================================
+// Number of nanoseconds in one second.
+const uint64_t NANOS_PER_SEC = 1000000000;
+
+// Reads CLOCK_MONOTONIC and returns the reading as a single nanosecond count.
+uint64_t Nanotime() {
+    timespec now;
+    clock_gettime(CLOCK_MONOTONIC, &now);
+    const uint64_t wholeSeconds = static_cast<uint64_t>(now.tv_sec);
+    const uint64_t nanoRemainder = static_cast<uint64_t>(now.tv_nsec);
+    return wholeSeconds * NANOS_PER_SEC + nanoRemainder;
+}
diff --git a/cmds/incidentd/src/incidentd_util.h b/cmds/incidentd/src/incidentd_util.h
index db7ec82..3f7df91 100644
--- a/cmds/incidentd/src/incidentd_util.h
+++ b/cmds/incidentd/src/incidentd_util.h
@@ -60,4 +60,9 @@
*/
const char** varargs(const char* first, va_list rest);
-#endif // INCIDENTD_UTIL_H
\ No newline at end of file
+/**
+ * Returns the current monotonic clock time in nanoseconds.
+ */
+uint64_t Nanotime();
+
+#endif // INCIDENTD_UTIL_H