Merge "debuggerd_fallback: fix race." am: cd2fa1f432 am: 135dacd4d1
am: 9606bfaa42

Change-Id: I5cb3ce094be188b466f90f6aa41085a5d77fda30
diff --git a/debuggerd/handler/debuggerd_fallback.cpp b/debuggerd/handler/debuggerd_fallback.cpp
index 5fddddc..364fca5 100644
--- a/debuggerd/handler/debuggerd_fallback.cpp
+++ b/debuggerd/handler/debuggerd_fallback.cpp
@@ -55,7 +55,7 @@
 using android::base::unique_fd;
 using unwindstack::Regs;
 
-extern "C" void __linker_enable_fallback_allocator();
+extern "C" bool __linker_enable_fallback_allocator();
 extern "C" void __linker_disable_fallback_allocator();
 
 // This is incredibly sketchy to do inside of a signal handler, especially when libbacktrace
@@ -65,7 +65,11 @@
 // This isn't the default method of dumping because it can fail in cases such as address space
 // exhaustion.
 static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
-  __linker_enable_fallback_allocator();
+  if (!__linker_enable_fallback_allocator()) {
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
+    return;
+  }
+
   {
     std::unique_ptr<Regs> regs;
 
@@ -84,7 +88,11 @@
 
 static void debuggerd_fallback_tombstone(int output_fd, ucontext_t* ucontext, siginfo_t* siginfo,
                                          void* abort_message) {
-  __linker_enable_fallback_allocator();
+  if (!__linker_enable_fallback_allocator()) {
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
+    return;
+  }
+
   engrave_tombstone_ucontext(output_fd, reinterpret_cast<uintptr_t>(abort_message), siginfo,
                              ucontext);
   __linker_disable_fallback_allocator();
@@ -116,7 +124,7 @@
   closedir(dir);
 }
 
-static bool forward_output(int src_fd, int dst_fd) {
+static bool forward_output(int src_fd, int dst_fd, pid_t expected_tid) {
   // Make sure the thread actually got the signal.
   struct pollfd pfd = {
     .fd = src_fd, .events = POLLIN,
@@ -127,6 +135,18 @@
     return false;
   }
 
+  pid_t tid;
+  if (TEMP_FAILURE_RETRY(read(src_fd, &tid, sizeof(tid))) != sizeof(tid)) {
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to read tid");
+    return false;
+  }
+
+  if (tid != expected_tid) {
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc", "received tid %d, expected %d", tid,
+                          expected_tid);
+    return false;
+  }
+
   while (true) {
     char buf[512];
     ssize_t rc = TEMP_FAILURE_RETRY(read(src_fd, buf, sizeof(buf)));
@@ -144,16 +164,54 @@
   }
 }
 
+struct __attribute__((__packed__)) packed_thread_output {
+  int32_t tid;
+  int32_t fd;
+};
+
+static uint64_t pack_thread_fd(pid_t tid, int fd) {
+  packed_thread_output packed = {.tid = tid, .fd = fd};
+  uint64_t result;
+  static_assert(sizeof(packed) == sizeof(result));
+  memcpy(&result, &packed, sizeof(packed));
+  return result;
+}
+
+static std::pair<pid_t, int> unpack_thread_fd(uint64_t value) {
+  packed_thread_output result;
+  memcpy(&result, &value, sizeof(value));
+  return std::make_pair(result.tid, result.fd);
+}
+
 static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
-  static std::atomic<int> trace_output_fd(-1);
+  static std::atomic<uint64_t> trace_output(pack_thread_fd(-1, -1));
 
   if (info->si_value.sival_int == ~0) {
     // Asked to dump by the original signal recipient.
-    debuggerd_fallback_trace(trace_output_fd, ucontext);
+    uint64_t val = trace_output.load();
+    auto [tid, fd] = unpack_thread_fd(val);
+    if (tid != gettid()) {
+      // We received some other thread's info request?
+      async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                            "thread %d received output fd for thread %d?", gettid(), tid);
+      return;
+    }
 
-    int tmp = trace_output_fd.load();
-    trace_output_fd.store(-1);
-    close(tmp);
+    if (!trace_output.compare_exchange_strong(val, pack_thread_fd(-1, -1))) {
+      // Presumably, the timeout in forward_output expired, and the main thread moved on.
+      // If this happened, the main thread closed our fd for us, so just return.
+      async_safe_format_log(ANDROID_LOG_ERROR, "libc", "cmpxchg for thread %d failed", gettid());
+      return;
+    }
+
+    // Write our tid to the output fd to let the main thread know that we're working.
+    if (TEMP_FAILURE_RETRY(write(fd, &tid, sizeof(tid))) == sizeof(tid)) {
+      debuggerd_fallback_trace(fd, ucontext);
+    } else {
+      async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to write to output fd");
+    }
+
+    close(fd);
     return;
   }
 
@@ -189,7 +247,14 @@
           return false;
         }
 
-        trace_output_fd.store(pipe_write.get());
+        uint64_t expected = pack_thread_fd(-1, -1);
+        if (!trace_output.compare_exchange_strong(expected,
+                                                  pack_thread_fd(tid, pipe_write.release()))) {
+          auto [tid, fd] = unpack_thread_fd(expected);
+          async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                                "thread %d is already outputting to fd %d?", tid, fd);
+          return false;
+        }
 
         siginfo_t siginfo = {};
         siginfo.si_code = SI_QUEUE;
@@ -203,12 +268,20 @@
           return false;
         }
 
-        bool success = forward_output(pipe_read.get(), output_fd);
-        if (success) {
-          // The signaled thread has closed trace_output_fd already.
-          (void)pipe_write.release();
-        } else {
-          trace_output_fd.store(-1);
+        bool success = forward_output(pipe_read.get(), output_fd, tid);
+        if (!success) {
+          async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                                "timeout expired while waiting for thread %d to dump", tid);
+        }
+
+        // Regardless of whether the poll succeeds, check to see if the thread took fd ownership.
+        uint64_t post_wait = trace_output.exchange(pack_thread_fd(-1, -1));
+        if (post_wait != pack_thread_fd(-1, -1)) {
+          auto [tid, fd] = unpack_thread_fd(post_wait);
+          if (fd != -1) {
+            async_safe_format_log(ANDROID_LOG_ERROR, "libc", "closing fd %d for thread %d", fd, tid);
+            close(fd);
+          }
         }
 
         return true;