debuggerd_client: properly wait for completion.

Use an intermediate pipe to detect and report when a requested dump has
completed.

Bug: http://b/35241370
Bug: http://b/35813071
Test: debuggerd_test
Test: manually triggered a background ANR
Change-Id: If14aedf6071288360f1a7853d5a2ee79db121759
diff --git a/debuggerd/Android.bp b/debuggerd/Android.bp
index 2d6c7f5..024bc3d 100644
--- a/debuggerd/Android.bp
+++ b/debuggerd/Android.bp
@@ -161,6 +161,7 @@
     target: {
         android: {
             srcs: [
+                "client/debuggerd_client_test.cpp",
                 "debuggerd_test.cpp",
                 "util.cpp"
             ],
@@ -171,6 +172,7 @@
         "libbacktrace",
         "libbase",
         "libcutils",
+        "libdebuggerd_client",
     ],
 
     static_libs: [
diff --git a/debuggerd/client/debuggerd_client.cpp b/debuggerd/client/debuggerd_client.cpp
index f2975d1..b9fb512 100644
--- a/debuggerd/client/debuggerd_client.cpp
+++ b/debuggerd/client/debuggerd_client.cpp
@@ -19,12 +19,14 @@
 #include <fcntl.h>
 #include <signal.h>
 #include <stdlib.h>
+#include <sys/poll.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include <chrono>
 
+#include <android-base/file.h>
 #include <android-base/logging.h>
 #include <android-base/stringprintf.h>
 #include <android-base/unique_fd.h>
@@ -33,6 +35,8 @@
 #include <debuggerd/protocol.h>
 #include <debuggerd/util.h>
 
+using namespace std::chrono_literals;
+
 using android::base::unique_fd;
 
 static bool send_signal(pid_t pid, bool backtrace) {
@@ -45,28 +49,31 @@
   return true;
 }
 
+template <typename Duration>
+static void populate_timeval(struct timeval* tv, const Duration& duration) {
+  auto seconds = std::chrono::duration_cast<std::chrono::seconds>(duration);
+  auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(duration - seconds);
+  tv->tv_sec = static_cast<long>(seconds.count());
+  tv->tv_usec = static_cast<long>(microseconds.count());
+}
+
 bool debuggerd_trigger_dump(pid_t pid, unique_fd output_fd, DebuggerdDumpType dump_type,
                             int timeout_ms) {
   LOG(INFO) << "libdebuggerd_client: started dumping process " << pid;
   unique_fd sockfd;
   const auto end = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms);
-  auto set_timeout = [timeout_ms, &sockfd, &end]() {
+  auto time_left = [timeout_ms, &end]() { return end - std::chrono::steady_clock::now(); };
+  auto set_timeout = [timeout_ms, &time_left](int sockfd) {
     if (timeout_ms <= 0) {
       return true;
     }
 
-    auto now = std::chrono::steady_clock::now();
-    if (now > end) {
+    auto remaining = time_left();
+    if (remaining < decltype(remaining)::zero()) {
       return false;
     }
-
-    auto time_left = std::chrono::duration_cast<std::chrono::microseconds>(end - now);
-    auto seconds = std::chrono::duration_cast<std::chrono::seconds>(time_left);
-    auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(time_left - seconds);
-    struct timeval timeout = {
-      .tv_sec = static_cast<long>(seconds.count()),
-      .tv_usec = static_cast<long>(microseconds.count()),
-    };
+    struct timeval timeout;
+    populate_timeval(&timeout, remaining);
 
     if (setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) != 0) {
       return false;
@@ -84,7 +91,7 @@
     return false;
   }
 
-  if (!set_timeout()) {
+  if (!set_timeout(sockfd)) {
     PLOG(ERROR) << "libdebugger_client: failed to set timeout";
     return false;
   }
@@ -96,11 +103,19 @@
   }
 
   InterceptRequest req = {.pid = pid };
-  if (!set_timeout()) {
+  if (!set_timeout(sockfd)) {
     PLOG(ERROR) << "libdebugger_client: failed to set timeout";
+    return false;
   }
 
-  if (send_fd(sockfd.get(), &req, sizeof(req), std::move(output_fd)) != sizeof(req)) {
+  // Create an intermediate pipe to pass to the other end.
+  unique_fd pipe_read, pipe_write;
+  if (!Pipe(&pipe_read, &pipe_write)) {
+    PLOG(ERROR) << "libdebuggerd_client: failed to create pipe";
+    return false;
+  }
+
+  if (send_fd(sockfd.get(), &req, sizeof(req), std::move(pipe_write)) != sizeof(req)) {
     PLOG(ERROR) << "libdebuggerd_client: failed to send output fd to tombstoned";
     return false;
   }
@@ -108,8 +123,9 @@
   bool backtrace = dump_type == kDebuggerdBacktrace;
   send_signal(pid, backtrace);
 
-  if (!set_timeout()) {
-    PLOG(ERROR) << "libdebugger_client: failed to set timeout";
+  if (!set_timeout(sockfd)) {
+    PLOG(ERROR) << "libdebuggerd_client: failed to set timeout";
+    return false;
   }
 
   InterceptResponse response;
@@ -127,6 +143,48 @@
   if (response.success != 1) {
     response.error_message[sizeof(response.error_message) - 1] = '\0';
     LOG(ERROR) << "libdebuggerd_client: tombstoned reported failure: " << response.error_message;
+    return false;
+  }
+
+  // Forward output from the pipe to the output fd.
+  while (true) {
+    auto remaining_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_left());
+    if (remaining_ms <= 1ms) {
+      LOG(ERROR) << "libdebuggerd_client: timeout expired";
+      return false;
+    }
+
+    struct pollfd pfd = {
+        .fd = pipe_read.get(), .events = POLLIN, .revents = 0,
+    };
+
+    rc = poll(&pfd, 1, remaining_ms.count());
+    if (rc == -1) {
+      if (errno == EINTR) {
+        continue;
+      } else {
+        PLOG(ERROR) << "libdebuggerd_client: error while polling";
+        return false;
+      }
+    } else if (rc == 0) {
+      LOG(ERROR) << "libdebuggerd_client: timeout expired";
+      return false;
+    }
+
+    char buf[1024];
+    rc = TEMP_FAILURE_RETRY(read(pipe_read.get(), buf, sizeof(buf)));
+    if (rc == 0) {
+      // Done.
+      break;
+    } else if (rc == -1) {
+      PLOG(ERROR) << "libdebuggerd_client: error while reading";
+      return false;
+    }
+
+    if (!android::base::WriteFully(output_fd.get(), buf, rc)) {
+      PLOG(ERROR) << "libdebuggerd_client: error while writing";
+      return false;
+    }
   }
 
   LOG(INFO) << "libdebuggerd_client: done dumping process " << pid;
diff --git a/debuggerd/client/debuggerd_client_test.cpp b/debuggerd/client/debuggerd_client_test.cpp
new file mode 100644
index 0000000..86d0314
--- /dev/null
+++ b/debuggerd/client/debuggerd_client_test.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2017, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <debuggerd/client.h>
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include <android-base/file.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+#include <android-base/unique_fd.h>
+
+#include <debuggerd/util.h>
+
+using namespace std::chrono_literals;
+using android::base::unique_fd;
+
+TEST(debuggerd_client, race) {
+  static constexpr int THREAD_COUNT = 1024;
+  pid_t forkpid = fork();
+
+  ASSERT_NE(-1, forkpid);
+
+  if (forkpid == 0) {
+    // Spawn a bunch of threads, to make crash_dump take longer.
+    std::vector<std::thread> threads;
+    for (int i = 0; i < THREAD_COUNT; ++i) {
+      threads.emplace_back([]() {
+        while (true) {
+          std::this_thread::sleep_for(60s);
+        }
+      });
+    }
+
+    std::this_thread::sleep_for(60s);
+    exit(0);
+  }
+
+  unique_fd pipe_read, pipe_write;
+  ASSERT_TRUE(Pipe(&pipe_read, &pipe_write));
+
+  // 64 kB should be enough for everyone.
+  constexpr int PIPE_SIZE = 64 * 1024 * 1024;
+  ASSERT_EQ(PIPE_SIZE, fcntl(pipe_read.get(), F_SETPIPE_SZ, PIPE_SIZE));
+
+  // Wait for a bit to let the child spawn all of its threads.
+  std::this_thread::sleep_for(250ms);
+
+  ASSERT_TRUE(debuggerd_trigger_dump(forkpid, std::move(pipe_write), kDebuggerdBacktrace, 10000));
+  // Immediately kill the forked child, to make sure that the dump didn't return early.
+  ASSERT_EQ(0, kill(forkpid, SIGKILL)) << strerror(errno);
+
+  // Check the output.
+  std::string result;
+  ASSERT_TRUE(android::base::ReadFdToString(pipe_read.get(), &result));
+
+  // Look for "----- end <PID> -----"
+  int found_end = 0;
+
+  std::string expected_end = android::base::StringPrintf("----- end %d -----", forkpid);
+
+  std::vector<std::string> lines = android::base::Split(result, "\n");
+  for (const std::string& line : lines) {
+    if (line == expected_end) {
+      ++found_end;
+    }
+  }
+
+  EXPECT_EQ(1, found_end) << "\nOutput: \n" << result;
+}