Merge "Switch kernel header parsing to python libclang"
diff --git a/libc/Android.mk b/libc/Android.mk
index fe7b116..75bb616 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -516,6 +516,23 @@
 libc_arch_static_src_files := \
     bionic/dl_iterate_phdr_static.cpp \
 
+# Various kinds of LP32 cruft.
+# ========================================================
+libc_bionic_src_files_32 += \
+    bionic/mmap.cpp \
+
+libc_common_src_files_32 += \
+    bionic/legacy_32_bit_support.cpp \
+    bionic/ndk_cruft.cpp \
+    bionic/time64.c \
+
+libc_netbsd_src_files_32 += \
+    upstream-netbsd/common/lib/libc/hash/sha1/sha1.c \
+
+libc_openbsd_src_files_32 += \
+    upstream-openbsd/lib/libc/stdio/putw.c \
+
+
 # Define some common cflags
 # ========================================================
 libc_common_cflags := \
@@ -572,12 +589,13 @@
     $(LOCAL_PATH)/stdio   \
 
 # ========================================================
-# Add in the arch-specific flags.
+# Add in the arch-specific or 32-bit-specific flags.
 # Must be called with $(eval).
 # $(1): the LOCAL_ variable name
 # $(2): the bionic variable name to pull in
 define patch-up-arch-specific-flags
 $(1)_$(TARGET_ARCH) += $($(2)_$(TARGET_ARCH))
+$(1)_32 += $($(2)_32)
 ifdef TARGET_2ND_ARCH
 $(1)_$(TARGET_2ND_ARCH) += $($(2)_$(TARGET_2ND_ARCH))
 endif
@@ -676,15 +694,16 @@
     -DINET6 \
     -fvisibility=hidden \
     -Wno-unused-parameter \
-    -I$(LOCAL_PATH)/dns/include \
-    -I$(LOCAL_PATH)/private \
-    -I$(LOCAL_PATH)/upstream-netbsd/lib/libc/include \
-    -I$(LOCAL_PATH)/upstream-netbsd/android/include \
     -include netbsd-compat.h \
 
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
-LOCAL_C_INCLUDES := $(libc_common_c_includes)
+LOCAL_C_INCLUDES := $(libc_common_c_includes) \
+    $(LOCAL_PATH)/dns/include \
+    $(LOCAL_PATH)/private \
+    $(LOCAL_PATH)/upstream-netbsd/lib/libc/include \
+    $(LOCAL_PATH)/upstream-netbsd/android/include \
+
 LOCAL_MODULE := libc_dns
 LOCAL_CLANG := $(use_clang)
 LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
@@ -710,13 +729,14 @@
 LOCAL_CFLAGS := \
     $(libc_common_cflags) \
     -Wno-sign-compare -Wno-uninitialized \
-    -I$(LOCAL_PATH)/upstream-freebsd/android/include \
-    -I$(LOCAL_PATH)/upstream-freebsd/lib/libc/include \
     -include freebsd-compat.h \
 
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
-LOCAL_C_INCLUDES := $(libc_common_c_includes)
+LOCAL_C_INCLUDES := $(libc_common_c_includes) \
+    $(LOCAL_PATH)/upstream-freebsd/android/include \
+    $(LOCAL_PATH)/upstream-freebsd/lib/libc/include \
+
 LOCAL_MODULE := libc_freebsd
 LOCAL_CLANG := $(use_clang)
 LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
@@ -744,13 +764,14 @@
     $(libc_common_cflags) \
     -Wno-sign-compare -Wno-uninitialized \
     -DPOSIX_MISTAKE \
-    -I$(LOCAL_PATH)/upstream-netbsd/android/include \
-    -I$(LOCAL_PATH)/upstream-netbsd/lib/libc/include \
     -include netbsd-compat.h \
 
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
-LOCAL_C_INCLUDES := $(libc_common_c_includes)
+LOCAL_C_INCLUDES := $(libc_common_c_includes) \
+    $(LOCAL_PATH)/upstream-netbsd/android/include \
+    $(LOCAL_PATH)/upstream-netbsd/lib/libc/include \
+
 LOCAL_MODULE := libc_netbsd
 LOCAL_CLANG := $(use_clang)
 LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
@@ -786,15 +807,16 @@
     -Wno-sign-compare \
     -Wno-uninitialized \
     -Wno-unused-parameter \
-    -I$(LOCAL_PATH)/private \
-    -I$(LOCAL_PATH)/upstream-openbsd/android/include \
-    -I$(LOCAL_PATH)/upstream-openbsd/lib/libc/include \
-    -I$(LOCAL_PATH)/upstream-openbsd/lib/libc/gdtoa/ \
     -include openbsd-compat.h \
 
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
-LOCAL_C_INCLUDES := $(libc_common_c_includes)
+LOCAL_C_INCLUDES := $(libc_common_c_includes) \
+    $(LOCAL_PATH)/private \
+    $(LOCAL_PATH)/upstream-openbsd/android/include \
+    $(LOCAL_PATH)/upstream-openbsd/lib/libc/include \
+    $(LOCAL_PATH)/upstream-openbsd/lib/libc/gdtoa/ \
+
 LOCAL_MODULE := libc_openbsd
 LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
 LOCAL_CXX_STL := none
@@ -829,14 +851,15 @@
     $(libc_common_cflags) \
     -Wno-sign-compare -Wno-uninitialized \
     -fvisibility=hidden \
-    -I$(LOCAL_PATH)/private \
-    -I$(LOCAL_PATH)/upstream-openbsd/android/include \
-    -I$(LOCAL_PATH)/upstream-openbsd/lib/libc/include \
     -include openbsd-compat.h \
 
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
-LOCAL_C_INCLUDES := $(libc_common_c_includes)
+LOCAL_C_INCLUDES := $(libc_common_c_includes) \
+    $(LOCAL_PATH)/private \
+    $(LOCAL_PATH)/upstream-openbsd/android/include \
+    $(LOCAL_PATH)/upstream-openbsd/lib/libc/include \
+
 LOCAL_MODULE := libc_gdtoa
 LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
 LOCAL_CXX_STL := none
@@ -1035,7 +1058,6 @@
 
 LOCAL_SRC_FILES := \
     $(libc_arch_static_src_files) \
-    $(libc_static_common_src_files) \
     bionic/libc_init_static.cpp
 
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
@@ -1087,7 +1109,6 @@
 
 LOCAL_SRC_FILES := \
     $(libc_arch_static_src_files) \
-    $(libc_static_common_src_files) \
     bionic/malloc_debug_common.cpp \
     bionic/libc_init_static.cpp \
 
@@ -1122,7 +1143,6 @@
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
 LOCAL_SRC_FILES := \
     $(libc_arch_dynamic_src_files) \
-    $(libc_static_common_src_files) \
     bionic/malloc_debug_common.cpp \
     bionic/libc_init_dynamic.cpp \
     bionic/NetdClient.cpp \
@@ -1152,13 +1172,10 @@
 # We'd really like to do this for all architectures, but since this wasn't done
 # before, these symbols must continue to be exported on LP32 for binary
 # compatibility.
-LOCAL_LDFLAGS_arm64 := -Wl,--exclude-libs,libgcc.a
-LOCAL_LDFLAGS_mips64 := -Wl,--exclude-libs,libgcc.a
-LOCAL_LDFLAGS_x86_64 := -Wl,--exclude-libs,libgcc.a
+LOCAL_LDFLAGS_64 := -Wl,--exclude-libs,libgcc.a
 
 $(eval $(call patch-up-arch-specific-flags,LOCAL_CFLAGS,libc_common_cflags))
 $(eval $(call patch-up-arch-specific-flags,LOCAL_SRC_FILES,libc_arch_dynamic_src_files))
-$(eval $(call patch-up-arch-specific-flags,LOCAL_SRC_FILES,libc_static_common_src_files))
 # special for arm
 LOCAL_NO_CRT_arm := true
 LOCAL_CFLAGS_arm += -DCRT_LEGACY_WORKAROUND
@@ -1209,7 +1226,8 @@
 LOCAL_SYSTEM_SHARED_LIBRARIES :=
 # Only need this for arm since libc++ uses its own unwind code that
 # doesn't mix with the other default unwind code.
-LOCAL_STATIC_LIBRARIES_arm := libunwind_llvm libc++abi
+LOCAL_STATIC_LIBRARIES_arm := libunwind_llvm
+LOCAL_STATIC_LIBRARIES += libc++abi
 LOCAL_ALLOW_UNDEFINED_SYMBOLS := true
 
 # Don't install on release build
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index f712c4c..60600e5 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -1,24 +1,6 @@
 # 32-bit arm.
 
 #
-# Various kinds of LP32 cruft.
-#
-
-libc_bionic_src_files_arm += \
-    bionic/mmap.cpp \
-
-libc_common_src_files_arm += \
-    bionic/legacy_32_bit_support.cpp \
-    bionic/ndk_cruft.cpp \
-    bionic/time64.c \
-
-libc_netbsd_src_files_arm += \
-    upstream-netbsd/common/lib/libc/hash/sha1/sha1.c \
-
-libc_openbsd_src_files_arm += \
-    upstream-openbsd/lib/libc/stdio/putw.c \
-
-#
 # Default implementations of functions that are commonly optimized.
 #
 
diff --git a/libc/arch-arm64/arm64.mk b/libc/arch-arm64/arm64.mk
index ba78871..8418993 100644
--- a/libc/arch-arm64/arm64.mk
+++ b/libc/arch-arm64/arm64.mk
@@ -59,6 +59,6 @@
 $(error "TARGET_CPU_VARIANT not set or set to an unknown value. Possible values are generic, denver64. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
 endif
 include $(cpu_variant_mk)
-libc_common_additional_dependencies += $(cpu_variank_mk)
+libc_common_additional_dependencies += $(cpu_variant_mk)
 
 cpu_variant_mk :=
diff --git a/libc/arch-mips/mips.mk b/libc/arch-mips/mips.mk
index 7f36635..7e3fe25 100644
--- a/libc/arch-mips/mips.mk
+++ b/libc/arch-mips/mips.mk
@@ -1,24 +1,6 @@
 # 32-bit mips.
 
 #
-# Various kinds of LP32 cruft.
-#
-
-libc_bionic_src_files_mips += \
-    bionic/mmap.cpp \
-
-libc_common_src_files_mips += \
-    bionic/legacy_32_bit_support.cpp \
-    bionic/ndk_cruft.cpp \
-    bionic/time64.c \
-
-libc_netbsd_src_files_mips += \
-    upstream-netbsd/common/lib/libc/hash/sha1/sha1.c \
-
-libc_openbsd_src_files_mips += \
-    upstream-openbsd/lib/libc/stdio/putw.c \
-
-#
 # Default implementations of functions that are commonly optimized.
 #
 
diff --git a/libc/arch-x86/x86.mk b/libc/arch-x86/x86.mk
index 989690c..e5d70a9 100644
--- a/libc/arch-x86/x86.mk
+++ b/libc/arch-x86/x86.mk
@@ -1,24 +1,6 @@
 # 32-bit x86.
 
 #
-# Various kinds of LP32 cruft.
-#
-
-libc_bionic_src_files_x86 += \
-    bionic/mmap.cpp \
-
-libc_common_src_files_x86 += \
-    bionic/legacy_32_bit_support.cpp \
-    bionic/ndk_cruft.cpp \
-    bionic/time64.c \
-
-libc_netbsd_src_files_x86 += \
-    upstream-netbsd/common/lib/libc/hash/sha1/sha1.c \
-
-libc_openbsd_src_files_x86 += \
-    upstream-openbsd/lib/libc/stdio/putw.c \
-
-#
 # Default implementations of functions that are commonly optimized.
 #
 
diff --git a/libc/bionic/debug_stacktrace.cpp b/libc/bionic/debug_stacktrace.cpp
index c6ce714..71e876b 100644
--- a/libc/bionic/debug_stacktrace.cpp
+++ b/libc/bionic/debug_stacktrace.cpp
@@ -47,34 +47,20 @@
 
 typedef struct _Unwind_Context __unwind_context;
 
+extern "C" char* __cxa_demangle(const char*, char*, size_t*, int*);
+
 static mapinfo_t* g_map_info = NULL;
-static void* g_demangler;
-typedef char* (*DemanglerFn)(const char*, char*, size_t*, int*);
-static DemanglerFn g_demangler_fn = NULL;
 
 __LIBC_HIDDEN__ void backtrace_startup() {
   ScopedDisableDebugCalls disable;
 
   g_map_info = mapinfo_create(getpid());
-  g_demangler = dlopen("libgccdemangle.so", RTLD_NOW);
-  if (g_demangler != NULL) {
-    void* sym = dlsym(g_demangler, "__cxa_demangle");
-    g_demangler_fn = reinterpret_cast<DemanglerFn>(sym);
-  }
 }
 
 __LIBC_HIDDEN__ void backtrace_shutdown() {
   ScopedDisableDebugCalls disable;
 
   mapinfo_destroy(g_map_info);
-  dlclose(g_demangler);
-}
-
-static char* demangle(const char* symbol) {
-  if (g_demangler_fn == NULL) {
-    return NULL;
-  }
-  return (*g_demangler_fn)(symbol, NULL, NULL, NULL);
 }
 
 struct stack_crawl_state_t {
@@ -158,8 +144,7 @@
       soname = "<unknown>";
     }
     if (symbol != NULL) {
-      // TODO: we might need a flag to say whether it's safe to allocate (demangling allocates).
-      char* demangled_symbol = demangle(symbol);
+      char* demangled_symbol = __cxa_demangle(symbol, NULL, NULL, NULL);
       const char* best_name = (demangled_symbol != NULL) ? demangled_symbol : symbol;
 
       __libc_format_log(ANDROID_LOG_ERROR, "libc",
diff --git a/libc/bionic/libc_logging.cpp b/libc/bionic/libc_logging.cpp
index 76bc46d..2eb9d68 100644
--- a/libc/bionic/libc_logging.cpp
+++ b/libc/bionic/libc_logging.cpp
@@ -513,9 +513,9 @@
   vec[3].iov_base = &priority;
   vec[3].iov_len = 1;
   vec[4].iov_base = const_cast<char*>(tag);
-  vec[4].iov_len = strlen(tag);
+  vec[4].iov_len = strlen(tag) + 1;
   vec[5].iov_base = const_cast<char*>(msg);
-  vec[5].iov_len = strlen(msg);
+  vec[5].iov_len = strlen(msg) + 1;
 #else
   int main_log_fd = TEMP_FAILURE_RETRY(open("/dev/log/main", O_CLOEXEC | O_WRONLY));
   if (main_log_fd == -1) {
@@ -530,9 +530,9 @@
   vec[0].iov_base = &priority;
   vec[0].iov_len = 1;
   vec[1].iov_base = const_cast<char*>(tag);
-  vec[1].iov_len = strlen(tag);
+  vec[1].iov_len = strlen(tag) + 1;
   vec[2].iov_base = const_cast<char*>(msg);
-  vec[2].iov_len = strlen(msg);
+  vec[2].iov_len = strlen(msg) + 1;
 #endif
 
   int result = TEMP_FAILURE_RETRY(writev(main_log_fd, vec, sizeof(vec) / sizeof(vec[0])));
diff --git a/libc/bionic/semaphore.cpp b/libc/bionic/semaphore.cpp
index dabfea0..0b04650 100644
--- a/libc/bionic/semaphore.cpp
+++ b/libc/bionic/semaphore.cpp
@@ -26,13 +26,19 @@
  * SUCH DAMAGE.
  */
 
+// Memory order requirements for POSIX semaphores appear unclear and are
+// currently interpreted inconsistently.
+// We conservatively prefer sequentially consistent operations for now.
+// CAUTION: This is more conservative than some other major implementations,
+// and may change if and when the issue is resolved.
+
 #include <semaphore.h>
 #include <errno.h>
 #include <limits.h>
+#include <stdatomic.h>
 #include <sys/time.h>
 #include <time.h>
 
-#include "private/bionic_atomic_inline.h"
 #include "private/bionic_constants.h"
 #include "private/bionic_futex.h"
 #include "private/bionic_time_conversions.h"
@@ -66,7 +72,7 @@
 #define SEMCOUNT_FROM_VALUE(val)    (((val) << SEMCOUNT_VALUE_SHIFT) & SEMCOUNT_VALUE_MASK)
 
 // Convert a sem->count bit pattern into the corresponding signed value.
-static inline int SEMCOUNT_TO_VALUE(uint32_t sval) {
+static inline int SEMCOUNT_TO_VALUE(unsigned int sval) {
   return (static_cast<int>(sval) >> SEMCOUNT_VALUE_SHIFT);
 }
 
@@ -79,11 +85,20 @@
 #define SEMCOUNT_DECREMENT(sval)    (((sval) - (1U << SEMCOUNT_VALUE_SHIFT)) & SEMCOUNT_VALUE_MASK)
 #define SEMCOUNT_INCREMENT(sval)    (((sval) + (1U << SEMCOUNT_VALUE_SHIFT)) & SEMCOUNT_VALUE_MASK)
 
-// Return the shared bitflag from a semaphore.
-static inline uint32_t SEM_GET_SHARED(sem_t* sem) {
-  return (sem->count & SEMCOUNT_SHARED_MASK);
+static inline atomic_uint* SEM_TO_ATOMIC_POINTER(sem_t* sem) {
+  static_assert(sizeof(atomic_uint) == sizeof(sem->count),
+                "sem->count should actually be atomic_uint in implementation.");
+
+  // We prefer casting to atomic_uint over declaring sem->count as atomic_uint directly,
+  // because the latter would pollute semaphore.h.
+  return reinterpret_cast<atomic_uint*>(&sem->count);
 }
 
+// Return the shared bitflag from a semaphore counter.
+static inline unsigned int SEM_GET_SHARED(atomic_uint* sem_count_ptr) {
+  // memory_order_relaxed suffices because the SHARED flag is never changed after init.
+  return (atomic_load_explicit(sem_count_ptr, memory_order_relaxed) & SEMCOUNT_SHARED_MASK);
+}
 
 int sem_init(sem_t* sem, int pshared, unsigned int value) {
   // Ensure that 'value' can be stored in the semaphore.
@@ -92,10 +107,13 @@
     return -1;
   }
 
-  sem->count = SEMCOUNT_FROM_VALUE(value);
+  unsigned int count = SEMCOUNT_FROM_VALUE(value);
   if (pshared != 0) {
-    sem->count |= SEMCOUNT_SHARED_MASK;
+    count |= SEMCOUNT_SHARED_MASK;
   }
+
+  atomic_uint* sem_count_ptr = SEM_TO_ATOMIC_POINTER(sem);
+  atomic_init(sem_count_ptr, count);
   return 0;
 }
 
@@ -122,98 +140,97 @@
 // and return the old one. As a special case,
 // this returns immediately if the value is
 // negative (i.e. -1)
-static int __sem_dec(volatile uint32_t* sem) {
-  volatile int32_t* ptr = reinterpret_cast<volatile int32_t*>(sem);
-  uint32_t shared = (*sem & SEMCOUNT_SHARED_MASK);
-  uint32_t old_value, new_value;
-  int ret;
+static int __sem_dec(atomic_uint* sem_count_ptr) {
+  unsigned int old_value = atomic_load_explicit(sem_count_ptr, memory_order_relaxed);
+  unsigned int shared = old_value & SEMCOUNT_SHARED_MASK;
 
+  // Use memory_order_seq_cst in the atomic_compare_exchange operation to ensure that
+  // all memory accesses made by other threads are visible to the current thread.
+  // An acquire fence may be sufficient, but it is still under discussion whether
+  // POSIX semaphores should provide sequential consistency.
   do {
-    old_value = (*sem & SEMCOUNT_VALUE_MASK);
-    ret = SEMCOUNT_TO_VALUE(old_value);
-    if (ret < 0) {
+    if (SEMCOUNT_TO_VALUE(old_value) < 0) {
       break;
     }
+  } while (!atomic_compare_exchange_weak(sem_count_ptr, &old_value,
+           SEMCOUNT_DECREMENT(old_value) | shared));
 
-    new_value = SEMCOUNT_DECREMENT(old_value);
-  } while (__bionic_cmpxchg((old_value|shared), (new_value|shared), ptr) != 0);
-
-  return ret;
+  return SEMCOUNT_TO_VALUE(old_value);
 }
 
 // Same as __sem_dec, but will not touch anything if the
 // value is already negative *or* 0. Returns the old value.
-static int __sem_trydec(volatile uint32_t* sem) {
-  volatile int32_t* ptr = reinterpret_cast<volatile int32_t*>(sem);
-  uint32_t shared = (*sem & SEMCOUNT_SHARED_MASK);
-  uint32_t old_value, new_value;
-  int          ret;
+static int __sem_trydec(atomic_uint* sem_count_ptr) {
+  unsigned int old_value = atomic_load_explicit(sem_count_ptr, memory_order_relaxed);
+  unsigned int shared = old_value & SEMCOUNT_SHARED_MASK;
 
+  // Use memory_order_seq_cst in the atomic_compare_exchange operation to ensure that
+  // all memory accesses made by other threads are visible to the current thread.
+  // An acquire fence may be sufficient, but it is still under discussion whether
+  // POSIX semaphores should provide sequential consistency.
   do {
-    old_value = (*sem & SEMCOUNT_VALUE_MASK);
-    ret = SEMCOUNT_TO_VALUE(old_value);
-    if (ret <= 0) {
+    if (SEMCOUNT_TO_VALUE(old_value) <= 0) {
       break;
     }
+  } while (!atomic_compare_exchange_weak(sem_count_ptr, &old_value,
+           SEMCOUNT_DECREMENT(old_value) | shared));
 
-    new_value = SEMCOUNT_DECREMENT(old_value);
-  } while (__bionic_cmpxchg((old_value|shared), (new_value|shared), ptr) != 0);
-
-  return ret;
+  return SEMCOUNT_TO_VALUE(old_value);
 }
 
-
 // "Increment" the value of a semaphore atomically and
 // return its old value. Note that this implements
 // the special case of "incrementing" any negative
 // value to +1 directly.
 //
 // NOTE: The value will _not_ wrap above SEM_VALUE_MAX
-static int __sem_inc(volatile uint32_t* sem) {
-  volatile int32_t* ptr = reinterpret_cast<volatile int32_t*>(sem);
-  uint32_t shared = (*sem & SEMCOUNT_SHARED_MASK);
-  uint32_t old_value, new_value;
-  int ret;
+static int __sem_inc(atomic_uint* sem_count_ptr) {
+  unsigned int old_value = atomic_load_explicit(sem_count_ptr, memory_order_relaxed);
+  unsigned int shared = old_value  & SEMCOUNT_SHARED_MASK;
+  unsigned int new_value;
 
+  // Use memory_order_seq_cst in the atomic_compare_exchange operation to ensure that
+  // all memory accesses made before it are visible to other threads.
+  // A release fence may be sufficient, but it is still under discussion whether
+  // POSIX semaphores should provide sequential consistency.
   do {
-    old_value = (*sem & SEMCOUNT_VALUE_MASK);
-    ret = SEMCOUNT_TO_VALUE(old_value);
-
     // Can't go higher than SEM_VALUE_MAX.
-    if (ret == SEM_VALUE_MAX) {
+    if (SEMCOUNT_TO_VALUE(old_value) == SEM_VALUE_MAX) {
       break;
     }
 
-    // If the counter is negative, go directly to +1, otherwise just increment.
-    if (ret < 0) {
-        new_value = SEMCOUNT_ONE;
+    // If the counter is negative, go directly to one, otherwise just increment.
+    if (SEMCOUNT_TO_VALUE(old_value) < 0) {
+      new_value = SEMCOUNT_ONE | shared;
     } else {
-      new_value = SEMCOUNT_INCREMENT(old_value);
+      new_value = SEMCOUNT_INCREMENT(old_value) | shared;
     }
-  } while (__bionic_cmpxchg((old_value|shared), (new_value|shared), ptr) != 0);
+  } while (!atomic_compare_exchange_weak(sem_count_ptr, &old_value,
+           new_value));
 
-  return ret;
+  return SEMCOUNT_TO_VALUE(old_value);
 }
 
 int sem_wait(sem_t* sem) {
-  uint32_t shared = SEM_GET_SHARED(sem);
+  atomic_uint* sem_count_ptr = SEM_TO_ATOMIC_POINTER(sem);
+  unsigned int shared = SEM_GET_SHARED(sem_count_ptr);
 
   while (true) {
-    if (__sem_dec(&sem->count) > 0) {
-      ANDROID_MEMBAR_FULL();
+    if (__sem_dec(sem_count_ptr) > 0) {
       return 0;
     }
 
-    __futex_wait_ex(&sem->count, shared, shared|SEMCOUNT_MINUS_ONE, NULL);
+    __futex_wait_ex(sem_count_ptr, shared, shared | SEMCOUNT_MINUS_ONE, NULL);
   }
 }
 
 int sem_timedwait(sem_t* sem, const timespec* abs_timeout) {
+  atomic_uint* sem_count_ptr = SEM_TO_ATOMIC_POINTER(sem);
+
   // POSIX says we need to try to decrement the semaphore
   // before checking the timeout value. Note that if the
   // value is currently 0, __sem_trydec() does nothing.
-  if (__sem_trydec(&sem->count) > 0) {
-    ANDROID_MEMBAR_FULL();
+  if (__sem_trydec(sem_count_ptr) > 0) {
     return 0;
   }
 
@@ -223,7 +240,7 @@
     return -1;
   }
 
-  uint32_t shared = SEM_GET_SHARED(sem);
+  unsigned int shared = SEM_GET_SHARED(sem_count_ptr);
 
   while (true) {
     // POSIX mandates CLOCK_REALTIME here.
@@ -234,13 +251,12 @@
     }
 
     // Try to grab the semaphore. If the value was 0, this will also change it to -1.
-    if (__sem_dec(&sem->count) > 0) {
-      ANDROID_MEMBAR_FULL();
+    if (__sem_dec(sem_count_ptr) > 0) {
       break;
     }
 
     // Contention detected. Wait for a wakeup event.
-    int ret = __futex_wait_ex(&sem->count, shared, shared|SEMCOUNT_MINUS_ONE, &ts);
+    int ret = __futex_wait_ex(sem_count_ptr, shared, shared | SEMCOUNT_MINUS_ONE, &ts);
 
     // Return in case of timeout or interrupt.
     if (ret == -ETIMEDOUT || ret == -EINTR) {
@@ -252,13 +268,13 @@
 }
 
 int sem_post(sem_t* sem) {
-  uint32_t shared = SEM_GET_SHARED(sem);
+  atomic_uint* sem_count_ptr = SEM_TO_ATOMIC_POINTER(sem);
+  unsigned int shared = SEM_GET_SHARED(sem_count_ptr);
 
-  ANDROID_MEMBAR_FULL();
-  int old_value = __sem_inc(&sem->count);
+  int old_value = __sem_inc(sem_count_ptr);
   if (old_value < 0) {
     // Contention on the semaphore. Wake up all waiters.
-    __futex_wake_ex(&sem->count, shared, INT_MAX);
+    __futex_wake_ex(sem_count_ptr, shared, INT_MAX);
   } else if (old_value == SEM_VALUE_MAX) {
     // Overflow detected.
     errno = EOVERFLOW;
@@ -269,8 +285,8 @@
 }
 
 int sem_trywait(sem_t* sem) {
-  if (__sem_trydec(&sem->count) > 0) {
-    ANDROID_MEMBAR_FULL();
+  atomic_uint* sem_count_ptr = SEM_TO_ATOMIC_POINTER(sem);
+  if (__sem_trydec(sem_count_ptr) > 0) {
     return 0;
   } else {
     errno = EAGAIN;
@@ -279,7 +295,12 @@
 }
 
 int sem_getvalue(sem_t* sem, int* sval) {
-  int val = SEMCOUNT_TO_VALUE(sem->count);
+  atomic_uint* sem_count_ptr = SEM_TO_ATOMIC_POINTER(sem);
+
+  // Use memory_order_seq_cst in the atomic_load operation.
+  // memory_order_relaxed may be fine here, but it is still under discussion
+  // whether POSIX semaphores should provide sequential consistency.
+  int val = SEMCOUNT_TO_VALUE(atomic_load(sem_count_ptr));
   if (val < 0) {
     val = 0;
   }
diff --git a/libc/include/semaphore.h b/libc/include/semaphore.h
index 5827870..4ef13af 100644
--- a/libc/include/semaphore.h
+++ b/libc/include/semaphore.h
@@ -36,7 +36,7 @@
 struct timespec;
 
 typedef struct {
-  volatile unsigned int count;
+  unsigned int count;
 #ifdef __LP64__
   int __reserved[3];
 #endif
diff --git a/libc/tools/zoneinfo/update-tzdata.py b/libc/tools/zoneinfo/update-tzdata.py
index 4847356..d5788af 100755
--- a/libc/tools/zoneinfo/update-tzdata.py
+++ b/libc/tools/zoneinfo/update-tzdata.py
@@ -140,7 +140,7 @@
 
   # Regenerate the .dat file.
   os.chdir(icu_working_dir)
-  subprocess.check_call(['make', '-j32'])
+  subprocess.check_call(['make', 'INCLUDE_UNI_CORE_DATA=1', '-j32'])
 
   # Copy the .dat file to its ultimate destination.
   icu_dat_data_dir = '%s/stubdata' % icu_dir
diff --git a/linker/linker.cpp b/linker/linker.cpp
index f7bcd27..3934484 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -798,7 +798,7 @@
 }
 
 ElfW(Sym)* soinfo::gnu_addr_lookup(const void* addr) {
-  ElfW(Addr) soaddr = reinterpret_cast<ElfW(Addr)>(addr) - base;
+  ElfW(Addr) soaddr = reinterpret_cast<ElfW(Addr)>(addr) - load_bias;
 
   for (size_t i = 0; i < nbucket_; ++i) {
     uint32_t n = bucket_[i];
@@ -819,7 +819,7 @@
 }
 
 ElfW(Sym)* soinfo::elf_addr_lookup(const void* addr) {
-  ElfW(Addr) soaddr = reinterpret_cast<ElfW(Addr)>(addr) - base;
+  ElfW(Addr) soaddr = reinterpret_cast<ElfW(Addr)>(addr) - load_bias;
 
   // Search the library's symbol table for any defined symbol which
   // contains this address.
diff --git a/tests/fortify_test.cpp b/tests/fortify_test.cpp
index 6cbc695..5cc728f 100644
--- a/tests/fortify_test.cpp
+++ b/tests/fortify_test.cpp
@@ -26,6 +26,7 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <time.h>
 
 #if __BIONIC__
 #define ASSERT_FORTIFY(expr) ASSERT_EXIT(expr, testing::KilledBySignal(SIGABRT), "FORTIFY")
@@ -938,11 +939,15 @@
 TEST_F(DEATHTEST, poll_fortified) {
   nfds_t fd_count = atoi("2"); // suppress compiler optimizations
   pollfd buf[1] = {{0, POLLIN, 0}};
-  ASSERT_FORTIFY(poll(buf, fd_count, -1));
+  // Set timeout to zero to prevent waiting in poll when fortify test fails.
+  ASSERT_FORTIFY(poll(buf, fd_count, 0));
 }
 
 TEST_F(DEATHTEST, ppoll_fortified) {
   nfds_t fd_count = atoi("2"); // suppress compiler optimizations
   pollfd buf[1] = {{0, POLLIN, 0}};
-  ASSERT_FORTIFY(ppoll(buf, fd_count, NULL, NULL));
+  // Set timeout to zero to prevent waiting in ppoll when fortify test fails.
+  timespec timeout;
+  timeout.tv_sec = timeout.tv_nsec = 0;
+  ASSERT_FORTIFY(ppoll(buf, fd_count, &timeout, NULL));
 }
diff --git a/tests/gtest_main.cpp b/tests/gtest_main.cpp
index e199449..86d6466 100644
--- a/tests/gtest_main.cpp
+++ b/tests/gtest_main.cpp
@@ -16,9 +16,12 @@
 
 #include <gtest/gtest.h>
 
+#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <inttypes.h>
+#include <limits.h>
+#include <signal.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
@@ -93,7 +96,10 @@
          "      It takes effect only in isolation mode. Default warnline is 2000 ms.\n"
          "  --gtest-filter=POSITIVE_PATTERNS[-NEGATIVE_PATTERNS]\n"
          "      Used as a synonym for --gtest_filter option in gtest.\n"
-         "\nDefault bionic unit test option is -j.\n"
+         "Default bionic unit test option is -j.\n"
+         "In isolation mode, you can send SIGQUIT to the parent process to show current\n"
+         "running tests, or send SIGINT to the parent process to stop testing and\n"
+         "clean up current running tests.\n"
          "\n");
 }
 
@@ -223,13 +229,13 @@
   int towrite = strlen(buf);
   char* p = buf;
   while (towrite > 0) {
-    ssize_t write_count = TEMP_FAILURE_RETRY(write(child_output_fd, p, towrite));
-    if (write_count == -1) {
+    ssize_t bytes_written = TEMP_FAILURE_RETRY(write(child_output_fd, p, towrite));
+    if (bytes_written == -1) {
       fprintf(stderr, "failed to write child_output_fd: %s\n", strerror(errno));
       exit(1);
     } else {
-      towrite -= write_count;
-      p += write_count;
+      towrite -= bytes_written;
+      p += bytes_written;
     }
   }
 }
@@ -488,6 +494,18 @@
   fclose(fp);
 }
 
+struct ChildProcInfo {
+  pid_t pid;
+  int64_t start_time_ns;
+  int64_t end_time_ns;
+  int64_t deadline_end_time_ns; // The time at which the test is considered to have timed out.
+  size_t testcase_id, test_id;
+  bool finished;
+  bool timed_out;
+  int exit_status;
+  int child_read_fd; // File descriptor to read child test failure info.
+};
+
 // Forked Child process, run the single test.
 static void ChildProcessFn(int argc, char** argv, const std::string& test_name) {
   char** new_argv = new char*[argc + 2];
@@ -505,57 +523,142 @@
   exit(result);
 }
 
-struct ChildProcInfo {
-  pid_t pid;
-  int64_t start_time_ns;
-  int64_t deadline_time_ns;
-  size_t testcase_id, test_id;
-  bool done_flag;
-  bool timeout_flag;
-  int exit_status;
-  int child_read_fd;
-  ChildProcInfo() : pid(0) {}
-};
+static ChildProcInfo RunChildProcess(const std::string& test_name, int testcase_id, int test_id,
+                                     sigset_t sigmask, int argc, char** argv) {
+  int pipefd[2];
+  int ret = pipe2(pipefd, O_NONBLOCK);
+  if (ret == -1) {
+    perror("pipe2 in RunTestInSeparateProc");
+    exit(1);
+  }
+  pid_t pid = fork();
+  if (pid == -1) {
+    perror("fork in RunTestInSeparateProc");
+    exit(1);
+  } else if (pid == 0) {
+    // In child process, run a single test.
+    close(pipefd[0]);
+    child_output_fd = pipefd[1];
 
-static void WaitChildProcs(std::vector<ChildProcInfo>& child_proc_list) {
-  pid_t result;
-  int status;
-  bool loop_flag = true;
+    if (sigprocmask(SIG_SETMASK, &sigmask, NULL) == -1) {
+      perror("sigprocmask SIG_SETMASK");
+      exit(1);
+    }
+    ChildProcessFn(argc, argv, test_name);
+    // Unreachable.
+  }
+  // In parent process, initialize child process info.
+  close(pipefd[1]);
+  ChildProcInfo child_proc;
+  child_proc.child_read_fd = pipefd[0];
+  child_proc.pid = pid;
+  child_proc.start_time_ns = NanoTime();
+  child_proc.deadline_end_time_ns = child_proc.start_time_ns + GetDeadlineInfo(test_name) * 1000000LL;
+  child_proc.testcase_id = testcase_id;
+  child_proc.test_id = test_id;
+  child_proc.finished = false;
+  return child_proc;
+}
 
+static void HandleSignals(std::vector<TestCase>& testcase_list,
+                            std::vector<ChildProcInfo>& child_proc_list) {
+  sigset_t waiting_mask;
+  sigemptyset(&waiting_mask);
+  sigaddset(&waiting_mask, SIGINT);
+  sigaddset(&waiting_mask, SIGQUIT);
+  timespec timeout;
+  timeout.tv_sec = timeout.tv_nsec = 0;
   while (true) {
-    while ((result = waitpid(-1, &status, WNOHANG)) == -1) {
-      if (errno != EINTR) {
-        break;
+    int signo = TEMP_FAILURE_RETRY(sigtimedwait(&waiting_mask, NULL, &timeout));
+    if (signo == -1) {
+      if (errno == EAGAIN) {
+        return; // Timeout, no pending signals.
+      }
+      perror("sigtimedwait");
+      exit(1);
+    } else if (signo == SIGQUIT) {
+      // Print current running tests.
+      printf("List of current running tests:\n");
+      for (auto& child_proc : child_proc_list) {
+        if (child_proc.pid != 0) {
+          std::string test_name = testcase_list[child_proc.testcase_id].GetTestName(child_proc.test_id);
+          int64_t current_time_ns = NanoTime();
+          int64_t run_time_ms = (current_time_ns - child_proc.start_time_ns) / 1000000;
+          printf("  %s (%" PRId64 " ms)\n", test_name.c_str(), run_time_ms);
+        }
+      }
+    } else if (signo == SIGINT) {
+      // Kill current running tests.
+      for (auto& child_proc : child_proc_list) {
+        if (child_proc.pid != 0) {
+          // Send SIGKILL to ensure the child process can be killed unconditionally.
+          kill(child_proc.pid, SIGKILL);
+        }
+      }
+      // SIGINT kills the parent process as well.
+      exit(1);
+    }
+  }
+}
+
+static bool CheckChildProcExit(pid_t exit_pid, int exit_status,
+                               std::vector<ChildProcInfo>& child_proc_list) {
+  for (size_t i = 0; i < child_proc_list.size(); ++i) {
+    if (child_proc_list[i].pid == exit_pid) {
+      child_proc_list[i].finished = true;
+      child_proc_list[i].timed_out = false;
+      child_proc_list[i].exit_status = exit_status;
+      child_proc_list[i].end_time_ns = NanoTime();
+      return true;
+    }
+  }
+  return false;
+}
+
+static size_t CheckChildProcTimeout(std::vector<ChildProcInfo>& child_proc_list) {
+  int64_t current_time_ns = NanoTime();
+  size_t timeout_child_count = 0;
+  for (auto& child_proc : child_proc_list) {
+    // Skip children already reaped in this round: their pid must not be killed or recounted.
+    if (!child_proc.finished && child_proc.deadline_end_time_ns <= current_time_ns) {
+      child_proc.finished = child_proc.timed_out = true;
+      child_proc.end_time_ns = current_time_ns;
+      ++timeout_child_count;
+    }
+  }
+  return timeout_child_count;
+}
+
+static void WaitChildProcs(std::vector<TestCase>& testcase_list,
+                           std::vector<ChildProcInfo>& child_proc_list) {
+  size_t finished_child_count = 0;
+  while (true) {
+    int status;
+    pid_t result;
+    while ((result = TEMP_FAILURE_RETRY(waitpid(-1, &status, WNOHANG))) > 0) {
+      if (CheckChildProcExit(result, status, child_proc_list)) {
+        ++finished_child_count;
       }
     }
 
     if (result == -1) {
-      perror("waitpid");
-      exit(1);
+      if (errno == ECHILD) {
+        // This happens when we have no running child processes.
+        return;
+      } else {
+        perror("waitpid");
+        exit(1);
+      }
     } else if (result == 0) {
-      // Check child timeout.
-      int64_t current_time_ns = NanoTime();
-      for (size_t i = 0; i < child_proc_list.size(); ++i) {
-        if (child_proc_list[i].deadline_time_ns <= current_time_ns) {
-          child_proc_list[i].done_flag = true;
-          child_proc_list[i].timeout_flag = true;
-          loop_flag = false;
-        }
-      }
-    } else {
-      // Check child finish.
-      for (size_t i = 0; i < child_proc_list.size(); ++i) {
-        if (child_proc_list[i].pid == result) {
-          child_proc_list[i].done_flag = true;
-          child_proc_list[i].timeout_flag = false;
-          child_proc_list[i].exit_status = status;
-          loop_flag = false;
-          break;
-        }
-      }
+      finished_child_count += CheckChildProcTimeout(child_proc_list);
     }
 
-    if (!loop_flag) break;
+    if (finished_child_count > 0) {
+      return;
+    }
+
+    HandleSignals(testcase_list, child_proc_list);
+
     // sleep 1 ms to avoid busy looping.
     timespec sleep_time;
     sleep_time.tv_sec = 0;
@@ -564,15 +667,9 @@
   }
 }
 
-static TestResult WaitChildProc(pid_t pid) {
-  pid_t result;
+static TestResult WaitForOneChild(pid_t pid) {
   int exit_status;
-
-  while ((result = waitpid(pid, &exit_status, 0)) == -1) {
-    if (errno != EINTR) {
-      break;
-    }
-  }
+  pid_t result = TEMP_FAILURE_RETRY(waitpid(pid, &exit_status, 0));
 
   TestResult test_result = TEST_SUCCESS;
   if (result != pid || WEXITSTATUS(exit_status) != 0) {
@@ -581,6 +678,57 @@
   return test_result;
 }
 
+static void CollectChildTestResult(const ChildProcInfo& child_proc, TestCase& testcase) {
+  int test_id = child_proc.test_id;
+  testcase.SetTestTime(test_id, child_proc.end_time_ns - child_proc.start_time_ns);
+  if (child_proc.timed_out) {
+    // A child marked timed_out is still running, so kill and reap it before reading its output.
+    kill(child_proc.pid, SIGKILL);
+    WaitForOneChild(child_proc.pid);
+  }
+
+  while (true) {
+    char buf[1024];
+    ssize_t bytes_read = TEMP_FAILURE_RETRY(read(child_proc.child_read_fd, buf, sizeof(buf) - 1));
+    if (bytes_read > 0) {
+      buf[bytes_read] = '\0';
+      testcase.GetTest(test_id).AppendFailureMessage(buf);
+    } else if (bytes_read == 0) {
+      break; // read() returned 0: end of the child's output.
+    } else {
+      if (errno == EAGAIN) {
+        // No data is available. This is rare and only happens when the child process created
+        // other processes which have not exited yet. The child itself has already exited or
+        // been killed, so the test has finished and we should not wait any longer.
+        break;
+      }
+      perror("read child_read_fd in RunTestInSeparateProc");
+      exit(1);
+    }
+  }
+  close(child_proc.child_read_fd);
+
+  if (child_proc.timed_out) {
+    testcase.SetTestResult(test_id, TEST_TIMEOUT);
+    char buf[1024];
+    snprintf(buf, sizeof(buf), "%s killed because of timeout at %" PRId64 " ms.\n",
+             testcase.GetTestName(test_id).c_str(), testcase.GetTestTime(test_id) / 1000000);
+    testcase.GetTest(test_id).AppendFailureMessage(buf);
+
+  } else if (WIFSIGNALED(child_proc.exit_status)) {
+    // A test terminated by a signal is recorded as failed.
+    testcase.SetTestResult(test_id, TEST_FAILED);
+    char buf[1024];
+    snprintf(buf, sizeof(buf), "%s terminated by signal: %s.\n",
+             testcase.GetTestName(test_id).c_str(), strsignal(WTERMSIG(child_proc.exit_status)));
+    testcase.GetTest(test_id).AppendFailureMessage(buf);
+
+  } else {
+    testcase.SetTestResult(test_id, WEXITSTATUS(child_proc.exit_status) == 0 ?
+                           TEST_SUCCESS : TEST_FAILED);
+  }
+}
+
 // We choose to use multi-fork and multi-wait here instead of multi-thread, because it always
 // makes deadlock to use fork in multi-thread.
 static void RunTestInSeparateProc(int argc, char** argv, std::vector<TestCase>& testcase_list,
@@ -591,13 +739,23 @@
                         testing::UnitTest::GetInstance()->listeners().default_result_printer());
   testing::UnitTest::GetInstance()->listeners().Append(new TestResultPrinter);
 
+  // Signals are blocked here as we want to handle them in HandleSignals() later.
+  sigset_t block_mask, orig_mask;
+  sigemptyset(&block_mask);
+  sigaddset(&block_mask, SIGINT);
+  sigaddset(&block_mask, SIGQUIT);
+  if (sigprocmask(SIG_BLOCK, &block_mask, &orig_mask) == -1) {
+    perror("sigprocmask SIG_BLOCK");
+    exit(1);
+  }
+
   for (size_t iteration = 1; iteration <= iteration_count; ++iteration) {
     OnTestIterationStartPrint(testcase_list, iteration, iteration_count);
     int64_t iteration_start_time_ns = NanoTime();
     time_t epoch_iteration_start_time = time(NULL);
 
     // Run up to job_count tests in parallel, each test in a child process.
-    std::vector<ChildProcInfo> child_proc_list(job_count);
+    std::vector<ChildProcInfo> child_proc_list;
 
     // Next test to run is [next_testcase_id:next_test_id].
     size_t next_testcase_id = 0;
@@ -608,103 +766,40 @@
     size_t finished_testcase_count = 0;
 
     while (finished_testcase_count < testcase_list.size()) {
-      // Fork up to job_count child processes.
-      for (auto& child_proc : child_proc_list) {
-        if (child_proc.pid == 0 && next_testcase_id < testcase_list.size()) {
-          std::string test_name = testcase_list[next_testcase_id].GetTestName(next_test_id);
-          int pipefd[2];
-          int ret = pipe(pipefd);
-          if (ret == -1) {
-            perror("pipe2 in RunTestInSeparateProc");
-            exit(1);
-          }
-          pid_t pid = fork();
-          if (pid == -1) {
-            perror("fork in RunTestInSeparateProc");
-            exit(1);
-          } else if (pid == 0) {
-            close(pipefd[0]);
-            child_output_fd = pipefd[1];
-            // Run child process test, never return.
-            ChildProcessFn(argc, argv, test_name);
-          }
-          // Parent process
-          close(pipefd[1]);
-          child_proc.child_read_fd = pipefd[0];
-          child_proc.pid = pid;
-          child_proc.start_time_ns = NanoTime();
-          child_proc.deadline_time_ns = child_proc.start_time_ns +
-                                        GetDeadlineInfo(test_name) * 1000000LL;
-          child_proc.testcase_id = next_testcase_id;
-          child_proc.test_id = next_test_id;
-          child_proc.done_flag = false;
-          if (++next_test_id == testcase_list[next_testcase_id].TestCount()) {
-            next_test_id = 0;
-            ++next_testcase_id;
-          }
+      // Start child processes until job_count of them are running.
+      while (child_proc_list.size() < job_count && next_testcase_id < testcase_list.size()) {
+        std::string test_name = testcase_list[next_testcase_id].GetTestName(next_test_id);
+        ChildProcInfo child_proc = RunChildProcess(test_name, next_testcase_id, next_test_id,
+                                                   orig_mask, argc, argv);
+        child_proc_list.push_back(child_proc);
+        if (++next_test_id == testcase_list[next_testcase_id].TestCount()) {
+          next_test_id = 0;
+          ++next_testcase_id;
         }
       }
 
       // Wait for any child proc finish or timeout.
-      WaitChildProcs(child_proc_list);
+      WaitChildProcs(testcase_list, child_proc_list);
 
       // Collect result.
-      for (auto& child_proc : child_proc_list) {
-        if (child_proc.pid != 0 && child_proc.done_flag == true) {
+      auto it = child_proc_list.begin();
+      while (it != child_proc_list.end()) {
+        auto& child_proc = *it;
+        if (child_proc.finished == true) {
           size_t testcase_id = child_proc.testcase_id;
           size_t test_id = child_proc.test_id;
           TestCase& testcase = testcase_list[testcase_id];
-          testcase.SetTestTime(test_id, NanoTime() - child_proc.start_time_ns);
 
-          // Kill and wait the timeout child process before we read failure message.
-          if (child_proc.timeout_flag) {
-            kill(child_proc.pid, SIGKILL);
-            WaitChildProc(child_proc.pid);
-          }
-
-          while (true) {
-            char buf[1024];
-            int ret = TEMP_FAILURE_RETRY(read(child_proc.child_read_fd, buf, sizeof(buf) - 1));
-            if (ret > 0) {
-              buf[ret] = '\0';
-              testcase.GetTest(test_id).AppendFailureMessage(buf);
-            } else if (ret == 0) {
-              break; // Read end.
-            } else {
-              perror("read child_read_fd in RunTestInSeparateProc");
-              exit(1);
-            }
-          }
-          close(child_proc.child_read_fd);
-
-          if (child_proc.timeout_flag) {
-            testcase.SetTestResult(test_id, TEST_TIMEOUT);
-            char buf[1024];
-            snprintf(buf, sizeof(buf), "%s killed because of timeout at %" PRId64 " ms.\n",
-                     testcase.GetTestName(test_id).c_str(),
-                     testcase.GetTestTime(test_id) / 1000000);
-            testcase.GetTest(test_id).AppendFailureMessage(buf);
-
-          } else if (WIFSIGNALED(child_proc.exit_status)) {
-            // Record signal terminated test as failed.
-            testcase.SetTestResult(test_id, TEST_FAILED);
-            char buf[1024];
-            snprintf(buf, sizeof(buf), "%s terminated by signal: %s.\n",
-                     testcase.GetTestName(test_id).c_str(),
-                     strsignal(WTERMSIG(child_proc.exit_status)));
-            testcase.GetTest(test_id).AppendFailureMessage(buf);
-
-          } else {
-            testcase.SetTestResult(test_id, WEXITSTATUS(child_proc.exit_status) == 0 ?
-                                   TEST_SUCCESS : TEST_FAILED);
-          }
+          CollectChildTestResult(child_proc, testcase);
           OnTestEndPrint(testcase, test_id);
 
           if (++finished_test_count_list[testcase_id] == testcase.TestCount()) {
             ++finished_testcase_count;
           }
-          child_proc.pid = 0;
-          child_proc.done_flag = false;
+
+          it = child_proc_list.erase(it);
+        } else {
+          ++it;
         }
       }
     }
@@ -716,12 +811,36 @@
                                  elapsed_time_ns);
     }
   }
+
+  // Restore signal mask.
+  if (sigprocmask(SIG_SETMASK, &orig_mask, NULL) == -1) {
+    perror("sigprocmask SIG_SETMASK");
+    exit(1);
+  }
 }
 
 static size_t GetProcessorCount() {
   return static_cast<size_t>(sysconf(_SC_NPROCESSORS_ONLN));
 }
 
+static void AddPathSeparatorInTestProgramPath(std::vector<char*>& args) {
+  // To run DeathTest in threadsafe mode, gtest requires that the test program be invoked
+  // via a valid path that contains at least one path separator.
+  // The reason is that gtest uses clone() + execve() to run DeathTest in threadsafe mode,
+  // and execve() doesn't read the environment variable PATH, so execve() will not succeed
+  // unless we specify the absolute or relative path of the test program directly.
+  if (strchr(args[0], '/') == NULL) {
+    char path[PATH_MAX];
+    ssize_t path_len = readlink("/proc/self/exe", path, sizeof(path));
+    if (path_len <= 0 || path_len >= static_cast<ssize_t>(sizeof(path))) {
+      perror("readlink");
+      exit(1);
+    }
+    path[path_len] = '\0';
+    args[0] = strdup(path);
+  }
+}
+
 static void AddGtestFilterSynonym(std::vector<char*>& args) {
   // Support --gtest-filter as a synonym for --gtest_filter.
   for (size_t i = 1; i < args.size(); ++i) {
@@ -759,6 +878,7 @@
     }
   }
 
+  AddPathSeparatorInTestProgramPath(args);
   AddGtestFilterSynonym(args);
 
   // if --bionic-selftest argument is used, only enable self tests, otherwise remove self tests.