Revert "Revert "Lose the hand-written futex assembler.""
The problem with the original patch was that using syscall(3) means that
errno can be set, but pthread_create(3) was abusing the TLS errno slot as
a pthread_mutex_t for the thread startup handshake.
There was also a mistake in the check for syscall failures --- it should
have compared the result against -1 (failure) instead of 0 (not just
because that's the default idiom, but also here because futex(2) can
legitimately return values > 0 on success; FUTEX_WAKE, for example,
returns the number of waiters woken).
This patch stops abusing the TLS errno slot and adds a pthread_mutex_t to
pthread_internal_t instead. (Note that for LP64 sizeof(pthread_mutex_t) >
sizeof(uintptr_t), so the old code could potentially clobber other TLS
slots too.)
I've also rewritten the LP32 compatibility stubs to directly reuse the
code from the .h file.
This reverts commit 75c55ff84ebfa686c7ae2cc8ee431c6a33bd46b4.
Bug: 15195455
Change-Id: I6ffb13e5cf6a35d8f59f692d94192aae9ab4593d
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index 4900a8a..1284b9a 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -31,7 +31,6 @@
#include <ctype.h>
#include <inttypes.h>
-#include <linux/futex.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -201,25 +200,20 @@
return vdprintf(fd, fmt, ap);
}
-static inline int __futex(volatile void* ftx, int op, int value, const struct timespec* timeout) {
- // Our generated syscall assembler sets errno, but our callers (pthread functions) don't want to.
- int saved_errno = errno;
- if (syscall(__NR_futex, ftx, op, value, timeout) == 0) {
- return 0;
- }
- int result = -errno;
- errno = saved_errno;
- return result;
-}
+#define __futex_wake __real_futex_wake
+#define __futex_wait __real_futex_wait
+#include "private/bionic_futex.h"
+#undef __futex_wake
+#undef __futex_wait
// This used to be in <sys/atomics.h>.
extern "C" int __futex_wake(volatile void* ftx, int count) {
- return __futex(ftx, FUTEX_WAKE, count, NULL);
+ return __real_futex_wake(ftx, count);
}
// This used to be in <sys/atomics.h>.
extern "C" int __futex_wait(volatile void* ftx, int value, const struct timespec* timeout) {
- return __futex(ftx, FUTEX_WAIT, value, timeout);
+ return __real_futex_wait(ftx, value, timeout);
}
// Unity's libmono uses this.
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 303af81..c4cb262 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -144,10 +144,8 @@
// notify gdb about this thread before we start doing anything.
// This also provides the memory barrier needed to ensure that all memory
// accesses previously made by the creating thread are visible to us.
- pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX];
- pthread_mutex_lock(start_mutex);
- pthread_mutex_destroy(start_mutex);
- thread->tls[TLS_SLOT_START_MUTEX] = NULL;
+ pthread_mutex_lock(&thread->startup_handshake_mutex);
+ pthread_mutex_destroy(&thread->startup_handshake_mutex);
__init_alternate_signal_stack(thread);
@@ -204,7 +202,8 @@
// The child stack is the same address, just growing in the opposite direction.
// At offsets >= 0, we have the TLS slots.
// At offsets < 0, we have the child stack.
- thread->tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
+ thread->tls = reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(thread->attr.stack_base) +
+ thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
void* child_stack = thread->tls;
__init_tls(thread);
@@ -214,9 +213,8 @@
// This also provides the memory barrier we need to ensure that all
// memory accesses previously performed by this thread are visible to
// the new thread.
- pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX];
- pthread_mutex_init(start_mutex, NULL);
- pthread_mutex_lock(start_mutex);
+ pthread_mutex_init(&thread->startup_handshake_mutex, NULL);
+ pthread_mutex_lock(&thread->startup_handshake_mutex);
thread->start_routine = start_routine;
thread->start_routine_arg = arg;
@@ -237,7 +235,7 @@
// We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to
// be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
// reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
- pthread_mutex_unlock(start_mutex);
+ pthread_mutex_unlock(&thread->startup_handshake_mutex);
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) == 0) {
munmap(thread->attr.stack_base, thread->attr.stack_size);
}
@@ -252,7 +250,7 @@
// Letting the thread run is the easiest way to clean up its resources.
thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
thread->start_routine = __do_nothing;
- pthread_mutex_unlock(start_mutex);
+ pthread_mutex_unlock(&thread->startup_handshake_mutex);
return init_errno;
}
@@ -264,7 +262,7 @@
// Publish the pthread_t and unlock the mutex to let the new thread start running.
*thread_out = reinterpret_cast<pthread_t>(thread);
- pthread_mutex_unlock(start_mutex);
+ pthread_mutex_unlock(&thread->startup_handshake_mutex);
return 0;
}
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 295d9d6..490ae86 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -48,6 +48,8 @@
void* alternate_signal_stack;
+ pthread_mutex_t startup_handshake_mutex;
+
/*
* The dynamic linker implements dlerror(3), which makes it hard for us to implement this
* per-thread buffer by simply using malloc(3) and free(3).