Revert "Revert "Lose the hand-written futex assembler.""

The problem with the original patch was that using syscall(3) means that
errno can be set, but pthread_create(3) was abusing the TLS errno slot as
a pthread_mutex_t for the thread startup handshake.

There was also a mistake in the check for syscall failures --- it should
have checked against -1 instead of 0 (not just because that's the default
idiom, but also here because futex(2) can legitimately return values > 0).

This patch stops abusing the TLS errno slot and adds a pthread_mutex_t to
pthread_internal_t instead. (Note that for LP64 sizeof(pthread_mutex_t) >
sizeof(uintptr_t), so we could potentially clobber other TLS slots too.)

I've also rewritten the LP32 compatibility stubs to directly reuse the
code from the .h file.

This reverts commit 75c55ff84ebfa686c7ae2cc8ee431c6a33bd46b4.

Bug: 15195455
Change-Id: I6ffb13e5cf6a35d8f59f692d94192aae9ab4593d
diff --git a/libc/private/bionic_futex.h b/libc/private/bionic_futex.h
index 11699ce..35d33d0 100644
--- a/libc/private/bionic_futex.h
+++ b/libc/private/bionic_futex.h
@@ -28,31 +28,42 @@
 #ifndef _BIONIC_FUTEX_H
 #define _BIONIC_FUTEX_H
 
+#include <errno.h>
 #include <linux/futex.h>
-#include <sys/cdefs.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <sys/cdefs.h>
+#include <sys/syscall.h>
 
 __BEGIN_DECLS
 
 struct timespec;
 
-extern int __futex_syscall4(volatile void* ftx, int op, int value, const struct timespec* timeout);
+static inline __always_inline int __futex(volatile void* ftx, int op, int value, const struct timespec* timeout) {
+  // Our generated syscall assembler sets errno, but our callers (pthread functions) don't want to.
+  int saved_errno = errno;
+  if (__predict_false(syscall(__NR_futex, ftx, op, value, timeout) == -1)) {
+    int result = -errno;
+    errno = saved_errno;
+    return result;
+  }
+  return 0;
+}
 
 static inline int __futex_wake(volatile void* ftx, int count) {
-  return __futex_syscall4(ftx, FUTEX_WAKE, count, NULL);
+  return __futex(ftx, FUTEX_WAKE, count, NULL);
 }
 
 static inline int __futex_wake_ex(volatile void* ftx, bool shared, int count) {
-  return __futex_syscall4(ftx, shared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE, count, NULL);
+  return __futex(ftx, shared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE, count, NULL);
 }
 
 static inline int __futex_wait(volatile void* ftx, int value, const struct timespec* timeout) {
-  return __futex_syscall4(ftx, FUTEX_WAIT, value, timeout);
+  return __futex(ftx, FUTEX_WAIT, value, timeout);
 }
 
 static inline int __futex_wait_ex(volatile void* ftx, bool shared, int value, const struct timespec* timeout) {
-  return __futex_syscall4(ftx, shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, value, timeout);
+  return __futex(ftx, shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, value, timeout);
 }
 
 __END_DECLS