Revert "Revert "Lose the hand-written futex assembler.""

The problem with the original patch was that using syscall(3) means that
errno can be set, but pthread_create(3) was abusing the TLS errno slot as
a pthread_mutex_t for the thread startup handshake.

There was also a mistake in the check for syscall failures --- it should
have checked against -1 instead of 0 (not just because that's the default
idiom, but also here because futex(2) can legitimately return values > 0).

This patch stops abusing the TLS errno slot and adds a pthread_mutex_t to
pthread_internal_t instead. (Note that for LP64 sizeof(pthread_mutex_t) >
sizeof(uintptr_t), so we could potentially clobber other TLS slots too.)

I've also rewritten the LP32 compatibility stubs to directly reuse the
code from the .h file.

This reverts commit 75c55ff84ebfa686c7ae2cc8ee431c6a33bd46b4.

Bug: 15195455
Change-Id: I6ffb13e5cf6a35d8f59f692d94192aae9ab4593d
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index 4900a8a..1284b9a 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -31,7 +31,6 @@
 
 #include <ctype.h>
 #include <inttypes.h>
-#include <linux/futex.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -201,25 +200,20 @@
   return vdprintf(fd, fmt, ap);
 }
 
-static inline int __futex(volatile void* ftx, int op, int value, const struct timespec* timeout) {
-  // Our generated syscall assembler sets errno, but our callers (pthread functions) don't want to.
-  int saved_errno = errno;
-  if (syscall(__NR_futex, ftx, op, value, timeout) == 0) {
-    return 0;
-  }
-  int result = -errno;
-  errno = saved_errno;
-  return result;
-}
+#define __futex_wake __real_futex_wake
+#define __futex_wait __real_futex_wait
+#include "private/bionic_futex.h"
+#undef __futex_wake
+#undef __futex_wait
 
 // This used to be in <sys/atomics.h>.
 extern "C" int __futex_wake(volatile void* ftx, int count) {
-  return __futex(ftx, FUTEX_WAKE, count, NULL);
+  return __real_futex_wake(ftx, count);
 }
 
 // This used to be in <sys/atomics.h>.
 extern "C" int __futex_wait(volatile void* ftx, int value, const struct timespec* timeout) {
-  return __futex(ftx, FUTEX_WAIT, value, timeout);
+  return __real_futex_wait(ftx, value, timeout);
 }
 
 // Unity's libmono uses this.