Switch pthread_create over to __bionic_clone.

Bug: 8206355
Bug: 11693195
Change-Id: I04aadbc36c87e1b7e33324b9a930a1e441fbfed6
diff --git a/libc/bionic/__thread_entry.cpp b/libc/bionic/__thread_entry.cpp
index 8300a64..3505f8b 100644
--- a/libc/bionic/__thread_entry.cpp
+++ b/libc/bionic/__thread_entry.cpp
@@ -32,26 +32,28 @@
 
 #include "private/bionic_tls.h"
 
-// This trampoline is called from the assembly _pthread_clone function.
-// Our 'tls' and __pthread_clone's 'child_stack' are one and the same, just growing in
-// opposite directions.
-extern "C" void __thread_entry(void* (*func)(void*), void* arg, void** tls) {
+// This trampoline is called from the assembly __bionic_clone function.
+int __thread_entry(void* arg) {
+  pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
+
   // Wait for our creating thread to release us. This lets it have time to
   // notify gdb about this thread before we start doing anything.
   // This also provides the memory barrier needed to ensure that all memory
   // accesses previously made by the creating thread are visible to us.
-  pthread_mutex_t* start_mutex = (pthread_mutex_t*) &tls[TLS_SLOT_SELF];
+  pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX];
   pthread_mutex_lock(start_mutex);
   pthread_mutex_destroy(start_mutex);
 
-  pthread_internal_t* thread = (pthread_internal_t*) tls[TLS_SLOT_THREAD_ID];
-  thread->tls = tls;
   __init_tls(thread);
 
+  __init_alternate_signal_stack(thread);
+
   if ((thread->internal_flags & PTHREAD_INTERNAL_FLAG_THREAD_INIT_FAILED) != 0) {
     pthread_exit(NULL);
   }
 
-  void* result = func(arg);
+  void* result = thread->start_routine(thread->start_routine_arg);
   pthread_exit(result);
+
+  return 0;
 }
diff --git a/libc/bionic/bionic_clone.c b/libc/bionic/bionic_clone.c
index 8a17e13..518d996 100644
--- a/libc/bionic/bionic_clone.c
+++ b/libc/bionic/bionic_clone.c
@@ -31,14 +31,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 
-extern int  __bionic_clone(unsigned long   clone_flags,
-                           void*           newsp,
-                           int            *parent_tidptr,
-                           void           *new_tls,
-                           int            *child_tidptr,
-                           int            (*fn)(void *),
-                           void          *arg);
-
+extern pid_t __bionic_clone(uint32_t flags, void* child_stack, int* parent_tid, void* tls, int* child_tid, int (*fn)(void*), void* arg);
 extern void __exit(int status);
 
 /* this function is called from the __bionic_clone
diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp
index f88a26d..3e092ae 100644
--- a/libc/bionic/libc_init_common.cpp
+++ b/libc/bionic/libc_init_common.cpp
@@ -49,6 +49,7 @@
 extern "C" abort_msg_t** __abort_message_ptr;
 extern "C" uintptr_t __get_sp(void);
 extern "C" int __system_properties_init(void);
+extern "C" int __set_tls(void* ptr);
 
 // Not public, but well-known in the BSDs.
 const char* __progname;
@@ -96,7 +97,10 @@
   pthread_attr_setstack(&thread.attr, (void*) stack_bottom, stack_size);
   _init_thread(&thread, false);
   __init_tls(&thread);
+  __set_tls(thread.tls);
   tls[TLS_SLOT_BIONIC_PREINIT] = &args;
+
+  __init_alternate_signal_stack(&thread);
 }
 
 void __libc_init_common(KernelArgumentBlock& args) {
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 2153310..386e8d1 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -40,7 +40,7 @@
 #include "private/ErrnoRestorer.h"
 #include "private/ScopedPthreadMutexLocker.h"
 
-extern "C" int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg);
+extern "C" pid_t __bionic_clone(uint32_t flags, void* child_stack, int* parent_tid, void* tls, int* child_tid, int (*fn)(void*), void* arg);
 
 #ifdef __i386__
 #define ATTRIBUTES __attribute__((noinline)) __attribute__((fastcall))
@@ -50,15 +50,14 @@
 
 extern "C" void ATTRIBUTES _thread_created_hook(pid_t thread_id);
 
-extern "C" int __set_tls(void* ptr);
-
 static pthread_mutex_t gPthreadStackCreationLock = PTHREAD_MUTEX_INITIALIZER;
 
 static pthread_mutex_t gDebuggerNotificationLock = PTHREAD_MUTEX_INITIALIZER;
 
+// This code is used both by each new pthread and by the code that initializes the main thread.
 void  __init_tls(pthread_internal_t* thread) {
-  // Zero-initialize all the slots.
-  for (size_t i = 0; i < BIONIC_TLS_SLOTS; ++i) {
+  // Zero-initialize all the slots after TLS_SLOT_SELF and TLS_SLOT_THREAD_ID.
+  for (size_t i = TLS_SLOT_ERRNO; i < BIONIC_TLS_SLOTS; ++i) {
     thread->tls[i] = NULL;
   }
 
@@ -67,11 +66,10 @@
   thread->tls[TLS_SLOT_THREAD_ID] = thread;
   // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
   thread->tls[TLS_SLOT_STACK_GUARD] = (void*) __stack_chk_guard;
+}
 
-  __set_tls(thread->tls);
-
+void __init_alternate_signal_stack(pthread_internal_t* thread) {
   // Create and set an alternate signal stack.
-  // This must happen after __set_tls, in case a system call fails and tries to set errno.
   stack_t ss;
   ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
   if (ss.ss_sp != MAP_FAILED) {
@@ -181,24 +179,26 @@
   // The child stack is the same address, just growing in the opposite direction.
   // At offsets >= 0, we have the TLS slots.
   // At offsets < 0, we have the child stack.
-  void** tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
-  void* child_stack = tls;
+  thread->tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
+  void* child_stack = thread->tls;
 
-  // Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep
+  // Create a mutex in the new thread's TLS for it to wait on once it starts so we can keep
   // it from doing anything until after we notify the debugger about it
   //
   // This also provides the memory barrier we need to ensure that all
   // memory accesses previously performed by this thread are visible to
   // the new thread.
-  pthread_mutex_t* start_mutex = (pthread_mutex_t*) &tls[TLS_SLOT_SELF];
+  pthread_mutex_t* start_mutex = (pthread_mutex_t*) &thread->tls[TLS_SLOT_START_MUTEX];
   pthread_mutex_init(start_mutex, NULL);
   ScopedPthreadMutexLocker start_locker(start_mutex);
 
-  tls[TLS_SLOT_THREAD_ID] = thread;
+  thread->tls[TLS_SLOT_THREAD_ID] = thread;
 
-  int flags = CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM;
+  thread->start_routine = start_routine;
+  thread->start_routine_arg = arg;
 
-  int tid = __pthread_clone(start_routine, child_stack, flags, arg);
+  int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS;
+  int tid = __bionic_clone(flags, child_stack, NULL, thread->tls, NULL, __thread_entry, thread);
   if (tid < 0) {
     int clone_errno = errno;
     if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) == 0) {
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 8cca83a..52cfbce 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -42,6 +42,9 @@
     __pthread_cleanup_t*        cleanup_stack;
     void**                      tls;         /* thread-local storage area */
 
+    void* (*start_routine)(void*);
+    void* start_routine_arg;
+
     void* alternate_signal_stack;
 
     /*
@@ -52,8 +55,12 @@
     char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
 };
 
+extern "C" {
+  __LIBC_HIDDEN__ int __thread_entry(void* arg); // Called from assembler.
+}
 __LIBC_HIDDEN__ int _init_thread(pthread_internal_t* thread, bool add_to_thread_list);
 __LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
+__LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*);
 __LIBC_HIDDEN__ void _pthread_internal_add(pthread_internal_t* thread);
 __LIBC_HIDDEN__ pthread_internal_t* __get_thread(void);