Only wipe TLS for user-supplied stacks.

Stacks that bionic allocates itself come from a fresh anonymous mmap,
so the TLS area carved out of the top of such a stack is already
zero; only wipe the TLS slots when the caller supplied its own stack,
whose contents we can't make any assumptions about. Add a
user_allocated_stack() helper to pthread_internal_t and move the
PTHREAD_ATTR_FLAG_* definitions above the struct so the new inline
function can use them. Also pass -1 rather than 0 as the fd for the
MAP_ANONYMOUS mapping of the alternate signal stack, as portable use
of MAP_ANONYMOUS requires.

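For context, a caller supplies its own stack with
pthread_attr_setstack(3). The sketch below is only illustrative and is
not part of this change (the worker function, the posix_memalign-backed
stack, and the sizes are made up); it creates a thread whose stack is
heap memory, where the words that end up backing the TLS slots may hold
stale data and so have to be zeroed explicitly.

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    static void* worker(void* arg) {
      (void) arg;
      // With a caller-provided stack, bionic can't assume the memory
      // holding this thread's TLS slots was zero, so __init_tls wipes it.
      printf("running on a caller-provided stack\n");
      return NULL;
    }

    int main() {
      // A page-aligned, heap-backed stack: its previous contents are unspecified.
      size_t stack_size = 1024 * 1024;
      void* stack = NULL;
      if (posix_memalign(&stack, sysconf(_SC_PAGESIZE), stack_size) != 0) return 1;

      pthread_attr_t attr;
      pthread_attr_init(&attr);
      pthread_attr_setstack(&attr, stack, stack_size);

      pthread_t t;
      if (pthread_create(&t, &attr, worker, NULL) != 0) return 1;
      pthread_join(t, NULL);

      pthread_attr_destroy(&attr);
      free(stack);
      return 0;
    }
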
Bug: 16667988
Change-Id: Id180ab2bc6713e1612386120a306db5bbf1d6046
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 174e308..fc8afa2 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -51,9 +51,9 @@
 
 // This code is used both by each new pthread and the code that initializes the main thread.
 void __init_tls(pthread_internal_t* thread) {
-  // Zero-initialize all the slots after TLS_SLOT_SELF and TLS_SLOT_THREAD_ID.
-  for (size_t i = TLS_SLOT_ERRNO; i < BIONIC_TLS_SLOTS; ++i) {
-    thread->tls[i] = NULL;
+  if (thread->user_allocated_stack()) {
+    // We don't know where the user got their stack, so assume the worst and zero the TLS area.
+    memset(&thread->tls[0], 0, BIONIC_TLS_SLOTS * sizeof(void*));
   }
 
   // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
@@ -66,7 +66,7 @@
 void __init_alternate_signal_stack(pthread_internal_t* thread) {
   // Create and set an alternate signal stack.
   stack_t ss;
-  ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+  ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   if (ss.ss_sp != MAP_FAILED) {
     ss.ss_size = SIGSTKSZ;
     ss.ss_flags = 0;
@@ -227,7 +227,7 @@
     // be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
     // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
     pthread_mutex_unlock(&thread->startup_handshake_mutex);
-    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) == 0) {
+    if (!thread->user_allocated_stack()) {
       munmap(thread->attr.stack_base, thread->attr.stack_size);
     }
     free(thread);
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 2470c9c..a6bb363 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -90,7 +90,7 @@
   // Keep track of what we need to know about the stack before we lose the pthread_internal_t.
   void* stack_base = thread->attr.stack_base;
   size_t stack_size = thread->attr.stack_size;
-  bool user_allocated_stack = ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) != 0);
+  bool user_allocated_stack = thread->user_allocated_stack();
 
   pthread_mutex_lock(&g_thread_list_lock);
   if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index e05d15c..7bcd758 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -30,6 +30,18 @@
 
 #include <pthread.h>
 
+/* Has the thread been detached by a pthread_join or pthread_detach call? */
+#define PTHREAD_ATTR_FLAG_DETACHED 0x00000001
+
+/* Was the thread's stack allocated by the user rather than by us? */
+#define PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK 0x00000002
+
+/* Has the thread been joined by another thread? */
+#define PTHREAD_ATTR_FLAG_JOINED 0x00000004
+
+/* Is this the main thread? */
+#define PTHREAD_ATTR_FLAG_MAIN_THREAD 0x80000000
+
 struct pthread_internal_t {
   struct pthread_internal_t* next;
   struct pthread_internal_t* prev;
@@ -56,6 +68,10 @@
     return (*cached_pid != 0);
   }
 
+  bool user_allocated_stack() {
+    return (attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) != 0;
+  }
+
   void** tls;
 
   pthread_attr_t attr;
@@ -87,20 +103,8 @@
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
 __LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread);
 
-/* Has the thread been detached by a pthread_join or pthread_detach call? */
-#define PTHREAD_ATTR_FLAG_DETACHED 0x00000001
-
-/* Was the thread's stack allocated by the user rather than by us? */
-#define PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK 0x00000002
-
-/* Has the thread been joined by another thread? */
-#define PTHREAD_ATTR_FLAG_JOINED 0x00000004
-
-/* Is this the main thread? */
-#define PTHREAD_ATTR_FLAG_MAIN_THREAD 0x80000000
-
 /*
- * Traditionally we give threads a 1MiB stack. When we started
+ * Traditionally we gave threads a 1MiB stack. When we started
  * allocating per-thread alternate signal stacks to ease debugging of
  * stack overflows, we subtracted the same amount we were using there
  * from the default thread stack size. This should keep memory usage