Fix pthread_join.

Let the kernel keep pthread_internal_t::tid updated, including
across forks and for the main thread. This then lets us fix
pthread_join to only return after the thread has really exited.

Also fix the thread attributes of the main thread so we don't
unmap the main thread's stack (which is really owned by the
dynamic linker and contains things like environment variables),
which fixes crashes when joining with an exited main thread
and also fixes problems reported publicly with accessing environment
variables after the main thread exits (for which I've added a new
unit test).

In passing I also fixed a bug where if the clone(2) inside
pthread_create(3) fails, we'd unmap the child's stack and TLS (which
contains the mutex) and then try to unlock the mutex. Boom! It wasn't
until after I'd uploaded the fix for this that I came across a new
public bug reporting this exact failure.

Bug: 8206355
Bug: 11693195
Bug: https://code.google.com/p/android/issues/detail?id=57421
Bug: https://code.google.com/p/android/issues/detail?id=62392
Change-Id: I2af9cf6e8ae510a67256ad93cad891794ed0580b
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index d8ad544..de1ef26 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -31,28 +31,31 @@
 #include <pthread.h>
 
 struct pthread_internal_t {
-    struct pthread_internal_t*  next;
-    struct pthread_internal_t*  prev;
-    pthread_attr_t              attr;
-    pid_t                       tid;
-    bool                        allocated_on_heap;
-    pthread_cond_t              join_cond;
-    void*                       return_value;
-    int                         internal_flags;
-    __pthread_cleanup_t*        cleanup_stack;
-    void**                      tls;         /* thread-local storage area */
+  struct pthread_internal_t* next;
+  struct pthread_internal_t* prev;
 
-    void* (*start_routine)(void*);
-    void* start_routine_arg;
+  pid_t tid;
 
-    void* alternate_signal_stack;
+  void** tls;
 
-    /*
-     * The dynamic linker implements dlerror(3), which makes it hard for us to implement this
-     * per-thread buffer by simply using malloc(3) and free(3).
-     */
+  pthread_attr_t attr;
+  bool allocated_on_heap; /* TODO: move this into attr.flags? */
+  int internal_flags; /* TODO: move this into attr.flags? */
+
+  __pthread_cleanup_t* cleanup_stack;
+
+  void* (*start_routine)(void*);
+  void* start_routine_arg;
+  void* return_value;
+
+  void* alternate_signal_stack;
+
+  /*
+   * The dynamic linker implements dlerror(3), which makes it hard for us to implement this
+   * per-thread buffer by simply using malloc(3) and free(3).
+   */
 #define __BIONIC_DLERROR_BUFFER_SIZE 512
-    char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
+  char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
 };
 
 __LIBC_HIDDEN__ int _init_thread(pthread_internal_t* thread, bool add_to_thread_list);
@@ -73,9 +76,6 @@
 /* Has the thread been joined by another thread? */
 #define PTHREAD_ATTR_FLAG_JOINED 0x00000004
 
-/* Has the thread already exited but not been joined? */
-#define PTHREAD_ATTR_FLAG_ZOMBIE 0x00000008
-
 #define PTHREAD_INTERNAL_FLAG_THREAD_INIT_FAILED 1
 
 /*