Refactor pthread_key.cpp to be lock-free.

Change-Id: I20dfb9d3cdc40eed10ea12ac34f03caaa94f7a49
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index c6d8494..2bca43f 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -56,7 +56,8 @@
   if (thread->mmap_size == 0) {
     // If the TLS area was not allocated by mmap(), it may not have been cleared to zero.
     // So assume the worst and zero the TLS area.
-    memset(&thread->tls[0], 0, BIONIC_TLS_SLOTS * sizeof(void*));
+    memset(thread->tls, 0, sizeof(thread->tls));
+    memset(thread->key_data, 0, sizeof(thread->key_data));
   }
 
   // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
@@ -155,7 +156,7 @@
   }
 
   // Mapped space(or user allocated stack) is used for:
-  //   thread_internal_t (including tls array)
+  //   thread_internal_t
   //   thread stack (including guard page)
   stack_top -= sizeof(pthread_internal_t);
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);