Make dlerror(3) thread-safe.

I gave up trying to use the usual thread-local buffer idiom: calls to
calloc(3) and free(3) from any of the "dl" functions -- which live in
the dynamic linker -- end up resolving to the dynamic linker's stubs.
I tried to work around that, but that was just making things more
complicated. This alternative costs us a well-known TLS slot (instead of
the dynamically-allocated TLS slot we'd have used otherwise, so no
difference there), plus an extra 512-byte buffer
(__BIONIC_DLERROR_BUFFER_SIZE) inside every pthread_internal_t.

Bug: 5404023
Change-Id: Ie9614edd05b6d1eeaf7bf9172792d616c6361767
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index da3a551..719bc83 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -163,20 +163,20 @@
 }
 
 
-void  __init_tls(void**  tls, void*  thread)
-{
-    int  nn;
+void  __init_tls(void** tls, void* thread) {
+  ((pthread_internal_t*) thread)->tls = tls;
 
-    ((pthread_internal_t*)thread)->tls = tls;
+  // Zero-initialize all the slots.
+  for (size_t i = 0; i < BIONIC_TLS_SLOTS; ++i) {
+    tls[i] = NULL;
+  }
 
-    // slot 0 must point to the tls area, this is required by the implementation
-    // of the x86 Linux kernel thread-local-storage
-    tls[TLS_SLOT_SELF]      = (void*)tls;
-    tls[TLS_SLOT_THREAD_ID] = thread;
-    for (nn = TLS_SLOT_ERRNO; nn < BIONIC_TLS_SLOTS; nn++)
-       tls[nn] = 0;
+  // Slot 0 must point to the TLS area; this is required by the implementation
+  // of the x86 Linux kernel thread-local-storage.
+  tls[TLS_SLOT_SELF]      = (void*) tls;
+  tls[TLS_SLOT_THREAD_ID] = thread;
 
-    __set_tls( (void*)tls );
+  __set_tls((void*) tls);
 }
 
 
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 58a809a..4bc81ef 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -45,6 +45,13 @@
     int                         internal_flags;
     __pthread_cleanup_t*        cleanup_stack;
     void**                      tls;         /* thread-local storage area */
+
+    /*
+     * The dynamic linker implements dlerror(3), which makes it hard for us to implement this
+     * per-thread buffer by simply using malloc(3) and free(3).
+     */
+#define __BIONIC_DLERROR_BUFFER_SIZE 512
+    char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
 } pthread_internal_t;
 
 int _init_thread(pthread_internal_t* thread, pid_t kernel_id, pthread_attr_t* attr,
diff --git a/libc/bionic/strerror.cpp b/libc/bionic/strerror.cpp
index 455dc52..a50c99f 100644
--- a/libc/bionic/strerror.cpp
+++ b/libc/bionic/strerror.cpp
@@ -41,6 +41,6 @@
   }
 
   LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, strerror, NL_TEXTMAX);
-  strerror_r(error_number, strerror_buffer, strerror_buffer_size);
-  return strerror_buffer;
+  strerror_r(error_number, strerror_tls_buffer, strerror_tls_buffer_size);
+  return strerror_tls_buffer;
 }
diff --git a/libc/bionic/strsignal.cpp b/libc/bionic/strsignal.cpp
index 9b046d4..c549e74 100644
--- a/libc/bionic/strsignal.cpp
+++ b/libc/bionic/strsignal.cpp
@@ -42,5 +42,5 @@
   }
 
   LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, strsignal, NL_TEXTMAX);
-  return const_cast<char*>(__strsignal(signal_number, strsignal_buffer, strsignal_buffer_size));
+  return const_cast<char*>(__strsignal(signal_number, strsignal_tls_buffer, strsignal_tls_buffer_size));
 }
diff --git a/libc/bionic/ThreadLocalBuffer.h b/libc/private/ThreadLocalBuffer.h
similarity index 68%
rename from libc/bionic/ThreadLocalBuffer.h
rename to libc/private/ThreadLocalBuffer.h
index 99acdba..1c5e3f4 100644
--- a/libc/bionic/ThreadLocalBuffer.h
+++ b/libc/private/ThreadLocalBuffer.h
@@ -37,23 +37,24 @@
 // TODO: move __cxa_guard_acquire and __cxa_guard_release into libc.
 
 #define GLOBAL_INIT_THREAD_LOCAL_BUFFER(name) \
-  static pthread_once_t name ## _once; \
-  static pthread_key_t name ## _key; \
-  static void name ## _key_destroy(void* buffer) { \
+  static pthread_once_t __bionic_tls_ ## name ## _once; \
+  static pthread_key_t __bionic_tls_ ## name ## _key; \
+  static void __bionic_tls_ ## name ## _key_destroy(void* buffer) { \
     free(buffer); \
   } \
-  static void name ## _key_init() { \
-    pthread_key_create(&name ## _key, name ## _key_destroy); \
+  static void __bionic_tls_ ## name ## _key_init() { \
+    pthread_key_create(&__bionic_tls_ ## name ## _key, __bionic_tls_ ## name ## _key_destroy); \
   }
 
-// Leaves "name_buffer" and "name_byte_count" defined and initialized.
+// Leaves "name_tls_buffer" and "name_tls_buffer_size" defined and initialized.
 #define LOCAL_INIT_THREAD_LOCAL_BUFFER(type, name, byte_count) \
-  pthread_once(&name ## _once, name ## _key_init); \
-  type name ## _buffer = reinterpret_cast<type>(pthread_getspecific(name ## _key)); \
-  if (name ## _buffer == NULL) { \
-    name ## _buffer = reinterpret_cast<type>(malloc(byte_count)); \
-    pthread_setspecific(name ## _key, name ## _buffer); \
+  pthread_once(&__bionic_tls_ ## name ## _once, __bionic_tls_ ## name ## _key_init); \
+  type name ## _tls_buffer = \
+      reinterpret_cast<type>(pthread_getspecific(__bionic_tls_ ## name ## _key)); \
+  if (name ## _tls_buffer == NULL) { \
+    name ## _tls_buffer = reinterpret_cast<type>(calloc(1, byte_count)); \
+    pthread_setspecific(__bionic_tls_ ## name ## _key, name ## _tls_buffer); \
   } \
-  const size_t name ## _buffer_size = byte_count
+  const size_t name ## _tls_buffer_size __attribute__((unused)) = byte_count
 
 #endif // _BIONIC_THREAD_LOCAL_BUFFER_H_included
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 4658866..a626d21 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -52,7 +52,7 @@
  * thread-specific segment descriptor...
  */
 
-/* Well known TLS slots */
+/* Well-known TLS slots. */
 #define TLS_SLOT_SELF               0
 #define TLS_SLOT_THREAD_ID          1
 #define TLS_SLOT_ERRNO              2
@@ -60,23 +60,16 @@
 #define TLS_SLOT_OPENGL_API         3
 #define TLS_SLOT_OPENGL             4
 
-/* this slot is only used to pass information from the dynamic linker to
+#define TLS_SLOT_DLERROR            5
+
+#define TLS_SLOT_MAX_WELL_KNOWN     TLS_SLOT_DLERROR
+
+/* This slot is only used to pass information from the dynamic linker to
  * libc.so when the C library is loaded in to memory. The C runtime init
  * function will then clear it. Since its use is extremely temporary,
  * we reuse an existing location.
  */
-#define  TLS_SLOT_BIONIC_PREINIT    (TLS_SLOT_ERRNO+1)
-
-/* small technical note: it is not possible to call pthread_setspecific
- * on keys that are <= TLS_SLOT_MAX_WELL_KNOWN, which is why it is set to
- * TLS_SLOT_ERRNO.
- *
- * later slots like TLS_SLOT_OPENGL are pre-allocated through the use of
- * TLS_DEFAULT_ALLOC_MAP. this means that there is no need to use
- * pthread_key_create() to initialize them. on the other hand, there is
- * no destructor associated to them (we might need to implement this later)
- */
-#define TLS_SLOT_MAX_WELL_KNOWN     TLS_SLOT_ERRNO
+#define  TLS_SLOT_BIONIC_PREINIT    TLS_SLOT_OPENGL_API
 
 #define TLS_DEFAULT_ALLOC_MAP       0x0000001F