Fix __errno for LP64 and clean up __get_tls.

If __get_tls has the right type, a lot of confusing casting can disappear.

It was probably a mistake that __get_tls was exposed as a function for mips
and x86 (but not arm), so let's (a) ensure that the __get_tls function
always matches the macro, (b) that we have the function for arm too, and
(c) that we don't have the function for any 64-bit architecture.

Change-Id: Ie9cb989b66e2006524ad7733eb6e1a65055463be
diff --git a/libc/bionic/pthread_key.cpp b/libc/bionic/pthread_key.cpp
index 7e8b4cd..706758b 100644
--- a/libc/bionic/pthread_key.cpp
+++ b/libc/bionic/pthread_key.cpp
@@ -133,7 +133,7 @@
   // from this thread's TLS area. This must call the destructor of all keys
   // that have a non-NULL data value and a non-NULL destructor.
   void CleanAll() {
-    void** tls = (void**)__get_tls();
+    void** tls = __get_tls();
 
     // Because destructors can do funky things like deleting/creating other
     // keys, we need to implement this in a loop.
@@ -239,8 +239,7 @@
   // to check that the key is properly allocated. If the key was not
   // allocated, the value read from the TLS should always be NULL
   // due to pthread_key_delete() clearing the values for all threads.
-  uintptr_t address = reinterpret_cast<volatile uintptr_t*>(__get_tls())[key];
-  return reinterpret_cast<void*>(address);
+  return __get_tls()[key];
 }
 
 int pthread_setspecific(pthread_key_t key, const void* ptr) {
@@ -250,6 +249,6 @@
     return EINVAL;
   }
 
-  reinterpret_cast<volatile uintptr_t*>(__get_tls())[key] = reinterpret_cast<uintptr_t>(ptr);
+  __get_tls()[key] = const_cast<void*>(ptr);
   return 0;
 }