Fix TLS access for ARMv6 and beyond.

For performance reasons, we don't call the kernel helper. Instead, we directly
access the TLS register on ARMv6 and higher. For ARMv5TE, keep using the hard-coded
address populated by the kernel on each task switch.

NOTE: Since we don't call the kernel helper, the build setting must precisely
      match your kernel configuration. Direct register access is enabled by setting
      the ARCH_ARM_HAVE_TLS_REGISTER variable to 'true' in your board configuration file.
diff --git a/libc/Android.mk b/libc/Android.mk
index 59a4c6b..12d5c92 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -366,6 +366,16 @@
 ifeq ($(TARGET_ARCH),arm)
   libc_common_cflags += -fstrict-aliasing
   libc_crt_target_cflags := -mthumb-interwork
+  #
+  # Define the HAVE_ARM_TLS_REGISTER macro to tell the C library to
+  # access the hardware TLS register directly in private/bionic_tls.h.
+  #
+  # The macro is only defined when ARCH_ARM_HAVE_TLS_REGISTER is 'true';
+  # that value must match your kernel configuration.
+  #
+  ifeq ($(ARCH_ARM_HAVE_TLS_REGISTER),true)
+    libc_common_cflags += -DHAVE_ARM_TLS_REGISTER
+  endif
 else # !arm
   ifeq ($(TARGET_ARCH),x86)
     libc_crt_target_cflags := -m32
diff --git a/libc/arch-arm/include/machine/cpu-features.h b/libc/arch-arm/include/machine/cpu-features.h
index f836006..925067e 100644
--- a/libc/arch-arm/include/machine/cpu-features.h
+++ b/libc/arch-arm/include/machine/cpu-features.h
@@ -149,7 +149,6 @@
 #  define __ARM_HAVE_PC_INTERWORK
 #endif
 
-
 /* Assembly-only macros */
 
 /* define a handy PLD(address) macro since the cache preload
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 82c8cd9..2e7a82b 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -88,7 +88,18 @@
 
 /* get the TLS */
 #ifdef __arm__
-#  define __get_tls() ( *((volatile void **) 0xffff0ff0) )
+/* For performance reasons, avoid calling the kernel helper: read the
+ * hardware TLS register directly. HAVE_ARM_TLS_REGISTER is
+ * build-specific (it must match your kernel configuration).
+ */
+#  ifdef HAVE_ARM_TLS_REGISTER
+#    define __get_tls() \
+    ({ unsigned int __val; /* compiler picks the register for %0 */ \
+       __asm__ ("mrc p15, 0, %0, c13, c0, 3" : "=r"(__val) ); \
+       (volatile void*)__val; })
+#  else /* !HAVE_ARM_TLS_REGISTER: use the kernel-populated address */
+#    define __get_tls() ( *((volatile void **) 0xffff0ff0) )
+#  endif
 #else
 extern void*  __get_tls( void );
 #endif