Use the TLS register if HAVE_ARM_TLS_REGISTER is defined

this will be needed for SMP, but would improve dispatching GL calls a bit
on armv6 and above.
diff --git a/opengl/libs/GLES2/gl2.cpp b/opengl/libs/GLES2/gl2.cpp
index 4c0ba88..b8e3283 100644
--- a/opengl/libs/GLES2/gl2.cpp
+++ b/opengl/libs/GLES2/gl2.cpp
@@ -41,12 +41,20 @@
 
 #if USE_FAST_TLS_KEY
 
+    #ifdef HAVE_ARM_TLS_REGISTER
+        #define GET_TLS(reg) \
+            "mrc p15, 0, " #reg ", c13, c0, 3 \n"
+    #else
+        #define GET_TLS(reg) \
+            "mov   " #reg ", #0xFFFF0FFF      \n"  \
+            "ldr   " #reg ", [" #reg ", #-15] \n"
+    #endif
+
     #define API_ENTRY(_api) __attribute__((naked)) _api
 
     #define CALL_GL_API(_api, ...)                              \
          asm volatile(                                          \
-            "mov   r12, #0xFFFF0FFF   \n"                       \
-            "ldr   r12, [r12, #-15]   \n"                       \
+            GET_TLS(r12)                                        \
             "ldr   r12, [r12, %[tls]] \n"                       \
             "cmp   r12, #0            \n"                       \
             "ldrne pc,  [r12, %[api]] \n"                       \
@@ -56,7 +64,7 @@
               [api] "J"(__builtin_offsetof(gl_hooks_t, gl._api))    \
             :                                                   \
             );
-    
+
     #define CALL_GL_API_RETURN(_api, ...) \
         CALL_GL_API(_api, __VA_ARGS__) \
         return 0; // placate gcc's warnings. never reached.