improve GLES jumptables

in the common case this saves one instructions per jump
(which will help with the i-cache).

this change also gets rid of the "use slow tls" option,
which was useless. So at least now architectures that don't have
assembly bindings will perform much better.

Change-Id: I31be6c06ad2136b50ef3a1ac14682d7812ad40d2
diff --git a/opengl/libs/GLES2/gl2.cpp b/opengl/libs/GLES2/gl2.cpp
index fad2176..3134e56 100644
--- a/opengl/libs/GLES2/gl2.cpp
+++ b/opengl/libs/GLES2/gl2.cpp
@@ -40,13 +40,11 @@
 #undef CALL_GL_API
 #undef CALL_GL_API_RETURN
 
-#if USE_FAST_TLS_KEY
-
-  #if defined(__arm__)
+#if defined(__arm__) && !USE_SLOW_BINDING
 
     #define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
 
-    #define API_ENTRY(_api) __attribute__((naked)) _api
+    #define API_ENTRY(_api) __attribute__((noinline)) _api
 
     #define CALL_GL_API(_api, ...)                              \
          asm volatile(                                          \
@@ -54,15 +52,13 @@
             "ldr   r12, [r12, %[tls]] \n"                       \
             "cmp   r12, #0            \n"                       \
             "ldrne pc,  [r12, %[api]] \n"                       \
-            "mov   r0, #0             \n"                       \
-            "bx    lr                 \n"                       \
             :                                                   \
             : [tls] "J"(TLS_SLOT_OPENGL_API*4),                 \
               [api] "J"(__builtin_offsetof(gl_hooks_t, gl._api))    \
             :                                                   \
             );
 
-  #elif defined(__mips__)
+#elif defined(__mips__) && !USE_SLOW_BINDING
 
     #define API_ENTRY(_api) __attribute__((noinline)) _api
 
@@ -94,30 +90,21 @@
             :                                                    \
             );
 
-  #else
-
-    #error Unsupported architecture
-
-  #endif
-
-    #define CALL_GL_API_RETURN(_api, ...) \
-        CALL_GL_API(_api, __VA_ARGS__) \
-        return 0; // placate gcc's warnings. never reached.
-
 #else
 
     #define API_ENTRY(_api) _api
 
     #define CALL_GL_API(_api, ...)                                       \
         gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
-        _c->_api(__VA_ARGS__);
-
-    #define CALL_GL_API_RETURN(_api, ...)                                \
-        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
-        return _c->_api(__VA_ARGS__)
+        if (_c) return _c->_api(__VA_ARGS__);
 
 #endif
 
+#define CALL_GL_API_RETURN(_api, ...) \
+    CALL_GL_API(_api, __VA_ARGS__) \
+    return 0;
+
+
 
 extern "C" {
 #include "gl3_api.in"
@@ -139,7 +126,8 @@
 {
     const GLubyte * ret = egl_get_string_for_current_context(name);
     if (ret == NULL) {
-        ret = __glGetString(name);
+        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;
+        ret = _c->glGetString(name);
     }
     return ret;
 }