Clean up the pthread-only atomic stuff a little.

It looks like we can probably just use the generic GCC stuff instead;
the generated code looks pretty similar. We should come back to that.
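
For the record, here's roughly what that might look like using GCC's
__sync builtins (an untested sketch, not part of this change; note
that __bionic_cmpxchg returns 0 on success, the opposite sense of
__sync_bool_compare_and_swap; also, __sync_lock_test_and_set only
implies an acquire barrier, and the other __sync builtins imply full
barriers that these barrier-free routines deliberately omit):

  __ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
    /* __sync_bool_compare_and_swap returns true on success; we return 0. */
    return __sync_bool_compare_and_swap(ptr, old_value, new_value) ? 0 : 1;
  }

  __ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
    /* Note: only an acquire barrier, not a full barrier. */
    return __sync_lock_test_and_set(ptr, new_value);
  }

  __ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
    return __sync_fetch_and_sub(ptr, 1);  /* Returns the old value. */
  }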

These routines are only used by the pthread implementation, and
__bionic_atomic_inc isn't used, so we can remove it.

Change-Id: I8b5b8cb30a1b159f0e85c3675aee06ddef39b429
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
index e94129c..3957d46 100644
--- a/libc/private/bionic_atomic_arm.h
+++ b/libc/private/bionic_atomic_arm.h
@@ -16,172 +16,76 @@
 #ifndef BIONIC_ATOMIC_ARM_H
 #define BIONIC_ATOMIC_ARM_H
 
-#include <machine/cpu-features.h>
-
-/* Some of the harware instructions used below are not available in Thumb-1
- * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
- * problem, we're going to use the same technique than libatomics_ops,
- * which is to temporarily switch to ARM, do the operation, then switch
- * back to Thumb-1.
- *
- * This results in two 'bx' jumps, just like a normal function call, but
- * everything is kept inlined, avoids loading or computing the function's
- * address, and prevents a little I-cache trashing too.
- *
- * However, it is highly recommended to avoid compiling any C library source
- * file that use these functions in Thumb-1 mode.
- *
- * Define three helper macros to implement this:
- */
-#if defined(__thumb__) && !defined(__thumb2__)
-#  define  __ATOMIC_SWITCH_TO_ARM \
-            "adr r3, 5f\n" \
-            "bx  r3\n" \
-            ".align\n" \
-            ".arm\n" \
-        "5:\n"
-/* note: the leading \n below is intentional */
-#  define __ATOMIC_SWITCH_TO_THUMB \
-            "\n" \
-            "adr r3, 6f\n" \
-            "bx  r3\n" \
-            ".thumb" \
-        "6:\n"
-
-#  define __ATOMIC_CLOBBERS   "r3"  /* list of clobbered registers */
-
-/* Warn the user that ARM mode should really be preferred! */
-#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!
-
-#else
-#  define  __ATOMIC_SWITCH_TO_ARM   /* nothing */
-#  define  __ATOMIC_SWITCH_TO_THUMB /* nothing */
-#  define  __ATOMIC_CLOBBERS        /* nothing */
-#endif
-
-
-/* Define a full memory barrier, this is only needed if we build the
- * platform for a multi-core device. For the record, using a 'dmb'
- * instruction on a Nexus One device can take up to 180 ns even if
- * it is completely un-necessary on this device.
- *
- * NOTE: This is where the platform and NDK headers atomic headers are
- *        going to diverge. With the NDK, we don't know if the generated
- *        code is going to run on a single or multi-core device, so we
- *        need to be cautious.
- *
- *        I.e. on single-core devices, the helper immediately returns,
- *        on multi-core devices, it uses "dmb" or any other means to
- *        perform a full-memory barrier.
- *
- * There are three cases to consider for the platform:
- *
- *    - multi-core ARMv7-A       => use the 'dmb' hardware instruction
- *    - multi-core ARMv6         => use the coprocessor
- *    - single core ARMv6+       => do not use any hardware barrier
- */
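+/* On SMP we need a real 'dmb'; on a single core a compiler barrier is enough.
+ * (A 'dmb' can cost ~180ns on a Nexus One even when it's unnecessary.) */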
+__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
 #if defined(ANDROID_SMP) && ANDROID_SMP == 1
-
-/* For ARMv7-A, we can use the 'dmb' instruction directly */
-__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
-    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
-     * bother with __ATOMIC_SWITCH_TO_ARM */
-    __asm__ __volatile__ ( "dmb" : : : "memory" );
+  __asm__ __volatile__ ( "dmb" : : : "memory" );
+#else
+  /* A simple compiler barrier. */
+  __asm__ __volatile__ ( "" : : : "memory" );
+#endif
 }
 
-#else /* !ANDROID_SMP */
-
-__ATOMIC_INLINE__ void __bionic_memory_barrier(void) {
-    /* A simple compiler barrier */
-    __asm__ __volatile__ ( "" : : : "memory" );
-}
-
-#endif /* !ANDROID_SMP */
-
-/* Compare-and-swap, without any explicit barriers. Note that this functions
+/* Compare-and-swap, without any explicit barriers. Note that this function
  * returns 0 on success, and 1 on failure. The opposite convention is typically
  * used on other platforms.
  */
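+/* For example, a caller spinning until the CAS succeeds loops while the
+ * result is non-zero:
+ *
+ *   int32_t old_value;
+ *   do {
+ *     old_value = *ptr;
+ *   } while (__bionic_cmpxchg(old_value, old_value + 1, ptr));
+ */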
-__ATOMIC_INLINE__ int
-__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%3]\n"
-            "mov %1, #0\n"
-            "teq %0, %4\n"
+__ATOMIC_INLINE__ int __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  do {
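+    /* LDREX/STREX: STREX sets 'status' to non-zero if exclusive access was
+     * lost between the two instructions, in which case we retry. */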
+    __asm__ __volatile__ (
+          "ldrex %0, [%3]\n"
+          "mov %1, #0\n"
+          "teq %0, %4\n"
 #ifdef __thumb2__
-            "it eq\n"
+          "it eq\n"
 #endif
-            "strexeq %1, %5, [%3]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr), "Ir" (old_value), "r" (new_value)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev != old_value;
+          "strexeq %1, %5, [%3]"
+          : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+          : "r" (ptr), "Ir" (old_value), "r" (new_value)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev != old_value;
 }
 
-/* Swap operation, without any explicit barriers. */
-__ATOMIC_INLINE__ int32_t
-__bionic_swap(int32_t new_value, volatile int32_t* ptr)
-{
-    int32_t prev, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%3]\n"
-            "strex %1, %4, [%3]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
-            : "r" (ptr), "r" (new_value)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
+/* Swap, without any explicit barriers. */
+__ATOMIC_INLINE__ int32_t __bionic_swap(int32_t new_value, volatile int32_t* ptr) {
+  int32_t prev, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%3]\n"
+          "strex %1, %4, [%3]"
+          : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+          : "r" (ptr), "r" (new_value)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev;
 }
 
-/* Atomic increment - without any barriers
- * This returns the old value
- */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_inc(volatile int32_t* ptr)
-{
-    int32_t prev, tmp, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%4]\n"
-            "add %1, %0, #1\n"
-            "strex %2, %1, [%4]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
-}
-
-/* Atomic decrement - without any barriers
- * This returns the old value.
- */
-__ATOMIC_INLINE__ int32_t
-__bionic_atomic_dec(volatile int32_t* ptr)
-{
-    int32_t prev, tmp, status;
-    do {
-        __asm__ __volatile__ (
-            __ATOMIC_SWITCH_TO_ARM
-            "ldrex %0, [%4]\n"
-            "sub %1, %0, #1\n"
-            "strex %2, %1, [%4]"
-            __ATOMIC_SWITCH_TO_THUMB
-            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
-            : "r" (ptr)
-            : __ATOMIC_CLOBBERS "cc");
-    } while (__builtin_expect(status != 0, 0));
-    return prev;
+/* Atomic decrement, without explicit barriers. Returns the old value. */
+__ATOMIC_INLINE__ int32_t __bionic_atomic_dec(volatile int32_t* ptr) {
+  int32_t prev, tmp, status;
+  do {
+    __asm__ __volatile__ (
+          "ldrex %0, [%4]\n"
+          "sub %1, %0, #1\n"
+          "strex %2, %1, [%4]"
+          : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+          : "r" (ptr)
+          : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev;
 }
 
-#endif /* SYS_ATOMICS_ARM_H */
+#endif /* BIONIC_ATOMIC_ARM_H */