Define inline atomic operations for x86 and ARM.

This change moves the ARM definitions out of a standalone assembly
file and into GCC extended inline assembler.  In addition, a single
set of x86 definitions is now shared among all x86 targets.

Change-Id: I6e5aa3a413d0af2acbe5d32994983d35a01fdcb3
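
For context, a minimal usage sketch (illustrative only, not part of
this change; obj_t and the helper names are hypothetical) showing the
call pattern these operations support.  Note that android_atomic_inc()
and android_atomic_dec() return the previous value:

    #include <stdint.h>
    #include <stdlib.h>
    #include <cutils/atomic.h>

    typedef struct {
        volatile int32_t refcount;
        /* ... payload ... */
    } obj_t;

    static void obj_ref(obj_t *o) {
        android_atomic_inc(&o->refcount);
    }

    static void obj_unref(obj_t *o) {
        /* A previous value of 1 means we dropped the last reference. */
        if (android_atomic_dec(&o->refcount) == 1)
            free(o);
    }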
diff --git a/include/cutils/atomic-arm.h b/include/cutils/atomic-arm.h
new file mode 100644
index 0000000..0dd629d
--- /dev/null
+++ b/include/cutils/atomic-arm.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_CUTILS_ATOMIC_ARM_H
+#define ANDROID_CUTILS_ATOMIC_ARM_H
+
+#include <stdint.h>
+#include <machine/cpu-features.h>
+
+extern inline void android_compiler_barrier(void)
+{
+    __asm__ __volatile__ ("" : : : "memory");
+}
+
+#if ANDROID_SMP == 0
+extern inline void android_memory_barrier(void)
+{
+    android_compiler_barrier();
+}
+#elif defined(__ARM_HAVE_DMB)
+extern inline void android_memory_barrier(void)
+{
+    __asm__ __volatile__ ("dmb" : : : "memory");
+}
+#elif defined(__ARM_HAVE_LDREX_STREX)
+extern inline void android_memory_barrier(void)
+{
+    __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5"
+                          : : "r" (0) : "memory");
+}
+#else
+extern inline void android_memory_barrier(void)
+{
+    typedef void (kuser_memory_barrier)(void);
+    (*(kuser_memory_barrier *)0xffff0fa0)();
+}
+#endif
+
+extern inline int32_t android_atomic_acquire_load(volatile int32_t *ptr)
+{
+    int32_t value = *ptr;
+    android_memory_barrier();
+    return value;
+}
+
+extern inline int32_t android_atomic_release_load(volatile int32_t *ptr)
+{
+    android_memory_barrier();
+    return *ptr;
+}
+
+extern inline void android_atomic_acquire_store(int32_t value,
+                                                volatile int32_t *ptr)
+{
+    *ptr = value;
+    android_memory_barrier();
+}
+
+extern inline void android_atomic_release_store(int32_t value,
+                                                volatile int32_t *ptr)
+{
+    android_memory_barrier();
+    *ptr = value;
+}
+
+#if defined(__thumb__)
+extern int android_atomic_cas(int32_t old_value, int32_t new_value,
+                              volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)
+extern inline int android_atomic_cas(int32_t old_value, int32_t new_value,
+                                     volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%3]\n"
+                              "mov %1, #0\n"
+                              "teq %0, %4\n"
+                              "strexeq %1, %5, [%3]"
+                              : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+                              : "r" (ptr), "Ir" (old_value), "r" (new_value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev != old_value;
+}
+#else
+extern inline int android_atomic_cas(int32_t old_value, int32_t new_value,
+                                     volatile int32_t *ptr)
+{
+    typedef int (kuser_cmpxchg)(int32_t, int32_t, volatile int32_t *);
+    int32_t prev, status;
+    prev = *ptr;
+    do {
+        status = (*(kuser_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
+        if (__builtin_expect(status == 0, 1))
+            return 0;
+        prev = *ptr;
+    } while (prev == old_value);
+    return 1;
+}
+#endif
+
+extern inline int android_atomic_acquire_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{
+    int status = android_atomic_cas(old_value, new_value, ptr);
+    android_memory_barrier();
+    return status;
+}
+
+extern inline int android_atomic_release_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{
+    android_memory_barrier();
+    return android_atomic_cas(old_value, new_value, ptr);
+}
+
+
+#if defined(__thumb__)
+extern int32_t android_atomic_swap(int32_t new_value,
+                                   volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)
+extern inline int32_t android_atomic_swap(int32_t new_value,
+                                          volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%3]\n"
+                              "strex %1, %4, [%3]"
+                              : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "r" (new_value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));
+    android_memory_barrier();
+    return prev;
+}
+#else
+extern inline int32_t android_atomic_swap(int32_t new_value,
+                                          volatile int32_t *ptr)
+{
+    int32_t prev;
+    __asm__ __volatile__ ("swp %0, %2, [%3]"
+                          : "=&r" (prev), "+m" (*ptr)
+                          : "r" (new_value), "r" (ptr)
+                          : "cc");
+    android_memory_barrier();
+    return prev;
+}
+#endif
+
+#if defined(__thumb__)
+extern int32_t android_atomic_add(int32_t increment,
+                                  volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)
+extern inline int32_t android_atomic_add(int32_t increment,
+                                         volatile int32_t *ptr)
+{
+    int32_t prev, tmp, status;
+    android_memory_barrier();
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%4]\n"
+                              "add %1, %0, %5\n"
+                              "strex %2, %1, [%4]"
+                              : "=&r" (prev), "=&r" (tmp),
+                                "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "Ir" (increment)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+extern inline int32_t android_atomic_add(int32_t increment,
+                                         volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    android_memory_barrier();
+    do {
+        prev = *ptr;
+        status = android_atomic_cas(prev, prev + increment, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+extern inline int32_t android_atomic_inc(volatile int32_t *addr) {
+    return android_atomic_add(1, addr);
+}
+
+extern inline int32_t android_atomic_dec(volatile int32_t *addr) {
+    return android_atomic_add(-1, addr);
+}
+
+#if defined(__thumb__)
+extern int32_t android_atomic_and(int32_t value, volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)
+extern inline int32_t android_atomic_and(int32_t value, volatile int32_t *ptr)
+{
+    int32_t prev, tmp, status;
+    android_memory_barrier();
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%4]\n"
+                              "and %1, %0, %5\n"
+                              "strex %2, %1, [%4]"
+                              : "=&r" (prev), "=&r" (tmp),
+                                "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "Ir" (value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+extern inline int32_t android_atomic_and(int32_t value, volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    android_memory_barrier();
+    do {
+        prev = *ptr;
+        status = android_atomic_cas(prev, prev & value, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+#if defined(__thumb__)
+extern int32_t android_atomic_or(int32_t value, volatile int32_t *ptr);
+#elif defined(__ARM_HAVE_LDREX_STREX)
+extern inline int32_t android_atomic_or(int32_t value, volatile int32_t *ptr)
+{
+    int32_t prev, tmp, status;
+    android_memory_barrier();
+    do {
+        __asm__ __volatile__ ("ldrex %0, [%4]\n"
+                              "orr %1, %0, %5\n"
+                              "strex %2, %1, [%4]"
+                              : "=&r" (prev), "=&r" (tmp),
+                                "=&r" (status), "+m" (*ptr)
+                              : "r" (ptr), "Ir" (value)
+                              : "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+extern inline int32_t android_atomic_or(int32_t value, volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    android_memory_barrier();
+    do {
+        prev = *ptr;
+        status = android_atomic_cas(prev, prev | value, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+#endif /* ANDROID_CUTILS_ATOMIC_ARM_H */
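
A note on the convention above: android_atomic_cas() returns 0 when
the swap happened and non-zero otherwise, so retry loops spin while
the result is non-zero.  A sketch of a caller relying on this
(atomic_fetch_max is a hypothetical helper, not part of this change):

    #include <cutils/atomic-inline.h>

    /* Atomically raise *ptr to at least `value`; returns the previous
     * value.  Illustrative only. */
    static int32_t atomic_fetch_max(int32_t value, volatile int32_t *ptr)
    {
        int32_t prev;
        do {
            prev = *ptr;
            if (prev >= value)
                break;          /* already large enough; no store needed */
        } while (android_atomic_cas(prev, value, ptr) != 0);
        return prev;
    }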
diff --git a/include/cutils/atomic-inline.h b/include/cutils/atomic-inline.h
index 1c23be9..715e0aa 100644
--- a/include/cutils/atomic-inline.h
+++ b/include/cutils/atomic-inline.h
@@ -39,69 +39,20 @@
 # error "Must define ANDROID_SMP before including atomic-inline.h"
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Define the full memory barrier for an SMP system.  This is
- * platform-specific.
- */
-
-#ifdef __arm__
-#include <machine/cpu-features.h>
-
-/*
- * For ARMv6K we need to issue a specific MCR instead of the DMB, since
- * that wasn't added until v7.  For anything older, SMP isn't relevant.
- * Since we don't have an ARMv6K to test with, we're not going to deal
- * with that now.
- *
- * The DMB instruction is found in the ARM and Thumb2 instruction sets.
- * This will fail on plain 16-bit Thumb.
- */
-#if defined(__ARM_HAVE_DMB)
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
-#else
-# define _ANDROID_MEMBAR_FULL_SMP()  ARM_SMP_defined_but_no_DMB()
-#endif
-
+#if defined(__arm__)
+#include <cutils/atomic-arm.h>
 #elif defined(__i386__) || defined(__x86_64__)
-/*
- * For recent x86, we can use the SSE2 mfence instruction.
- */
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
-
+#include <cutils/atomic-x86.h>
+#elif defined(__sh__)
+/* implementation is in atomic-android-sh.c */
 #else
-/*
- * Implementation not defined for this platform.  Hopefully we're building
- * in uniprocessor mode.
- */
-# define _ANDROID_MEMBAR_FULL_SMP()  SMP_barrier_not_defined_for_platform()
+#error atomic operations are unsupported
 #endif
 
-
-/*
- * Full barrier.  On uniprocessors this is just a compiler reorder barrier,
- * which ensures that the statements appearing above the barrier in the C/C++
- * code will be issued after the statements appearing below the barrier.
- *
- * For SMP this also includes a memory barrier instruction.  On an ARM
- * CPU this means that the current core will flush pending writes, wait
- * for pending reads to complete, and discard any cached reads that could
- * be stale.  Other CPUs may do less, but the end result is equivalent.
- */
-#if ANDROID_SMP != 0
-# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP()
+#if ANDROID_SMP == 0
+#define ANDROID_MEMBAR_FULL android_compiler_barrier
 #else
-# define ANDROID_MEMBAR_FULL() \
-    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
+#define ANDROID_MEMBAR_FULL android_memory_barrier
 #endif
 
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // ANDROID_CUTILS_ATOMIC_INLINE_H
+#endif /* ANDROID_CUTILS_ATOMIC_INLINE_H */
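
After this change ANDROID_MEMBAR_FULL() resolves to a call to one of
the inline barrier functions rather than a do/while macro.  A sketch
of the publish/consume pattern it is used for (publish and consume are
hypothetical names; assumes the build defines ANDROID_SMP before this
header is included):

    #include <cutils/atomic-inline.h>

    static int32_t payload;
    static volatile int32_t ready;

    void publish(int32_t v)
    {
        payload = v;
        ANDROID_MEMBAR_FULL();  /* order payload store before the flag */
        ready = 1;
    }

    int consume(int32_t *out)
    {
        if (!ready)
            return 0;
        ANDROID_MEMBAR_FULL();  /* order flag load before payload load */
        *out = payload;
        return 1;
    }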
diff --git a/include/cutils/atomic-x86.h b/include/cutils/atomic-x86.h
new file mode 100644
index 0000000..06b643f
--- /dev/null
+++ b/include/cutils/atomic-x86.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_CUTILS_ATOMIC_X86_H
+#define ANDROID_CUTILS_ATOMIC_X86_H
+
+#include <stdint.h>
+
+extern inline void android_compiler_barrier(void)
+{
+    __asm__ __volatile__ ("" : : : "memory");
+}
+
+#if ANDROID_SMP == 0
+extern inline void android_memory_barrier(void)
+{
+    android_compiler_barrier();
+}
+#else
+extern inline void android_memory_barrier(void)
+{
+    __asm__ __volatile__ ("mfence" : : : "memory");
+}
+#endif
+
+extern inline int32_t android_atomic_acquire_load(volatile int32_t *ptr) {
+    int32_t value = *ptr;
+    android_compiler_barrier();
+    return value;
+}
+
+extern inline int32_t android_atomic_release_load(volatile int32_t *ptr) {
+    android_memory_barrier();
+    return *ptr;
+}
+
+extern inline void android_atomic_acquire_store(int32_t value,
+                                                volatile int32_t *ptr) {
+    *ptr = value;
+    android_memory_barrier();
+}
+
+extern inline void android_atomic_release_store(int32_t value,
+                                                volatile int32_t *ptr) {
+    android_compiler_barrier();
+    *ptr = value;
+}
+
+extern inline int android_atomic_cas(int32_t old_value, int32_t new_value,
+                                     volatile int32_t *ptr)
+{
+    int32_t prev;
+    __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
+                          : "=a" (prev)
+                          : "q" (new_value), "m" (*ptr), "0" (old_value)
+                          : "memory");
+    return prev != old_value;
+}
+
+extern inline int android_atomic_acquire_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{
+    /* Loads are not reordered with other loads. */
+    return android_atomic_cas(old_value, new_value, ptr);
+}
+
+extern inline int android_atomic_release_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{
+    /* Stores are not reordered with other stores. */
+    return android_atomic_cas(old_value, new_value, ptr);
+}
+
+extern inline int32_t android_atomic_swap(int32_t new_value,
+                                          volatile int32_t *ptr)
+{
+    __asm__ __volatile__ ("xchgl %1, %0"
+                          : "=r" (new_value)
+                          : "m" (*ptr), "0" (new_value)
+                          : "memory");
+    /* new_value now holds the old value of *ptr */
+    return new_value;
+}
+
+extern inline int32_t android_atomic_add(int32_t increment,
+                                         volatile int32_t *ptr)
+{
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+extern inline int32_t android_atomic_inc(volatile int32_t *addr) {
+    return android_atomic_add(1, addr);
+}
+
+extern inline int32_t android_atomic_dec(volatile int32_t *addr) {
+    return android_atomic_add(-1, addr);
+}
+
+extern inline int32_t android_atomic_and(int32_t value,
+                                         volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    do {
+        prev = *ptr;
+        status = android_atomic_cas(prev, prev & value, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+
+extern inline int32_t android_atomic_or(int32_t value, volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    do {
+        prev = *ptr;
+        status = android_atomic_cas(prev, prev | value, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+
+#endif /* ANDROID_CUTILS_ATOMIC_X86_H */
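
Since android_atomic_add() is implemented with "lock; xaddl", it is a
true fetch-and-add on x86 and needs no CAS loop.  A hypothetical
sketch of the kind of caller that relies on this:

    #include <cutils/atomic.h>

    static volatile int32_t next_id;

    /* Each caller gets a unique, monotonically increasing id, because
     * the previous value of the counter is returned atomically. */
    int32_t take_id(void)
    {
        return android_atomic_add(1, &next_id);
    }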
diff --git a/include/cutils/atomic.h b/include/cutils/atomic.h
index 0200709..3866848 100644
--- a/include/cutils/atomic.h
+++ b/include/cutils/atomic.h
@@ -90,12 +90,11 @@
 void android_atomic_release_store(int32_t value, volatile int32_t* addr);
 
 /*
- * Unconditional swap operation with "acquire" or "release" ordering.
+ * Unconditional swap operation with release ordering.
  *
  * Stores the new value at *addr, and returns the previous value.
  */
-int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr);
-int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr);
+int32_t android_atomic_swap(int32_t value, volatile int32_t* addr);
 
 /*
  * Compare-and-set operation with "acquire" or "release" ordering.
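
For reference, the acquire/release CAS pair kept above is sufficient
for the classic lock pattern.  A hypothetical sketch (spin_lock and
spin_unlock are illustrative names; both CAS variants return 0 on a
successful swap):

    #include <cutils/atomic.h>

    void spin_lock(volatile int32_t *lock)
    {
        /* Acquire ordering on the successful 0 -> 1 transition. */
        while (android_atomic_acquire_cas(0, 1, lock) != 0)
            ;   /* busy-wait; a real lock would yield or futex-wait */
    }

    void spin_unlock(volatile int32_t *lock)
    {
        android_atomic_release_store(0, lock);
    }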
diff --git a/libcutils/Android.mk b/libcutils/Android.mk
index 5b05a1e..778b5bd 100644
--- a/libcutils/Android.mk
+++ b/libcutils/Android.mk
@@ -26,7 +26,7 @@
 commonSources := \
 	array.c \
 	hashmap.c \
-	atomic.c \
+	atomic.c.arm \
 	native_handle.c \
 	buffer.c \
 	socket_inaddr_any_server.c \
@@ -112,7 +112,7 @@
 LOCAL_SRC_FILES := $(commonSources) ashmem-dev.c mq.c
 
 ifeq ($(TARGET_ARCH),arm)
-LOCAL_SRC_FILES += memset32.S atomic-android-arm.S
+LOCAL_SRC_FILES += memset32.S
 else  # !arm
 ifeq ($(TARGET_ARCH),sh)
 LOCAL_SRC_FILES += memory.c atomic-android-sh.c
diff --git a/libcutils/atomic-android-arm.S b/libcutils/atomic-android-arm.S
deleted file mode 100644
index d8ee15c..0000000
--- a/libcutils/atomic-android-arm.S
+++ /dev/null
@@ -1,561 +0,0 @@
-/*
- * Copyright (C) 2005 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <machine/cpu-features.h>
-
-    .text
-    .align
-
-    .global android_atomic_acquire_load
-    .type android_atomic_acquire_load, %function
-    .global android_atomic_release_load
-    .type android_atomic_release_load, %function
-
-    .global android_atomic_acquire_store
-    .type android_atomic_acquire_store, %function
-    .global android_atomic_release_store
-    .type android_atomic_release_store, %function
-
-    .global android_atomic_inc
-    .type android_atomic_inc, %function
-    .global android_atomic_dec
-    .type android_atomic_dec, %function
-
-    .global android_atomic_add
-    .type android_atomic_add, %function
-    .global android_atomic_and
-    .type android_atomic_and, %function
-    .global android_atomic_or
-    .type android_atomic_or, %function
-
-    .global android_atomic_release_swap
-    .type android_atomic_release_swap, %function
-    .global android_atomic_acquire_swap
-    .type android_atomic_acquire_swap, %function
-
-    .global android_atomic_release_cas
-    .type android_atomic_release_cas, %function
-    .global android_atomic_acquire_cas
-    .type android_atomic_acquire_cas, %function
-
-/* must be on or off; cannot be left undefined */
-#if !defined(ANDROID_SMP)
-# error "ANDROID_SMP not defined"
-#endif
-
-
-#if defined(__ARM_HAVE_LDREX_STREX)
-/*
- * ===========================================================================
- *      ARMv6+ implementation
- * ===========================================================================
- *
- * These functions use the LDREX/STREX instructions to perform atomic
- * operations ("LL/SC" approach).  On an SMP build they will include
- * an appropriate memory barrier.
- */
-
-/* generate the memory barrier instruction when the build requires it */
-#if ANDROID_SMP == 1
-# if defined(__ARM_HAVE_DMB)
-#  define SMP_DMB dmb
-# else
-   /* Data Memory Barrier operation, initiated by writing a value into a
-      specific register with the Move to Coprocessor instruction.  We
-      arbitrarily use r0 here. */
-#  define SMP_DMB mcr p15, 0, r0, c7, c10, 5
-# endif
-#else
-# define SMP_DMB
-#endif
-
-/*
- * Sidebar: do we need to use the -EX instructions for atomic load/store?
- *
- * Consider the following situation (time advancing downward):
- *
- * P1                  P2
- *  val = LDREX(mem)
- *  val = val + 1
- *                      STR(mem, otherval)
- *  STREX(mem, val)
- *
- * If these instructions issue on separate cores, the STREX will correctly
- * fail because of the intervening store from the other core.  If this same
- * sequence of instructions executes in two threads on the same core, the
- * STREX will incorrectly succeed.
- *
- * There are two ways to fix this:
- * (1) Use LDREX/STREX for the atomic store operations.  This doesn't
- *   prevent the program from doing a non-exclusive store, but at least
- *   this way if they always use atomic ops to access the memory location
- *   there won't be any problems.
- * (2) Have the kernel clear the LDREX reservation on thread context switch.
- *  This will sometimes clear the reservation unnecessarily, but guarantees
- *  correct behavior.
- *
- * The Android kernel performs a CLREX (v7) or dummy STREX (pre-v7), so we
- * can get away with a non-exclusive store here.
- *
- * -----
- *
- * It's worth noting that using non-exclusive LDR and STR means the "load"
- * and "store" operations aren't quite the same as read-modify-write or
- * swap operations.  By definition those must read and write memory in a
- * in a way that is coherent across all cores, whereas our non-exclusive
- * load and store have no such requirement.
- *
- * In practice this doesn't matter, because the only guarantees we make
- * about who sees what when are tied to the acquire/release semantics.
- * Other cores may not see our atomic releasing store as soon as they would
- * if the code used LDREX/STREX, but a store-release operation doesn't make
- * any guarantees as to how soon the store will be visible.  It's allowable
- * for operations that happen later in program order to become visible
- * before the store.  For an acquiring store we issue a full barrier after
- * the STREX, ensuring that other processors see events in the proper order.
- */
-
-/*
- * android_atomic_acquire_load / android_atomic_release_load
- * input: r0 = address
- * output: r0 = value
- */
-android_atomic_acquire_load:
-    .fnstart
-    ldr     r0, [r0]
-    SMP_DMB
-    bx      lr
-    .fnend
-
-android_atomic_release_load:
-    .fnstart
-    SMP_DMB
-    ldr     r0, [r0]
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_acquire_store / android_atomic_release_store
- * input: r0 = value, r1 = address
- * output: void
- */
-android_atomic_acquire_store:
-    .fnstart
-    str     r0, [r1]
-    SMP_DMB
-    bx      lr
-    .fnend
-
-android_atomic_release_store:
-    .fnstart
-    SMP_DMB
-    str     r0, [r1]
-    bx      lr
-    .fnend
-
-/*
- * Common sequence for read-modify-write operations.
- *
- * input: r1 = address
- * output: r0 = original value, returns to caller
- */
-    .macro  RMWEX   op, arg
-1:  ldrex   r0, [r1]                    @ load current value into r0
-    \op     r2, r0, \arg                @ generate new value into r2
-    strex   r3, r2, [r1]                @ try to store new value; result in r3
-    cmp     r3, #0                      @ success?
-    bxeq    lr                          @ yes, return
-    b       1b                          @ no, retry
-    .endm
-
-
-/*
- * android_atomic_inc
- * input: r0 = address
- * output: r0 = old value
- */
-android_atomic_inc:
-    .fnstart
-    SMP_DMB
-    mov     r1, r0
-    RMWEX   add, #1
-    .fnend
-
-
-/*
- * android_atomic_dec
- * input: r0 = address
- * output: r0 = old value
- */
-android_atomic_dec:
-    .fnstart
-    SMP_DMB
-    mov     r1, r0
-    RMWEX   sub, #1
-    .fnend
-
-
-/*
- * android_atomic_add
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_add:
-    .fnstart
-    SMP_DMB
-    mov     ip, r0
-    RMWEX   add, ip
-    .fnend
-
-
-/*
- * android_atomic_and
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_and:
-    .fnstart
-    SMP_DMB
-    mov     ip, r0
-    RMWEX   and, ip
-    .fnend
-
-
-/*
- * android_atomic_or
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_or:
-    .fnstart
-    SMP_DMB
-    mov     ip, r0
-    RMWEX   orr, ip
-    .fnend
-
-
-/*
- * android_atomic_acquire_swap / android_atomic_release_swap
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_acquire_swap:
-    .fnstart
-1:  ldrex   r2, [r1]                    @ load current value into r2
-    strex   r3, r0, [r1]                @ store new value
-    teq     r3, #0                      @ strex success?
-    bne     1b                          @ no, loop
-    mov     r0, r2                      @ return old value
-    SMP_DMB
-    bx      lr
-    .fnend
-
-android_atomic_release_swap:
-    .fnstart
-    SMP_DMB
-1:  ldrex   r2, [r1]
-    strex   r3, r0, [r1]
-    teq     r3, #0
-    bne     1b
-    mov     r0, r2
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_acquire_cas / android_atomic_release_cas
- * input: r0 = oldvalue, r1 = newvalue, r2 = address
- * output: r0 = 0 (xchg done) or non-zero (xchg not done)
- */
-android_atomic_acquire_cas:
-    .fnstart
-1:  mov     ip, #2                      @ ip=2 means "new != old"
-    ldrex   r3, [r2]                    @ load current value into r3
-    teq     r0, r3                      @ new == old?
-    strexeq ip, r1, [r2]                @ yes, try store, set ip to 0 or 1
-    teq     ip, #1                      @ strex failure?
-    beq     1b                          @ yes, retry
-    mov     r0, ip                      @ return 0 on success, 2 on failure
-    SMP_DMB
-    bx      lr
-    .fnend
-
-android_atomic_release_cas:
-    .fnstart
-    SMP_DMB
-1:  mov     ip, #2
-    ldrex   r3, [r2]
-    teq     r0, r3
-    strexeq ip, r1, [r2]
-    teq     ip, #1
-    beq     1b
-    mov     r0, ip
-    bx      lr
-    .fnend
-
-
-#else /*not defined __ARM_HAVE_LDREX_STREX*/
-/*
- * ===========================================================================
- *      Pre-ARMv6 implementation
- * ===========================================================================
- *
- * These functions call through the kernel cmpxchg facility, or use the
- * (now deprecated) SWP instruction.  They are not SMP-safe.
- */
-#if ANDROID_SMP == 1
-# error "SMP defined, but LDREX/STREX not available"
-#endif
-
-/*
- * int __kernel_cmpxchg(int oldval, int newval, int *ptr)
- * clobbered: r3, ip, flags
- * return 0 if a swap was made, non-zero otherwise.
- */ 
-   .equ     kernel_cmpxchg, 0xFFFF0FC0
-   .equ     kernel_atomic_base, 0xFFFF0FFF
-
-
-/*
- * android_atomic_acquire_load / android_atomic_release_load
- * input: r0 = address
- * output: r0 = value
- */
-android_atomic_acquire_load:
-android_atomic_release_load:
-    .fnstart
-    ldr     r0, [r0]
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_acquire_store / android_atomic_release_store
- * input: r0 = value, r1 = address
- * output: void
- */
-android_atomic_acquire_store:
-android_atomic_release_store:
-    .fnstart
-    str     r0, [r1]
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_inc
- * input: r0 = address
- * output: r0 = old value
- */
-android_atomic_inc:
-    .fnstart
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0
-1: @ android_atomic_inc
-    ldr     r0, [r2]
-    mov     r3, #kernel_atomic_base
-#ifdef __ARM_HAVE_PC_INTERWORK
-    add     lr, pc, #4
-    add     r1, r0, #1
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-#else
-    add     r1, r0, #1
-    add     r3, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    mov     lr, pc
-    bx      r3
-#endif
-    bcc     1b
-    sub     r0, r1, #1
-    ldmia   sp!, {r4, lr}
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_dec
- * input: r0 = address
- * output: r0 = old value
- */
-android_atomic_dec:
-    .fnstart
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0
-1: @ android_atomic_dec
-    ldr     r0, [r2]
-    mov     r3, #kernel_atomic_base
-#ifdef __ARM_HAVE_PC_INTERWORK
-    add     lr, pc, #4
-    sub     r1, r0, #1
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-#else
-    sub     r1, r0, #1
-    add     r3, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    mov     lr, pc
-    bx      r3
-#endif
-    bcc     1b
-    add     r0, r1, #1
-    ldmia   sp!, {r4, lr}
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_add
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_add:
-    .fnstart
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r2, r1
-    mov     r4, r0
-1: @ android_atomic_add
-    ldr     r0, [r2]
-    mov     r3, #kernel_atomic_base
-#ifdef __ARM_HAVE_PC_INTERWORK
-    add     lr, pc, #4
-    add     r1, r0, r4
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-#else
-    add     r1, r0, r4
-    add     r3, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    mov     lr, pc
-    bx      r3
-#endif
-    bcc     1b
-    sub     r0, r1, r4
-    ldmia   sp!, {r4, lr}
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_and
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_and:
-    .fnstart
-    .save {r4, r5, ip, lr}      /* include ip for 64-bit stack alignment */
-    stmdb   sp!, {r4, r5, ip, lr}
-    mov     r2, r1              /* r2 = address */
-    mov     r4, r0              /* r4 = the value */
-1: @ android_atomic_and
-    ldr     r0, [r2]            /* r0 = address[0] */
-    mov     r3, #kernel_atomic_base
-#ifdef __ARM_HAVE_PC_INTERWORK
-    add     lr, pc, #8
-    mov     r5, r0              /* r5 = save address[0] */
-    and     r1, r0, r4          /* r1 = new value */
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)  /* call cmpxchg() */
-#else
-    mov     r5, r0              /* r5 = save address[0] */
-    and     r1, r0, r4          /* r1 = new value */
-    add     r3, r3, #(kernel_cmpxchg - kernel_atomic_base)  /* call cmpxchg() */
-    mov     lr, pc
-    bx      r3
-#endif
-    bcc     1b
-    mov     r0, r5
-    ldmia   sp!, {r4, r5, ip, lr}
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_or
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_or:
-    .fnstart
-    .save {r4, r5, ip, lr}      /* include ip for 64-bit stack alignment */
-    stmdb   sp!, {r4, r5, ip, lr}
-    mov     r2, r1              /* r2 = address */
-    mov     r4, r0              /* r4 = the value */
-1: @ android_atomic_or
-    ldr     r0, [r2]            /* r0 = address[0] */
-    mov     r3, #kernel_atomic_base
-#ifdef __ARM_HAVE_PC_INTERWORK
-    add     lr, pc, #8
-    mov     r5, r0              /* r5 = save address[0] */
-    orr     r1, r0, r4          /* r1 = new value */
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)  /* call cmpxchg() */
-#else
-    mov     r5, r0              /* r5 = save address[0] */
-    orr     r1, r0, r4          /* r1 = new value */
-    add     r3, r3, #(kernel_cmpxchg - kernel_atomic_base)  /* call cmpxchg() */
-    mov     lr, pc
-    bx      r3
-#endif
-    bcc     1b
-    mov     r0, r5
-    ldmia   sp!, {r4, r5, ip, lr}
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_acquire_swap / android_atomic_release_swap
- * input: r0 = value, r1 = address
- * output: r0 = old value
- */
-android_atomic_acquire_swap:
-android_atomic_release_swap:
-    .fnstart
-    swp     r0, r0, [r1]
-    bx      lr
-    .fnend
-
-
-/*
- * android_atomic_acquire_cas / android_atomic_release_cas
- * input: r0 = oldvalue, r1 = newvalue, r2 = address
- * output: r0 = 0 (xchg done) or non-zero (xchg not done)
- */
-android_atomic_acquire_cas:
-android_atomic_release_cas:
-    .fnstart
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r4, r0          /* r4 = save oldvalue */
-1: @ android_atomic_cmpxchg
-    mov     r3, #kernel_atomic_base
-#ifdef __ARM_HAVE_PC_INTERWORK
-    add     lr, pc, #4
-    mov     r0, r4          /* r0 = oldvalue */
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-#else
-    mov     r0, r4          /* r0 = oldvalue */
-    add     r3, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    mov     lr, pc
-    bx      r3
-#endif
-    bcs     2f              /* swap was made. we're good, return. */
-    ldr     r3, [r2]        /* swap not made, see if it's because *ptr!=oldvalue */
-    cmp     r3, r4
-    beq     1b
-2: @ android_atomic_cmpxchg
-    ldmia   sp!, {r4, lr}
-    bx      lr
-    .fnend
-
-#endif /*not defined __ARM_HAVE_LDREX_STREX*/
diff --git a/libcutils/atomic.c b/libcutils/atomic.c
index 4cefa6b..f6cd8b0 100644
--- a/libcutils/atomic.c
+++ b/libcutils/atomic.c
@@ -14,207 +14,6 @@
  * limitations under the License.
  */
 
-#include <cutils/atomic.h>
+#define inline
+
 #include <cutils/atomic-inline.h>
-#ifdef HAVE_WIN32_THREADS
-#include <windows.h>
-#else
-#include <sched.h>
-#endif
-
-/*****************************************************************************/
-#if defined(HAVE_MACOSX_IPC)
-
-#include <libkern/OSAtomic.h>
-
-int32_t android_atomic_acquire_load(volatile int32_t* addr) {
-    int32_t value = *addr;
-    OSMemoryBarrier();
-    return value;
-}
-
-int32_t android_atomic_release_load(volatile int32_t* addr) {
-    OSMemoryBarrier();
-    return *addr;
-}
-
-void android_atomic_acquire_store(int32_t value, volatile int32_t* addr) {
-    *addr = value;
-    OSMemoryBarrier();
-}
-
-void android_atomic_release_store(int32_t value, volatile int32_t* addr) {
-    OSMemoryBarrier();
-    *addr = value;
-}
-
-int32_t android_atomic_inc(volatile int32_t* addr) {
-    return OSAtomicIncrement32Barrier((int32_t*)addr)-1;
-}
-
-int32_t android_atomic_dec(volatile int32_t* addr) {
-    return OSAtomicDecrement32Barrier((int32_t*)addr)+1;
-}
-
-int32_t android_atomic_add(int32_t value, volatile int32_t* addr) {
-    return OSAtomicAdd32Barrier(value, (int32_t*)addr)-value;
-}
-
-int32_t android_atomic_and(int32_t value, volatile int32_t* addr) {
-    return OSAtomicAnd32OrigBarrier(value, (int32_t*)addr);
-}
-
-int32_t android_atomic_or(int32_t value, volatile int32_t* addr) {
-    return OSAtomicOr32OrigBarrier(value, (int32_t*)addr);
-}
-
-int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (android_atomic_acquire_cas(oldValue, value, addr));
-    return oldValue;
-}
-
-int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (android_atomic_release_cas(oldValue, value, addr));
-    return oldValue;
-}
-
-int android_atomic_release_cas(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
-    /* OS X CAS returns zero on failure; invert to return zero on success */
-    return OSAtomicCompareAndSwap32Barrier(oldvalue, newvalue, (int32_t*)addr) == 0;
-}
-
-int android_atomic_acquire_cas(int32_t oldvalue, int32_t newvalue,
-        volatile int32_t* addr) {
-    int result = (OSAtomicCompareAndSwap32(oldvalue, newvalue, (int32_t*)addr) == 0);
-    if (result == 0) {
-        /* success, perform barrier */
-        OSMemoryBarrier();
-    }
-    return result;
-}
-
-/*****************************************************************************/
-#elif defined(__i386__) || defined(__x86_64__)
-
-int32_t android_atomic_acquire_load(volatile int32_t* addr) {
-    int32_t value = *addr;
-    ANDROID_MEMBAR_FULL();
-    return value;
-}
-
-int32_t android_atomic_release_load(volatile int32_t* addr) {
-    ANDROID_MEMBAR_FULL();
-    return *addr;
-}
-
-void android_atomic_acquire_store(int32_t value, volatile int32_t* addr) {
-    *addr = value;
-    ANDROID_MEMBAR_FULL();
-}
-
-void android_atomic_release_store(int32_t value, volatile int32_t* addr) {
-    ANDROID_MEMBAR_FULL();
-    *addr = value;
-}
-
-int32_t android_atomic_inc(volatile int32_t* addr) {
-    return android_atomic_add(1, addr);
-}
-
-int32_t android_atomic_dec(volatile int32_t* addr) {
-    return android_atomic_add(-1, addr);
-}
-
-int32_t android_atomic_add(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (android_atomic_release_cas(oldValue, oldValue+value, addr));
-    return oldValue;
-}
-
-int32_t android_atomic_and(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (android_atomic_release_cas(oldValue, oldValue&value, addr));
-    return oldValue;
-}
-
-int32_t android_atomic_or(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (android_atomic_release_cas(oldValue, oldValue|value, addr));
-    return oldValue;
-}
-
-/* returns 0 on successful swap */
-static inline int cas(int32_t oldvalue, int32_t newvalue,
-        volatile int32_t* addr) {
-    int xchg;
-    asm volatile
-    (
-    "   lock; cmpxchg %%ecx, (%%edx);"
-    "   setne %%al;"
-    "   andl $1, %%eax"
-    : "=a" (xchg)
-    : "a" (oldvalue), "c" (newvalue), "d" (addr)
-    );
-    return xchg;
-}
-
-int32_t android_atomic_acquire_swap(int32_t value, volatile int32_t* addr) {
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (cas(oldValue, value, addr));
-    ANDROID_MEMBAR_FULL();
-    return oldValue;
-}
-
-int32_t android_atomic_release_swap(int32_t value, volatile int32_t* addr) {
-    ANDROID_MEMBAR_FULL();
-    int32_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (cas(oldValue, value, addr));
-    return oldValue;
-}
-
-int android_atomic_acquire_cas(int32_t oldvalue, int32_t newvalue,
-        volatile int32_t* addr) {
-    int xchg = cas(oldvalue, newvalue, addr);
-    if (xchg == 0)
-        ANDROID_MEMBAR_FULL();
-    return xchg;
-}
-
-int android_atomic_release_cas(int32_t oldvalue, int32_t newvalue,
-        volatile int32_t* addr) {
-    ANDROID_MEMBAR_FULL();
-    int xchg = cas(oldvalue, newvalue, addr);
-    return xchg;
-}
-
-
-/*****************************************************************************/
-#elif __arm__
-// implementation for ARM is in atomic-android-arm.s.
-
-/*****************************************************************************/
-#elif __sh__
-// implementation for SuperH is in atomic-android-sh.c.
-
-#else
-
-#error "Unsupported atomic operations for this platform"
-
-#endif
-
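
The "#define inline" above relies on GNU89 extern-inline semantics:
in the headers, each "extern inline" function provides an inlinable
body but emits no out-of-line symbol; compiling this one translation
unit with "inline" defined away turns those same definitions into
plain extern functions, so exactly one linkable copy exists for
callers where inlining does not happen.  A minimal sketch with
hypothetical names:

    /* widget.h: under GNU89 rules, "extern inline" supplies a body
     * for inlining but emits no symbol in files that include it. */
    extern inline int widget_twice(int x) { return 2 * x; }

    /* widget.c: with "inline" defined away, the same header now reads
     * as a plain extern definition, emitting the one real symbol. */
    #define inline
    #include "widget.h"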