Atomic/SMP update, part 3.

Update ARM atomic ops to use LDREX/STREX.  Strip out the #if 0 chunk.

Insert explicit memory barriers in pthread and semaphore code.

For bug 2721865.

Change-Id: I0f153b797753a655702d8be41679273d1d5d6ae7
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index ae44b06..709e612 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -44,6 +44,7 @@
 #include <assert.h>
 #include <malloc.h>
 #include <linux/futex.h>
+#include <cutils/atomic-inline.h>
 
 extern int  __pthread_clone(int (*fn)(void*), void *child_stack, int flags, void *arg);
 extern void _exit_with_stack_teardown(void * stackBase, int stackSize, int retCode);
@@ -936,6 +937,7 @@
         while (__atomic_swap(shared|2, &mutex->value ) != (shared|0))
             __futex_syscall4(&mutex->value, wait_op, shared|2, 0);
     }
+    ANDROID_MEMBAR_FULL();
 }
 
 /*
@@ -945,6 +947,8 @@
 static __inline__ void
 _normal_unlock(pthread_mutex_t*  mutex)
 {
+    ANDROID_MEMBAR_FULL();
+
     /* We need to preserve the shared flag during operations */
     int  shared = mutex->value & MUTEX_SHARED_MASK;
 
@@ -1144,8 +1148,10 @@
     /* Handle common case first */
     if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
     {
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0)
+        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+            ANDROID_MEMBAR_FULL();
             return 0;
+        }
 
         return EBUSY;
     }
@@ -1241,9 +1247,11 @@
     {
         int  wait_op = shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE;
 
-        /* fast path for unconteded lock */
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0)
+        /* fast path for uncontended lock */
+        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+            ANDROID_MEMBAR_FULL();
             return 0;
+        }
 
         /* loop while needed */
         while (__atomic_swap(shared|2, &mutex->value) != (shared|0)) {
@@ -1252,6 +1260,7 @@
 
             __futex_syscall4(&mutex->value, wait_op, shared|2, &ts);
         }
+        ANDROID_MEMBAR_FULL();
         return 0;
     }