Use private futexes for semaphores, unless they are initialized with pshared != 0.

Change-Id: I534e36a7171cd37037ae03b910ba71ea6968286d
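Note: previously, sem_init() failed with ENOSYS whenever pshared != 0. It now accepts pshared != 0, and instead fails with EINVAL when the initial value does not fit in 31 bits.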
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index 6fd47f2..ac5e6bf 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -43,7 +43,7 @@
 #include <memory.h>
 #include <assert.h>
 #include <malloc.h>
-#include <linux/futex.h>
+#include <bionic_futex.h>
 #include <cutils/atomic-inline.h>
 #include <sys/prctl.h>
 #include <sys/stat.h>
@@ -54,6 +54,16 @@
 extern void _exit_thread(int  retCode);
 extern int  __set_errno(int);
 
+/* Futex wake on 'ftx': FUTEX_WAKE when pshared != 0, FUTEX_WAKE_PRIVATE otherwise; 'val' is forwarded and the syscall result returned as-is. */
+int  __futex_wake_ex(volatile void *ftx, int pshared, int val)
+{
+    return __futex_syscall3(ftx, pshared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE, val);
+}
+
+/* Futex wait on 'ftx': FUTEX_WAIT when pshared != 0, FUTEX_WAIT_PRIVATE otherwise; 'val' and 'timeout' are forwarded and the syscall result returned as-is. */
+int  __futex_wait_ex(volatile void *ftx, int pshared, int val, const struct timespec *timeout)
+{
+    return __futex_syscall4(ftx, pshared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, val, timeout);
+}
+
 #define  __likely(cond)    __builtin_expect(!!(cond), 1)
 #define  __unlikely(cond)  __builtin_expect(!!(cond), 0)
 
@@ -716,24 +726,6 @@
 }
 
 
-int __futex_wait(volatile void *ftx, int val, const struct timespec *timeout);
-int __futex_wake(volatile void *ftx, int count);
-
-int __futex_syscall3(volatile void *ftx, int op, int val);
-int __futex_syscall4(volatile void *ftx, int op, int val, const struct timespec *timeout);
-
-#ifndef FUTEX_PRIVATE_FLAG
-#define FUTEX_PRIVATE_FLAG  128
-#endif
-
-#ifndef FUTEX_WAIT_PRIVATE
-#define FUTEX_WAIT_PRIVATE  (FUTEX_WAIT|FUTEX_PRIVATE_FLAG)
-#endif
-
-#ifndef FUTEX_WAKE_PRIVATE
-#define FUTEX_WAKE_PRIVATE  (FUTEX_WAKE|FUTEX_PRIVATE_FLAG)
-#endif
-
 // mutex lock states
 //
 // 0: unlocked
@@ -935,10 +927,8 @@
          * that the mutex is in state 2 when we go to sleep on it, which
          * guarantees a wake-up call.
          */
-        int  wait_op = shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE;
-
         while (__atomic_swap(shared|2, &mutex->value ) != (shared|0))
-            __futex_syscall4(&mutex->value, wait_op, shared|2, 0);
+            __futex_wait_ex(&mutex->value, shared, shared|2, 0);
     }
     ANDROID_MEMBAR_FULL();
 }
@@ -961,7 +951,6 @@
      * if it wasn't 1 we have to do some additional work.
      */
     if (__atomic_dec(&mutex->value) != (shared|1)) {
-        int  wake_op = shared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE;
         /*
          * Start by releasing the lock.  The decrement changed it from
          * "contended lock" to "uncontended lock", which means we still
@@ -999,7 +988,7 @@
          * Either way we have correct behavior and nobody is orphaned on
          * the wait queue.
          */
-        __futex_syscall3(&mutex->value, wake_op, 1);
+        __futex_wake_ex(&mutex->value, shared, 1);
     }
 }
 
@@ -1019,7 +1008,7 @@
 
 int pthread_mutex_lock(pthread_mutex_t *mutex)
 {
-    int mtype, tid, new_lock_type, shared, wait_op;
+    int mtype, tid, new_lock_type, shared;
 
     if (__unlikely(mutex == NULL))
         return EINVAL;
@@ -1064,8 +1053,7 @@
     new_lock_type = 1;
 
     /* compute futex wait opcode and restore shared flag in mtype */
-    wait_op = shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE;
-    mtype  |= shared;
+    mtype |= shared;
 
     for (;;) {
         int  oldv;
@@ -1091,7 +1079,7 @@
          */
         new_lock_type = 2;
 
-        __futex_syscall4(&mutex->value, wait_op, oldv, NULL);
+        __futex_wait_ex(&mutex->value, shared, oldv, NULL);
     }
     return 0;
 }
@@ -1131,8 +1119,7 @@
 
     /* Wake one waiting thread, if any */
     if ((oldv & 3) == 2) {
-        int wake_op = shared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE;
-        __futex_syscall3(&mutex->value, wake_op, 1);
+        __futex_wake_ex(&mutex->value, shared, 1);
     }
     return 0;
 }
@@ -1234,7 +1221,7 @@
     clockid_t        clock = CLOCK_MONOTONIC;
     struct timespec  abstime;
     struct timespec  ts;
-    int              mtype, tid, oldv, new_lock_type, shared, wait_op;
+    int              mtype, tid, oldv, new_lock_type, shared;
 
     /* compute absolute expiration time */
     __timespec_to_relative_msec(&abstime, msecs, clock);
@@ -1248,8 +1235,6 @@
     /* Handle common case first */
     if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
     {
-        int  wait_op = shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE;
-
         /* fast path for uncontended lock */
         if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
             ANDROID_MEMBAR_FULL();
@@ -1261,7 +1246,7 @@
             if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
                 return EBUSY;
 
-            __futex_syscall4(&mutex->value, wait_op, shared|2, &ts);
+            __futex_wait_ex(&mutex->value, shared, shared|2, &ts);
         }
         ANDROID_MEMBAR_FULL();
         return 0;
@@ -1294,7 +1279,6 @@
     new_lock_type = 1;
 
     /* Compute wait op and restore sharing bit in mtype */
-    wait_op = shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE;
     mtype  |= shared;
 
     for (;;) {
@@ -1325,7 +1309,7 @@
         if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
             return EBUSY;
 
-        __futex_syscall4(&mutex->value, wait_op, oldv, &ts);
+        __futex_wait_ex(&mutex->value, shared, oldv, &ts);
     }
     return 0;
 }
@@ -1418,7 +1402,6 @@
 __pthread_cond_pulse(pthread_cond_t *cond, int  counter)
 {
     long flags;
-    int  wake_op;
 
     if (__unlikely(cond == NULL))
         return EINVAL;
@@ -1432,8 +1415,7 @@
             break;
     }
 
-    wake_op = COND_IS_SHARED(cond) ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE;
-    __futex_syscall3(&cond->value, wake_op, counter);
+    __futex_wake_ex(&cond->value, COND_IS_SHARED(cond), counter);
     return 0;
 }
 
@@ -1458,10 +1440,9 @@
 {
     int  status;
     int  oldvalue = cond->value;
-    int  wait_op  = COND_IS_SHARED(cond) ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE;
 
     pthread_mutex_unlock(mutex);
-    status = __futex_syscall4(&cond->value, wait_op, oldvalue, reltime);
+    status = __futex_wait_ex(&cond->value, COND_IS_SHARED(cond), oldvalue, reltime);
     pthread_mutex_lock(mutex);
 
     if (status == (-ETIMEDOUT)) return ETIMEDOUT;
diff --git a/libc/bionic/semaphore.c b/libc/bionic/semaphore.c
index b624943..39d049a 100644
--- a/libc/bionic/semaphore.c
+++ b/libc/bionic/semaphore.c
@@ -31,6 +31,16 @@
 #include <sys/atomics.h>
 #include <time.h>
 #include <cutils/atomic-inline.h>
+#include <bionic_futex.h>
+
+/* Layout of sem->count: bits 0-30 hold the counter value; bit 31 is the
+ * shared flag, which sem_init() sets when called with pshared != 0.
+ */
+#define SEM_VALUE_MASK  0x7fffffff
+#define SEM_SHARED_MASK 0x80000000
+
+#define SEM_GET_SHARED(sem)  ((sem)->count & SEM_SHARED_MASK)  /* SEM_SHARED_MASK or 0 (not 0/1) */
+#define SEM_GET_VALUE(sem)   ((sem)->count & SEM_VALUE_MASK)   /* counter without the flag bit */
 
 int sem_init(sem_t *sem, int pshared, unsigned int value)
 {
@@ -39,12 +49,16 @@
         return -1;
     }
 
-    if (pshared != 0) {
-        errno = ENOSYS;
+    /* ensure that 'value' can be stored in the semaphore */
+    if ((value & SEM_VALUE_MASK) != value) {
+        errno = EINVAL;
         return -1;
     }
 
     sem->count = value;
+    if (pshared != 0)
+        sem->count |= SEM_SHARED_MASK;
+
     return 0;
 }
 
@@ -55,10 +69,11 @@
         errno = EINVAL;
         return -1;
     }
-    if (sem->count == 0) {
+    if ((sem->count & SEM_VALUE_MASK) == 0) {
         errno = EBUSY;
         return -1;
     }
+    sem->count = 0;
     return 0;
 }
 
@@ -91,32 +106,60 @@
 }
 
 
+/* Decrement the semaphore counter with a compare-and-swap retry loop,
+ * unless it is already 0, keeping the shared flag bit intact. Returns the
+ * counter value seen before the decrement; 0 means nothing was decremented. */
 static int
-__atomic_dec_if_positive( volatile unsigned int*  pvalue )
+__sem_dec_if_positive(volatile unsigned int *pvalue)
 {
+    unsigned int  shared = (*pvalue & SEM_SHARED_MASK);  /* flag bit, sampled once before the loop */
     unsigned int  old;
 
     do {
-        old = *pvalue;
+        old = (*pvalue & SEM_VALUE_MASK);  /* re-read the counter on every retry */
     }
-    while ( old != 0 && __atomic_cmpxchg( (int)old, (int)old-1, (volatile int*)pvalue ) != 0 );
+    while ( old != 0 &&
+            __atomic_cmpxchg((int)(old|shared),
+                             (int)((old-1)|shared),
+                             (volatile int*)pvalue) != 0 );
 
     return old;
 }
 
+/* Increment the semaphore counter with a compare-and-swap retry loop,
+ * keeping the shared flag bit intact. The counter wraps to 0 once it goes
+ * past SEM_VALUE_MASK. Returns the counter value seen before the increment. */
+static int
+__sem_inc(volatile unsigned int *pvalue)
+{
+    unsigned int  shared = (*pvalue & SEM_SHARED_MASK);  /* flag bit, sampled once before the loop */
+    unsigned int  old;
+
+    do {
+        old = (*pvalue & SEM_VALUE_MASK);  /* re-read the counter on every retry */
+    } while ( __atomic_cmpxchg((int)(old|shared),
+                               (int)(((old+1)&SEM_VALUE_MASK)|shared),
+                               (volatile int*)pvalue) != 0);
+    return old;
+}
+
 /* lock a semaphore */
 int sem_wait(sem_t *sem)
 {
+    unsigned shared;
+
     if (sem == NULL) {
         errno = EINVAL;
         return -1;
     }
 
+    shared = SEM_GET_SHARED(sem);
+
     for (;;) {
-        if (__atomic_dec_if_positive(&sem->count))
+        if (__sem_dec_if_positive(&sem->count))
             break;
 
-        __futex_wait(&sem->count, 0, 0);
+        __futex_wait_ex(&sem->count, shared, shared, NULL);
     }
     ANDROID_MEMBAR_FULL();
     return 0;
@@ -125,6 +168,7 @@
 int sem_timedwait(sem_t *sem, const struct timespec *abs_timeout)
 {
     int  ret;
+    unsigned int shared;
 
     if (sem == NULL) {
         errno = EINVAL;
@@ -133,7 +177,7 @@
 
     /* POSIX says we need to try to decrement the semaphore
      * before checking the timeout value */
-    if (__atomic_dec_if_positive(&sem->count)) {
+    if (__sem_dec_if_positive(&sem->count)) {
         ANDROID_MEMBAR_FULL();
         return 0;
     }
@@ -148,6 +192,8 @@
         return -1;
     }
 
+    shared = SEM_GET_SHARED(sem);
+
     for (;;) {
         struct timespec ts;
         int             ret;
@@ -166,7 +212,7 @@
             return -1;
         }
 
-        ret = __futex_wait(&sem->count, 0, &ts);
+        ret = __futex_wait_ex(&sem->count, shared, shared, &ts);
 
         /* return in case of timeout or interrupt */
         if (ret == -ETIMEDOUT || ret == -EINTR) {
@@ -174,7 +220,7 @@
             return -1;
         }
 
-        if (__atomic_dec_if_positive(&sem->count)) {
+        if (__sem_dec_if_positive(&sem->count)) {
             ANDROID_MEMBAR_FULL();
             break;
         }
@@ -185,12 +231,21 @@
 /* unlock a semaphore */
 int sem_post(sem_t *sem)
 {
+    unsigned int shared;
+
-    if (sem == NULL)
-        return EINVAL;
+    /* Fail like sem_wait() does in this file: return -1 and set errno. */
+    if (sem == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
 
+    shared = SEM_GET_SHARED(sem);
+
     ANDROID_MEMBAR_FULL();
-    if (__atomic_inc((volatile int*)&sem->count) >= 0)
-        __futex_wake(&sem->count, 1);
+    /* __sem_inc() returns the old counter masked to 31 bits, so this test
+     * is always true: every post issues a futex wake with a count of 1. */
+    if (__sem_inc(&sem->count) >= 0)
+        __futex_wake_ex(&sem->count, shared, 1);
 
     return 0;
 }
@@ -202,7 +252,7 @@
         return -1;
     }
 
-    if (__atomic_dec_if_positive(&sem->count) > 0) {
+    if (__sem_dec_if_positive(&sem->count) > 0) {
         ANDROID_MEMBAR_FULL();
         return 0;
     } else {
@@ -218,6 +268,6 @@
         return -1;
     }
 
-    *sval = sem->count;
+    *sval = SEM_GET_VALUE(sem);
     return 0;
 }