Provide writer preference option in rwlock.

The previous implementation of rwlock used four atomic variables, which
are hard to maintain and change. So I made the following changes in this CL:

1. Add pending flags to rwlock.state, so we don't need to synchronize
between different atomic variables. Compare-and-swap operations on
rwlock.state are enough for all state changes.

2. Add pending_lock to protect the reader/writer wait and wakeup
operations. As waiting/wakeup is not performance critical, using a
lock is easier to maintain.

3. Add writer preference option.

4. Add unit tests for rwlock.
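
For example, the new writer preference option can be selected like this
(illustrative sketch; error checking omitted):

  pthread_rwlockattr_t attr;
  pthread_rwlockattr_init(&attr);
  pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
  pthread_rwlock_t lock;
  pthread_rwlock_init(&lock, &attr);
  pthread_rwlockattr_destroy(&attr);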

Bug: 19109156

Change-Id: Idcaa58d695ea401d64445610b465ac5cff23ec7c
diff --git a/libc/bionic/pthread_rwlock.cpp b/libc/bionic/pthread_rwlock.cpp
index 8aa40ae..934210e 100644
--- a/libc/bionic/pthread_rwlock.cpp
+++ b/libc/bionic/pthread_rwlock.cpp
@@ -28,9 +28,11 @@
 
 #include <errno.h>
 #include <stdatomic.h>
+#include <string.h>
 
 #include "pthread_internal.h"
 #include "private/bionic_futex.h"
+#include "private/bionic_lock.h"
 #include "private/bionic_time_conversions.h"
 
 /* Technical note:
@@ -53,18 +55,39 @@
  *  - This implementation will return EDEADLK in "write after write" and "read after
  *    write" cases and will deadlock in write after read case.
  *
- * TODO: As it stands now, pending_readers and pending_writers could be merged into a
- * a single waiters variable.  Keeping them separate adds a bit of clarity and keeps
- * the door open for a writer-biased implementation.
- *
  */
 
-#define RWLOCKATTR_DEFAULT     0
-#define RWLOCKATTR_SHARED_MASK 0x0010
+// A rwlockattr is implemented as a 32-bit integer with the following fields:
+//  bits     name              description
+//   1       rwlock_kind       the rwlock kind, e.g. PTHREAD_RWLOCK_PREFER_READER_NP.
+//   0       process_shared    set to 1 if the rwlock is shared between processes.
+
+#define RWLOCKATTR_PSHARED_SHIFT 0
+#define RWLOCKATTR_KIND_SHIFT    1
+
+#define RWLOCKATTR_PSHARED_MASK  1
+#define RWLOCKATTR_KIND_MASK     2
+#define RWLOCKATTR_RESERVED_MASK (~3)
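+
+// For example (illustrative): a process-shared attribute with writer preference is encoded as
+// (PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP << RWLOCKATTR_KIND_SHIFT) |
+// (1 << RWLOCKATTR_PSHARED_SHIFT), i.e. 3.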
+
+static inline __always_inline bool __rwlockattr_getpshared(const pthread_rwlockattr_t* attr) {
+  return (*attr & RWLOCKATTR_PSHARED_MASK) >> RWLOCKATTR_PSHARED_SHIFT;
+}
+
+static inline __always_inline void __rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
+  *attr = (*attr & ~RWLOCKATTR_PSHARED_MASK) | (pshared << RWLOCKATTR_PSHARED_SHIFT);
+}
+
+static inline __always_inline int __rwlockattr_getkind(const pthread_rwlockattr_t* attr) {
+  return (*attr & RWLOCKATTR_KIND_MASK) >> RWLOCKATTR_KIND_SHIFT;
+}
+
+static inline __always_inline void __rwlockattr_setkind(pthread_rwlockattr_t* attr, int kind) {
+  *attr = (*attr & ~RWLOCKATTR_KIND_MASK) | (kind << RWLOCKATTR_KIND_SHIFT);
+}
 
 
 int pthread_rwlockattr_init(pthread_rwlockattr_t* attr) {
-  *attr = PTHREAD_PROCESS_PRIVATE;
+  *attr = 0;
   return 0;
 }
 
@@ -73,40 +96,121 @@
   return 0;
 }
 
+int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
+  if (__rwlockattr_getpshared(attr)) {
+    *pshared = PTHREAD_PROCESS_SHARED;
+  } else {
+    *pshared = PTHREAD_PROCESS_PRIVATE;
+  }
+  return 0;
+}
+
 int pthread_rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
   switch (pshared) {
     case PTHREAD_PROCESS_PRIVATE:
+      __rwlockattr_setpshared(attr, 0);
+      return 0;
     case PTHREAD_PROCESS_SHARED:
-      *attr = pshared;
+      __rwlockattr_setpshared(attr, 1);
       return 0;
     default:
       return EINVAL;
   }
 }
 
-int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
-  *pshared = *attr;
+int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t* attr, int* pref) {
+  *pref = __rwlockattr_getkind(attr);
   return 0;
 }
 
-struct pthread_rwlock_internal_t {
-  atomic_int state; // 0=unlock, -1=writer lock, +n=reader lock
-  atomic_int writer_thread_id;
-  atomic_uint pending_readers;
-  atomic_uint pending_writers;
-  int32_t attr;
-
-  bool process_shared() const {
-    return attr == PTHREAD_PROCESS_SHARED;
+int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t* attr, int pref) {
+  switch (pref) {
+    case PTHREAD_RWLOCK_PREFER_READER_NP:   // Fall through.
+    case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
+      __rwlockattr_setkind(attr, pref);
+      return 0;
+    default:
+      return EINVAL;
   }
+}
+
+// A rwlock state is implemented as a 32-bit integer with the following fields:
+//  bits     name                    description
+//   31      owned_by_writer_flag    set to 1 if the lock is owned by a writer now.
+//  30-2     reader_count            the count of readers holding the lock.
+//   1       have_pending_writers    set to 1 if there are pending writers.
+//   0       have_pending_readers    set to 1 if there are pending readers.
+
+#define STATE_HAVE_PENDING_READERS_SHIFT    0
+#define STATE_HAVE_PENDING_WRITERS_SHIFT    1
+#define STATE_READER_COUNT_SHIFT            2
+#define STATE_OWNED_BY_WRITER_SHIFT        31
+
+#define STATE_HAVE_PENDING_READERS_FLAG     (1 << STATE_HAVE_PENDING_READERS_SHIFT)
+#define STATE_HAVE_PENDING_WRITERS_FLAG     (1 << STATE_HAVE_PENDING_WRITERS_SHIFT)
+#define STATE_READER_COUNT_CHANGE_STEP      (1 << STATE_READER_COUNT_SHIFT)
+#define STATE_OWNED_BY_WRITER_FLAG          (1 << STATE_OWNED_BY_WRITER_SHIFT)
+
+#define STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG \
+          (STATE_HAVE_PENDING_READERS_FLAG | STATE_HAVE_PENDING_WRITERS_FLAG)
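+
+// For example (illustrative): with two readers holding the lock and a writer waiting, state is
+// (2 << STATE_READER_COUNT_SHIFT) | STATE_HAVE_PENDING_WRITERS_FLAG, i.e. 0xa.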
+
+struct pthread_rwlock_internal_t {
+  atomic_int state;
+  atomic_int writer_tid;
+
+  bool pshared;
+  bool writer_nonrecursive_preferred;
+  uint16_t __pad;
+
+  // When a reader thread plans to suspend on the rwlock, it will add
+  // STATE_HAVE_PENDING_READERS_FLAG to state, increase pending_reader_count, and wait on
+  // pending_reader_wakeup_serial. After being woken up, the reader thread decreases
+  // pending_reader_count, and the last pending reader thread should remove
+  // STATE_HAVE_PENDING_READERS_FLAG from state. A pending writer thread works in a similar way,
+  // except that it uses the flag and members for writer threads.
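+  //
+  // For example (illustrative): a writer that unlocks and sees STATE_HAVE_PENDING_WRITERS_FLAG in
+  // the old state takes pending_lock, increases pending_writer_wakeup_serial, and wakes one
+  // pending writer via futex_wake.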
+
+  Lock pending_lock;  // All pending members below are protected by pending_lock.
+  uint32_t pending_reader_count;  // Count of pending reader threads.
+  uint32_t pending_writer_count;  // Count of pending writer threads.
+  uint32_t pending_reader_wakeup_serial;  // Pending reader threads wait on this address by futex_wait.
+  uint32_t pending_writer_wakeup_serial;  // Pending writer threads wait on this address by futex_wait.
 
 #if defined(__LP64__)
-  char __reserved[36];
-#else
   char __reserved[20];
+#else
+  char __reserved[4];
 #endif
 };
 
+static inline __always_inline bool __state_owned_by_writer(int state) {
+  return state < 0;
+}
+
+static inline __always_inline bool __state_owned_by_readers(int state) {
+  // If state >= 0, the owned_by_writer_flag is not set.
+  // And if state >= STATE_READER_COUNT_CHANGE_STEP, the reader_count field is nonzero.
+  return state >= STATE_READER_COUNT_CHANGE_STEP;
+}
+
+static inline __always_inline bool __state_owned_by_readers_or_writer(int state) {
+  return state < 0 || state >= STATE_READER_COUNT_CHANGE_STEP;
+}
+
+static inline __always_inline int __state_add_writer_flag(int state) {
+  return state | STATE_OWNED_BY_WRITER_FLAG;
+}
+
+static inline __always_inline bool __state_is_last_reader(int state) {
+  return (state >> STATE_READER_COUNT_SHIFT) == 1;
+}
+
+static inline __always_inline bool __state_have_pending_writers(int state) {
+  return state & STATE_HAVE_PENDING_WRITERS_FLAG;
+}
+
+static inline __always_inline bool __state_have_pending_readers_or_writers(int state) {
+  return state & STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG;
+}
+
 static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t),
               "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation.");
 
@@ -115,31 +219,35 @@
 static_assert(alignof(pthread_rwlock_t) == 4,
              "pthread_rwlock_t should fulfill the alignment requirement of pthread_rwlock_internal_t.");
 
-static inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) {
+static inline __always_inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) {
   return reinterpret_cast<pthread_rwlock_internal_t*>(rwlock_interface);
 }
 
 int pthread_rwlock_init(pthread_rwlock_t* rwlock_interface, const pthread_rwlockattr_t* attr) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
 
-  if (__predict_true(attr == NULL)) {
-    rwlock->attr = 0;
-  } else {
-    switch (*attr) {
-      case PTHREAD_PROCESS_SHARED:
-      case PTHREAD_PROCESS_PRIVATE:
-        rwlock->attr= *attr;
+  memset(rwlock, 0, sizeof(pthread_rwlock_internal_t));
+
+  if (__predict_false(attr != NULL)) {
+    rwlock->pshared = __rwlockattr_getpshared(attr);
+    int kind = __rwlockattr_getkind(attr);
+    switch (kind) {
+      case PTHREAD_RWLOCK_PREFER_READER_NP:
+        rwlock->writer_nonrecursive_preferred = false;
+        break;
+      case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
+        rwlock->writer_nonrecursive_preferred = true;
         break;
       default:
         return EINVAL;
     }
+    if ((*attr & RWLOCKATTR_RESERVED_MASK) != 0) {
+      return EINVAL;
+    }
   }
 
   atomic_init(&rwlock->state, 0);
-  atomic_init(&rwlock->writer_thread_id, 0);
-  atomic_init(&rwlock->pending_readers, 0);
-  atomic_init(&rwlock->pending_writers, 0);
-
+  rwlock->pending_lock.init(rwlock->pshared);
   return 0;
 }
 
@@ -152,105 +260,173 @@
   return 0;
 }
 
+static inline __always_inline bool __can_acquire_read_lock(int old_state,
+                                                             bool writer_nonrecursive_preferred) {
+  // If writer is preferred with nonrecursive reader, we prevent further readers from acquiring
+  // the lock when there are writers waiting for the lock.
+  bool cannot_apply = __state_owned_by_writer(old_state) ||
+                      (writer_nonrecursive_preferred && __state_have_pending_writers(old_state));
+  return !cannot_apply;
+}
+
+static inline __always_inline int __pthread_rwlock_tryrdlock(pthread_rwlock_internal_t* rwlock) {
+  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
+
+  while (__predict_true(__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred))) {
+
+    int new_state = old_state + STATE_READER_COUNT_CHANGE_STEP;
+    if (__predict_false(!__state_owned_by_readers(new_state))) { // Happens when reader count overflows.
+      return EAGAIN;
+    }
+    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, new_state,
+                                              memory_order_acquire, memory_order_relaxed))) {
+      return 0;
+    }
+  }
+  return EBUSY;
+}
+
 static int __pthread_rwlock_timedrdlock(pthread_rwlock_internal_t* rwlock,
                                         const timespec* abs_timeout_or_null) {
 
-  if (__predict_false(__get_thread()->tid == atomic_load_explicit(&rwlock->writer_thread_id,
-                                                                  memory_order_relaxed))) {
+  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
     return EDEADLK;
   }
 
   while (true) {
+    int ret = __pthread_rwlock_tryrdlock(rwlock);
+    if (ret == 0 || ret == EAGAIN) {
+      return ret;
+    }
+
     int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-    if (__predict_true(old_state >= 0)) {
-      if (atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, old_state + 1,
-                                                memory_order_acquire, memory_order_relaxed)) {
-        return 0;
-      }
-    } else {
-      timespec ts;
-      timespec* rel_timeout = NULL;
+    if (__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
+      continue;
+    }
 
-      if (abs_timeout_or_null != NULL) {
-        rel_timeout = &ts;
-        if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
-          return ETIMEDOUT;
-        }
-      }
+    timespec ts;
+    timespec* rel_timeout = NULL;
 
-      // To avoid losing wake ups, the pending_readers increment should be observed before
-      // futex_wait by all threads. A seq_cst fence instead of a seq_cst operation is used
-      // here. Because only a seq_cst fence can ensure sequential consistency for non-atomic
-      // operations in futex_wait.
-      atomic_fetch_add_explicit(&rwlock->pending_readers, 1, memory_order_relaxed);
-
-      atomic_thread_fence(memory_order_seq_cst);
-
-      int ret = __futex_wait_ex(&rwlock->state, rwlock->process_shared(), old_state,
-                                rel_timeout);
-
-      atomic_fetch_sub_explicit(&rwlock->pending_readers, 1, memory_order_relaxed);
-
-      if (ret == -ETIMEDOUT) {
+    if (abs_timeout_or_null != NULL) {
+      rel_timeout = &ts;
+      if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
         return ETIMEDOUT;
       }
     }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_reader_count++;
+
+    // We rely on the fact that all atomic read-modify-write operations on the same object (here
+    // rwlock->state) always appear to occur in a single total order. If the pending flag is added
+    // before unlocking, the unlocking thread will wake up the waiter. Otherwise, we will see that
+    // the state is unlocked and will not wait anymore.
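+    //
+    // For example (illustrative): if the unlocking thread's state update is ordered before our
+    // fetch_or below, we will read old_state as unlocked and retry instead of sleeping; if it is
+    // ordered after, the unlocking thread will observe the pending flag we set and wake us up.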
+    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_READERS_FLAG,
+                                         memory_order_relaxed);
+
+    int old_serial = rwlock->pending_reader_wakeup_serial;
+    rwlock->pending_lock.unlock();
+
+    int futex_ret = 0;
+    if (!__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
+      futex_ret = __futex_wait_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared,
+                                  old_serial, rel_timeout);
+    }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_reader_count--;
+    if (rwlock->pending_reader_count == 0) {
+      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_READERS_FLAG,
+                                memory_order_relaxed);
+    }
+    rwlock->pending_lock.unlock();
+
+    if (futex_ret == -ETIMEDOUT) {
+      return ETIMEDOUT;
+    }
   }
 }
 
+static inline __always_inline bool __can_acquire_write_lock(int old_state) {
+  return !__state_owned_by_readers_or_writer(old_state);
+}
+
+static inline __always_inline int __pthread_rwlock_trywrlock(pthread_rwlock_internal_t* rwlock) {
+  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
+
+  while (__predict_true(__can_acquire_write_lock(old_state))) {
+    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
+          __state_add_writer_flag(old_state), memory_order_acquire, memory_order_relaxed))) {
+
+      atomic_store_explicit(&rwlock->writer_tid, __get_thread()->tid, memory_order_relaxed);
+      return 0;
+    }
+  }
+  return EBUSY;
+}
+
 static int __pthread_rwlock_timedwrlock(pthread_rwlock_internal_t* rwlock,
                                         const timespec* abs_timeout_or_null) {
 
-  if (__predict_false(__get_thread()->tid == atomic_load_explicit(&rwlock->writer_thread_id,
-                                                                  memory_order_relaxed))) {
+  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
     return EDEADLK;
   }
-
   while (true) {
+    int ret = __pthread_rwlock_trywrlock(rwlock);
+    if (ret == 0) {
+      return ret;
+    }
+
     int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-    if (__predict_true(old_state == 0)) {
-      if (atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, -1,
-                                                memory_order_acquire, memory_order_relaxed)) {
-        // writer_thread_id is protected by rwlock and can only be modified in rwlock write
-        // owner thread. Other threads may read it for EDEADLK error checking, atomic operation
-        // is safe enough for it.
-        atomic_store_explicit(&rwlock->writer_thread_id, __get_thread()->tid, memory_order_relaxed);
-        return 0;
-      }
-    } else {
-      timespec ts;
-      timespec* rel_timeout = NULL;
+    if (__can_acquire_write_lock(old_state)) {
+      continue;
+    }
 
-      if (abs_timeout_or_null != NULL) {
-        rel_timeout = &ts;
-        if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
-          return ETIMEDOUT;
-        }
-      }
+    timespec ts;
+    timespec* rel_timeout = NULL;
 
-      // To avoid losing wake ups, the pending_writers increment should be observed before
-      // futex_wait by all threads. A seq_cst fence instead of a seq_cst operation is used
-      // here. Because only a seq_cst fence can ensure sequential consistency for non-atomic
-      // operations in futex_wait.
-      atomic_fetch_add_explicit(&rwlock->pending_writers, 1, memory_order_relaxed);
-
-      atomic_thread_fence(memory_order_seq_cst);
-
-      int ret = __futex_wait_ex(&rwlock->state, rwlock->process_shared(), old_state,
-                                rel_timeout);
-
-      atomic_fetch_sub_explicit(&rwlock->pending_writers, 1, memory_order_relaxed);
-
-      if (ret == -ETIMEDOUT) {
+    if (abs_timeout_or_null != NULL) {
+      rel_timeout = &ts;
+      if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
         return ETIMEDOUT;
       }
     }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_writer_count++;
+
+    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_WRITERS_FLAG,
+                                         memory_order_relaxed);
+
+    int old_serial = rwlock->pending_writer_wakeup_serial;
+    rwlock->pending_lock.unlock();
+
+    int futex_ret = 0;
+    if (!__can_acquire_write_lock(old_state)) {
+      futex_ret = __futex_wait_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared,
+                                  old_serial, rel_timeout);
+    }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_writer_count--;
+    if (rwlock->pending_writer_count == 0) {
+      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_WRITERS_FLAG,
+                                memory_order_relaxed);
+    }
+    rwlock->pending_lock.unlock();
+
+    if (futex_ret == -ETIMEDOUT) {
+      return ETIMEDOUT;
+    }
   }
 }
 
 int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock_interface) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
+  // Avoid slowing down the fast path of rdlock.
+  if (__predict_true(__pthread_rwlock_tryrdlock(rwlock) == 0)) {
+    return 0;
+  }
   return __pthread_rwlock_timedrdlock(rwlock, NULL);
 }
 
@@ -261,19 +437,15 @@
 }
 
 int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock_interface) {
-  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
-  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-
-  while (old_state >= 0 && !atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
-                             old_state + 1, memory_order_acquire, memory_order_relaxed)) {
-  }
-  return (old_state >= 0) ? 0 : EBUSY;
+  return __pthread_rwlock_tryrdlock(__get_internal_rwlock(rwlock_interface));
 }
 
 int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock_interface) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
+  // Avoid slowing down the fast path of wrlock.
+  if (__predict_true(__pthread_rwlock_trywrlock(rwlock) == 0)) {
+    return 0;
+  }
   return __pthread_rwlock_timedwrlock(rwlock, NULL);
 }
 
@@ -284,65 +456,52 @@
 }
 
 int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock_interface) {
-  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
-  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-
-  while (old_state == 0 && !atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, -1,
-                                              memory_order_acquire, memory_order_relaxed)) {
-  }
-  if (old_state == 0) {
-    atomic_store_explicit(&rwlock->writer_thread_id, __get_thread()->tid, memory_order_relaxed);
-    return 0;
-  }
-  return EBUSY;
+  return __pthread_rwlock_trywrlock(__get_internal_rwlock(rwlock_interface));
 }
 
-
 int pthread_rwlock_unlock(pthread_rwlock_t* rwlock_interface) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
 
   int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-  if (__predict_false(old_state == 0)) {
-    return EPERM;
-  } else if (old_state == -1) {
-    if (atomic_load_explicit(&rwlock->writer_thread_id, memory_order_relaxed) != __get_thread()->tid) {
+  if (__state_owned_by_writer(old_state)) {
+    if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) != __get_thread()->tid) {
       return EPERM;
     }
-    // We're no longer the owner.
-    atomic_store_explicit(&rwlock->writer_thread_id, 0, memory_order_relaxed);
-    // Change state from -1 to 0.
-    atomic_store_explicit(&rwlock->state, 0, memory_order_release);
-
-  } else { // old_state > 0
-    // Reduce state by 1.
-    while (old_state > 0 && !atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
-                               old_state - 1, memory_order_release, memory_order_relaxed)) {
-    }
-
-    if (old_state <= 0) {
-      return EPERM;
-    } else if (old_state > 1) {
+    atomic_store_explicit(&rwlock->writer_tid, 0, memory_order_relaxed);
+    old_state = atomic_fetch_and_explicit(&rwlock->state, ~STATE_OWNED_BY_WRITER_FLAG,
+                                          memory_order_release);
+    if (!__state_have_pending_readers_or_writers(old_state)) {
       return 0;
     }
-    // old_state = 1, which means the last reader calling unlock. It has to wake up waiters.
+
+  } else if (__state_owned_by_readers(old_state)) {
+    old_state = atomic_fetch_sub_explicit(&rwlock->state, STATE_READER_COUNT_CHANGE_STEP,
+                                          memory_order_release);
+    if (!__state_is_last_reader(old_state) || !__state_have_pending_readers_or_writers(old_state)) {
+      return 0;
+    }
+
+  } else {
+    return EPERM;
   }
 
-  // If having waiters, wake up them.
-  // To avoid losing wake ups, the update of state should be observed before reading
-  // pending_readers/pending_writers by all threads. Use read locking as an example:
-  //     read locking thread                        unlocking thread
-  //      pending_readers++;                         state = 0;
-  //      seq_cst fence                              seq_cst fence
-  //      read state for futex_wait                  read pending_readers for futex_wake
-  //
-  // So when locking and unlocking threads are running in parallel, we will not get
-  // in a situation that the locking thread reads state as negative and needs to wait,
-  // while the unlocking thread reads pending_readers as zero and doesn't need to wake up waiters.
-  atomic_thread_fence(memory_order_seq_cst);
-  if (__predict_false(atomic_load_explicit(&rwlock->pending_readers, memory_order_relaxed) > 0 ||
-                      atomic_load_explicit(&rwlock->pending_writers, memory_order_relaxed) > 0)) {
-    __futex_wake_ex(&rwlock->state, rwlock->process_shared(), INT_MAX);
+  // Wake up pending readers or writers.
+  rwlock->pending_lock.lock();
+  if (rwlock->pending_writer_count != 0) {
+    rwlock->pending_writer_wakeup_serial++;
+    rwlock->pending_lock.unlock();
+
+    __futex_wake_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared, 1);
+
+  } else if (rwlock->pending_reader_count != 0) {
+    rwlock->pending_reader_wakeup_serial++;
+    rwlock->pending_lock.unlock();
+
+    __futex_wake_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared, INT_MAX);
+
+  } else {
+    // This can happen when waiters are woken up by a timeout.
+    rwlock->pending_lock.unlock();
   }
   return 0;
 }
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 83a56d6..26d68e4 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -85,6 +85,11 @@
 
 #define PTHREAD_RWLOCK_INITIALIZER  { { 0 } }
 
+enum {
+  PTHREAD_RWLOCK_PREFER_READER_NP = 0,
+  PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP = 1,
+};
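+
+// PTHREAD_RWLOCK_PREFER_READER_NP is the default kind; it can be changed with
+// pthread_rwlockattr_setkind_np().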
+
 typedef int pthread_key_t;
 
 typedef int pthread_once_t;
@@ -178,10 +183,12 @@
 
 int pthread_once(pthread_once_t*, void (*)(void)) __nonnull((1, 2));
 
+int pthread_rwlockattr_init(pthread_rwlockattr_t*) __nonnull((1));
 int pthread_rwlockattr_destroy(pthread_rwlockattr_t*) __nonnull((1));
 int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t*, int*) __nonnull((1, 2));
-int pthread_rwlockattr_init(pthread_rwlockattr_t*) __nonnull((1));
 int pthread_rwlockattr_setpshared(pthread_rwlockattr_t*, int) __nonnull((1));
+int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t*, int*) __nonnull((1, 2));
+int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t*, int) __nonnull((1));
 
 int pthread_rwlock_destroy(pthread_rwlock_t*) __nonnull((1));
 int pthread_rwlock_init(pthread_rwlock_t*, const pthread_rwlockattr_t*) __nonnull((1));
diff --git a/libc/private/bionic_lock.h b/libc/private/bionic_lock.h
new file mode 100644
index 0000000..6a0fd06
--- /dev/null
+++ b/libc/private/bionic_lock.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _BIONIC_LOCK_H
+#define _BIONIC_LOCK_H
+
+#include <stdatomic.h>
+#include "private/bionic_futex.h"
+
+class Lock {
+ private:
+  enum LockState {
+    Unlocked = 0,
+    LockedWithoutWaiter,
+    LockedWithWaiter,
+  };
+  _Atomic(LockState) state;
+  bool process_shared;
+
+ public:
+  Lock(bool process_shared = false) {
+    init(process_shared);
+  }
+
+  void init(bool process_shared) {
+    atomic_init(&state, Unlocked);
+    this->process_shared = process_shared;
+  }
+
+  void lock() {
+    LockState old_state = Unlocked;
+    if (__predict_true(atomic_compare_exchange_strong_explicit(&state, &old_state,
+                         LockedWithoutWaiter, memory_order_acquire, memory_order_relaxed))) {
+      return;
+    }
+    while (atomic_exchange_explicit(&state, LockedWithWaiter, memory_order_acquire) != Unlocked) {
+      // TODO: As the critical section is brief, it would be better to spin a few times before sleeping.
+      __futex_wait_ex(&state, process_shared, LockedWithWaiter, NULL);
+    }
+  }
+
+  void unlock() {
+    if (atomic_exchange_explicit(&state, Unlocked, memory_order_release) == LockedWithWaiter) {
+      __futex_wake_ex(&state, process_shared, 1);
+    }
+  }
+};
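+
+// Example usage (illustrative only):
+//
+//   Lock lock;  // Pass true to the constructor for a process-shared lock.
+//   lock.lock();
+//   // ... critical section ...
+//   lock.unlock();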
+
+#endif  // _BIONIC_LOCK_H
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index f96ccf9..2d21e30 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -660,6 +660,37 @@
 #endif // __BIONIC__
 }
 
+TEST(pthread, pthread_rwlockattr_smoke) {
+  pthread_rwlockattr_t attr;
+  ASSERT_EQ(0, pthread_rwlockattr_init(&attr));
+
+  int pshared_value_array[] = {PTHREAD_PROCESS_PRIVATE, PTHREAD_PROCESS_SHARED};
+  for (size_t i = 0; i < sizeof(pshared_value_array) / sizeof(pshared_value_array[0]); ++i) {
+    ASSERT_EQ(0, pthread_rwlockattr_setpshared(&attr, pshared_value_array[i]));
+    int pshared;
+    ASSERT_EQ(0, pthread_rwlockattr_getpshared(&attr, &pshared));
+    ASSERT_EQ(pshared_value_array[i], pshared);
+  }
+
+  int kind_array[] = {PTHREAD_RWLOCK_PREFER_READER_NP,
+                      PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP};
+  for (size_t i = 0; i < sizeof(kind_array) / sizeof(kind_array[0]); ++i) {
+    ASSERT_EQ(0, pthread_rwlockattr_setkind_np(&attr, kind_array[i]));
+    int kind;
+    ASSERT_EQ(0, pthread_rwlockattr_getkind_np(&attr, &kind));
+    ASSERT_EQ(kind_array[i], kind);
+  }
+
+  ASSERT_EQ(0, pthread_rwlockattr_destroy(&attr));
+}
+
+TEST(pthread, pthread_rwlock_init_same_as_PTHREAD_RWLOCK_INITIALIZER) {
+  pthread_rwlock_t lock1 = PTHREAD_RWLOCK_INITIALIZER;
+  pthread_rwlock_t lock2;
+  ASSERT_EQ(0, pthread_rwlock_init(&lock2, NULL));
+  ASSERT_EQ(0, memcmp(&lock1, &lock2, sizeof(lock1)));
+}
+
 TEST(pthread, pthread_rwlock_smoke) {
   pthread_rwlock_t l;
   ASSERT_EQ(0, pthread_rwlock_init(&l, NULL));
@@ -695,7 +726,6 @@
   ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(0, pthread_rwlock_unlock(&l));
 
-#ifdef __BIONIC__
   // EDEADLK in "read after write"
   ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(EDEADLK, pthread_rwlock_rdlock(&l));
@@ -705,7 +735,6 @@
   ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(EDEADLK, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(0, pthread_rwlock_unlock(&l));
-#endif
 
   ASSERT_EQ(0, pthread_rwlock_destroy(&l));
 }
@@ -807,6 +836,111 @@
   ASSERT_EQ(0, pthread_rwlock_destroy(&wakeup_arg.lock));
 }
 
+class RwlockKindTestHelper {
+ private:
+  struct ThreadArg {
+    RwlockKindTestHelper* helper;
+    std::atomic<pid_t>& tid;
+
+    ThreadArg(RwlockKindTestHelper* helper, std::atomic<pid_t>& tid)
+      : helper(helper), tid(tid) { }
+  };
+
+ public:
+  pthread_rwlock_t lock;
+
+ public:
+  RwlockKindTestHelper(int kind_type) {
+    InitRwlock(kind_type);
+  }
+
+  ~RwlockKindTestHelper() {
+    DestroyRwlock();
+  }
+
+  void CreateWriterThread(pthread_t& thread, std::atomic<pid_t>& tid) {
+    tid = 0;
+    ThreadArg* arg = new ThreadArg(this, tid);
+    ASSERT_EQ(0, pthread_create(&thread, NULL,
+                                reinterpret_cast<void* (*)(void*)>(WriterThreadFn), arg));
+  }
+
+  void CreateReaderThread(pthread_t& thread, std::atomic<pid_t>& tid) {
+    tid = 0;
+    ThreadArg* arg = new ThreadArg(this, tid);
+    ASSERT_EQ(0, pthread_create(&thread, NULL,
+                                reinterpret_cast<void* (*)(void*)>(ReaderThreadFn), arg));
+  }
+
+ private:
+  void InitRwlock(int kind_type) {
+    pthread_rwlockattr_t attr;
+    ASSERT_EQ(0, pthread_rwlockattr_init(&attr));
+    ASSERT_EQ(0, pthread_rwlockattr_setkind_np(&attr, kind_type));
+    ASSERT_EQ(0, pthread_rwlock_init(&lock, &attr));
+    ASSERT_EQ(0, pthread_rwlockattr_destroy(&attr));
+  }
+
+  void DestroyRwlock() {
+    ASSERT_EQ(0, pthread_rwlock_destroy(&lock));
+  }
+
+  static void WriterThreadFn(ThreadArg* arg) {
+    arg->tid = gettid();
+
+    RwlockKindTestHelper* helper = arg->helper;
+    ASSERT_EQ(0, pthread_rwlock_wrlock(&helper->lock));
+    ASSERT_EQ(0, pthread_rwlock_unlock(&helper->lock));
+    delete arg;
+  }
+
+  static void ReaderThreadFn(ThreadArg* arg) {
+    arg->tid = gettid();
+
+    RwlockKindTestHelper* helper = arg->helper;
+    ASSERT_EQ(0, pthread_rwlock_rdlock(&helper->lock));
+    ASSERT_EQ(0, pthread_rwlock_unlock(&helper->lock));
+    delete arg;
+  }
+};
+
+TEST(pthread, pthread_rwlock_kind_PTHREAD_RWLOCK_PREFER_READER_NP) {
+  RwlockKindTestHelper helper(PTHREAD_RWLOCK_PREFER_READER_NP);
+  ASSERT_EQ(0, pthread_rwlock_rdlock(&helper.lock));
+
+  pthread_t writer_thread;
+  std::atomic<pid_t> writer_tid;
+  helper.CreateWriterThread(writer_thread, writer_tid);
+  WaitUntilThreadSleep(writer_tid);
+
+  pthread_t reader_thread;
+  std::atomic<pid_t> reader_tid;
+  helper.CreateReaderThread(reader_thread, reader_tid);
+  ASSERT_EQ(0, pthread_join(reader_thread, NULL));
+
+  ASSERT_EQ(0, pthread_rwlock_unlock(&helper.lock));
+  ASSERT_EQ(0, pthread_join(writer_thread, NULL));
+}
+
+TEST(pthread, pthread_rwlock_kind_PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP) {
+  RwlockKindTestHelper helper(PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
+  ASSERT_EQ(0, pthread_rwlock_rdlock(&helper.lock));
+
+  pthread_t writer_thread;
+  std::atomic<pid_t> writer_tid;
+  helper.CreateWriterThread(writer_thread, writer_tid);
+  WaitUntilThreadSleep(writer_tid);
+
+  pthread_t reader_thread;
+  std::atomic<pid_t> reader_tid;
+  helper.CreateReaderThread(reader_thread, reader_tid);
+  WaitUntilThreadSleep(reader_tid);
+
+  ASSERT_EQ(0, pthread_rwlock_unlock(&helper.lock));
+  ASSERT_EQ(0, pthread_join(writer_thread, NULL));
+  ASSERT_EQ(0, pthread_join(reader_thread, NULL));
+}
+
 static int g_once_fn_call_count = 0;
 static void OnceFn() {
   ++g_once_fn_call_count;