Merge "Support loading shared libraries from zip files"
diff --git a/benchmarks/pthread_benchmark.cpp b/benchmarks/pthread_benchmark.cpp
index 2f6572d..ad31e7e 100644
--- a/benchmarks/pthread_benchmark.cpp
+++ b/benchmarks/pthread_benchmark.cpp
@@ -121,8 +121,8 @@
   StopBenchmarkTiming();
 }
 
-BENCHMARK_NO_ARG(BM_pthread_rw_lock_read);
-void BM_pthread_rw_lock_read::Run(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_rwlock_read);
+void BM_pthread_rwlock_read::Run(int iters) {
   StopBenchmarkTiming();
   pthread_rwlock_t lock;
   pthread_rwlock_init(&lock, NULL);
@@ -137,8 +137,8 @@
   pthread_rwlock_destroy(&lock);
 }
 
-BENCHMARK_NO_ARG(BM_pthread_rw_lock_write);
-void BM_pthread_rw_lock_write::Run(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_rwlock_write);
+void BM_pthread_rwlock_write::Run(int iters) {
   StopBenchmarkTiming();
   pthread_rwlock_t lock;
   pthread_rwlock_init(&lock, NULL);
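
These benchmarks time uncontended read- and write-lock acquisition. For reference, a minimal sketch of the rwlock API they exercise (not part of the patch):

    pthread_rwlock_t lock;
    pthread_rwlock_init(&lock, NULL);

    pthread_rwlock_rdlock(&lock);  // shared: many readers may hold the lock at once
    pthread_rwlock_unlock(&lock);

    pthread_rwlock_wrlock(&lock);  // exclusive: at most one writer
    pthread_rwlock_unlock(&lock);

    pthread_rwlock_destroy(&lock);
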
diff --git a/libc/bionic/getauxval.cpp b/libc/bionic/getauxval.cpp
index bc41824..22922b9 100644
--- a/libc/bionic/getauxval.cpp
+++ b/libc/bionic/getauxval.cpp
@@ -31,6 +31,7 @@
 #include <sys/auxv.h>
 #include <private/bionic_auxv.h>
 #include <elf.h>
+#include <errno.h>
 
 __LIBC_HIDDEN__ ElfW(auxv_t)* __libc_auxv = NULL;
 
@@ -40,5 +41,6 @@
       return v->a_un.a_val;
     }
   }
+  errno = ENOENT;
   return 0;
 }
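
Since getauxval() returns 0 both for a missing entry and for an entry whose value is genuinely 0, setting errno to ENOENT lets callers tell the two apart (glibc has done the same since 2.19). A minimal sketch of the calling pattern, where type is whatever AT_* constant the caller is querying:

    errno = 0;
    unsigned long value = getauxval(type);
    if (value == 0 && errno == ENOENT) {
      // The auxiliary vector has no entry of this type.
    }
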
diff --git a/libc/bionic/pthread_attr.cpp b/libc/bionic/pthread_attr.cpp
index be1c252..7ad3431 100644
--- a/libc/bionic/pthread_attr.cpp
+++ b/libc/bionic/pthread_attr.cpp
@@ -170,6 +170,11 @@
 int pthread_getattr_np(pthread_t t, pthread_attr_t* attr) {
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(t);
   *attr = thread->attr;
+  // We prefer reading join_state here rather than setting thread->attr.flags in
+  // pthread_detach, because the latter would introduce a data race.
+  if (atomic_load(&thread->join_state) == THREAD_DETACHED) {
+    attr->flags |= PTHREAD_ATTR_FLAG_DETACHED;
+  }
   // The main thread's stack information is not stored in thread->attr, and we need to
   // collect that at runtime.
   if (thread->tid == getpid()) {
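
Because join_state is consulted here, pthread_getattr_np now reports a thread as detached even when the detach happened after creation. A sketch of the observable behavior, assuming fn is a placeholder routine that keeps the thread alive across these calls:

    pthread_t t;
    pthread_create(&t, NULL, fn, NULL);
    pthread_detach(t);

    pthread_attr_t attr;
    pthread_getattr_np(t, &attr);
    int state;
    pthread_attr_getdetachstate(&attr, &state);
    // state == PTHREAD_CREATE_DETACHED, although the thread was created joinable.
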
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 2bca43f..a4bd054 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -86,6 +86,12 @@
 int __init_thread(pthread_internal_t* thread, bool add_to_thread_list) {
   int error = 0;
 
+  if (__predict_true((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) == 0)) {
+    atomic_init(&thread->join_state, THREAD_NOT_JOINED);
+  } else {
+    atomic_init(&thread->join_state, THREAD_DETACHED);
+  }
+
   // Set the scheduling policy/priority of the thread.
   if (thread->attr.sched_policy != SCHED_NORMAL) {
     sched_param param;
@@ -263,7 +269,7 @@
   if (init_errno != 0) {
     // Mark the thread detached and replace its start_routine with a no-op.
     // Letting the thread run is the easiest way to clean up its resources.
-    thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
+    atomic_store(&thread->join_state, THREAD_DETACHED);
     thread->start_routine = __do_nothing;
     pthread_mutex_unlock(&thread->startup_handshake_mutex);
     return init_errno;
diff --git a/libc/bionic/pthread_detach.cpp b/libc/bionic/pthread_detach.cpp
index c800660..7ae5eb4 100644
--- a/libc/bionic/pthread_detach.cpp
+++ b/libc/bionic/pthread_detach.cpp
@@ -38,21 +38,18 @@
       return ESRCH;
     }
 
-    if (thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) {
-      return EINVAL; // Already detached.
+    ThreadJoinState old_state = THREAD_NOT_JOINED;
+    while (old_state == THREAD_NOT_JOINED &&
+           !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_DETACHED)) {
     }
-
-    if (thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) {
-      return 0; // Already being joined; silently do nothing, like glibc.
-    }
-
-    // If the thread has not exited, we can detach it safely.
-    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_ZOMBIE) == 0) {
-      thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
-      return 0;
+    switch (old_state) {
+      case THREAD_NOT_JOINED: return 0;
+      case THREAD_JOINED:     return 0;  // Already being joined; silently do nothing, like glibc.
+      case THREAD_DETACHED:   return EINVAL;  // Already detached.
+      case THREAD_EXITED_NOT_JOINED: break;  // Call pthread_join outside the pthread_accessor's scope.
     }
   }
 
-  // The thread is in zombie state, use pthread_join to clean it up.
+  // The thread is in the THREAD_EXITED_NOT_JOINED state; use pthread_join to clean it up.
   return pthread_join(t, NULL);
 }
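
The compare-exchange loop above is the recurring pattern in this patch: atomic_compare_exchange_weak may fail spuriously, and on any failure it reloads the current state into old_state, so the loop retries only while the observed state is still one it may transition from. Schematically, with FROM and TO standing in for two ThreadJoinState values:

    ThreadJoinState old_state = FROM;
    while (old_state == FROM &&
           !atomic_compare_exchange_weak(&thread->join_state, &old_state, TO)) {
      // A spurious failure leaves old_state == FROM and retries; a real
      // failure exits the loop with the state another thread installed.
    }
    // old_state now tells us which state the thread was actually in.
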
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index d0d64b0..81cc67b 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -87,9 +87,12 @@
     thread->alternate_signal_stack = NULL;
   }
 
-  bool free_mapped_space = false;
-  pthread_mutex_lock(&g_thread_list_lock);
-  if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
+  ThreadJoinState old_state = THREAD_NOT_JOINED;
+  while (old_state == THREAD_NOT_JOINED &&
+         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
+  }
+
+  if (old_state == THREAD_DETACHED) {
     // The thread is detached, no one will use pthread_internal_t after pthread_exit.
     // So we can free mapped space, which includes pthread_internal_t and thread stack.
     // First make sure that the kernel does not try to clear the tid field
@@ -97,28 +100,25 @@
     __set_tid_address(NULL);
 
     // pthread_internal_t is freed below with stack, not here.
+    pthread_mutex_lock(&g_thread_list_lock);
     _pthread_internal_remove_locked(thread, false);
-    free_mapped_space = true;
-  } else {
-    // Mark the thread as exiting without freeing pthread_internal_t.
-    thread->attr.flags |= PTHREAD_ATTR_FLAG_ZOMBIE;
+    pthread_mutex_unlock(&g_thread_list_lock);
+
+    if (thread->mmap_size != 0) {
+      // We need to free mapped space for detached threads when they exit.
+      // That's not something we can do in C.
+
+      // We don't want to take a signal after we've unmapped the stack.
+      // That's one last thing we can handle in C.
+      sigset_t mask;
+      sigfillset(&mask);
+      sigprocmask(SIG_SETMASK, &mask, NULL);
+
+      _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
+    }
   }
-  pthread_mutex_unlock(&g_thread_list_lock);
 
-  if (free_mapped_space && thread->mmap_size != 0) {
-    // We need to free mapped space for detached threads when they exit.
-    // That's not something we can do in C.
-
-    // We don't want to take a signal after we've unmapped the stack.
-    // That's one last thing we can handle in C.
-    sigset_t mask;
-    sigfillset(&mask);
-    sigprocmask(SIG_SETMASK, &mask, NULL);
-
-    _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
-  } else {
-    // No need to free mapped space. Either there was no space mapped, or it is left for
-    // the pthread_join caller to clean up.
-    __exit(0);
-  }
+  // No need to free mapped space. Either there was no space mapped, or it is left for
+  // the pthread_join caller to clean up.
+  __exit(0);
 }
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 6ace301..8da99dd 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -29,6 +29,7 @@
 #define _PTHREAD_INTERNAL_H_
 
 #include <pthread.h>
+#include <stdatomic.h>
 
 #include "private/bionic_tls.h"
 
@@ -46,6 +47,13 @@
   void* data;
 };
 
+enum ThreadJoinState {
+  THREAD_NOT_JOINED,
+  THREAD_EXITED_NOT_JOINED,
+  THREAD_JOINED,
+  THREAD_DETACHED
+};
+
 struct pthread_internal_t {
   struct pthread_internal_t* next;
   struct pthread_internal_t* prev;
@@ -74,6 +82,8 @@
 
   pthread_attr_t attr;
 
+  _Atomic(ThreadJoinState) join_state;
+
   __pthread_cleanup_t* cleanup_stack;
 
   void* (*start_routine)(void*);
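
Taken together, the call sites in this patch give join_state a small, one-way lifecycle. A comment-style summary of the transitions as implemented here:

    // THREAD_NOT_JOINED --pthread_detach()--> THREAD_DETACHED
    // THREAD_NOT_JOINED --pthread_join()----> THREAD_JOINED
    // THREAD_NOT_JOINED --pthread_exit()----> THREAD_EXITED_NOT_JOINED
    // THREAD_EXITED_NOT_JOINED --pthread_join()--> THREAD_JOINED
    // (pthread_detach() on an exited thread funnels into pthread_join() too.)
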
diff --git a/libc/bionic/pthread_join.cpp b/libc/bionic/pthread_join.cpp
index e3350ef..15543b4 100644
--- a/libc/bionic/pthread_join.cpp
+++ b/libc/bionic/pthread_join.cpp
@@ -44,16 +44,15 @@
       return ESRCH;
     }
 
-    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
+    ThreadJoinState old_state = THREAD_NOT_JOINED;
+    while ((old_state == THREAD_NOT_JOINED || old_state == THREAD_EXITED_NOT_JOINED) &&
+           !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_JOINED)) {
+    }
+
+    if (old_state == THREAD_DETACHED || old_state == THREAD_JOINED) {
       return EINVAL;
     }
 
-    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) != 0) {
-      return EINVAL;
-    }
-
-    // Okay, looks like we can signal our intention to join.
-    thread->attr.flags |= PTHREAD_ATTR_FLAG_JOINED;
     tid = thread->tid;
     tid_ptr = &thread->tid;
   }
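
With the join_state check, a join against an already-detached or already-joined thread now fails up front with EINVAL instead of relying on flag bits. A sketch, again assuming fn keeps the thread alive long enough:

    pthread_t t;
    pthread_create(&t, NULL, fn, NULL);
    pthread_detach(t);
    int rc = pthread_join(t, NULL);  // EINVAL: the thread is detached
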
diff --git a/libc/bionic/pthread_mutex.cpp b/libc/bionic/pthread_mutex.cpp
index 83d6b54..24066ae 100644
--- a/libc/bionic/pthread_mutex.cpp
+++ b/libc/bionic/pthread_mutex.cpp
@@ -237,7 +237,7 @@
     return 0;
 }
 
-static inline atomic_int* MUTEX_TO_ATOMIC_POINTER(pthread_mutex_t* mutex) {
+static inline atomic_int* get_mutex_value_pointer(pthread_mutex_t* mutex) {
     static_assert(sizeof(atomic_int) == sizeof(mutex->value),
                   "mutex->value should actually be atomic_int in implementation.");
 
@@ -247,7 +247,7 @@
 }
 
 int pthread_mutex_init(pthread_mutex_t* mutex, const pthread_mutexattr_t* attr) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+    atomic_int* mutex_value_ptr = get_mutex_value_pointer(mutex);
 
     if (__predict_true(attr == NULL)) {
         atomic_init(mutex_value_ptr, MUTEX_TYPE_BITS_NORMAL);
@@ -277,6 +277,19 @@
     return 0;
 }
 
+static inline int __pthread_normal_mutex_trylock(atomic_int* mutex_value_ptr, int shared) {
+    const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
+    const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+
+    int mvalue = unlocked;
+    if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue,
+                                                locked_uncontended,
+                                                memory_order_acquire,
+                                                memory_order_relaxed))) {
+        return 0;
+    }
+    return EBUSY;
+}
 
 /*
  * Lock a mutex of type NORMAL.
@@ -290,25 +303,17 @@
  * "type" value is zero, so the only bits that will be set are the ones in
  * the lock state field.
  */
-static inline void _normal_mutex_lock(atomic_int* mutex_value_ptr, int shared) {
-    /* convenience shortcuts */
-    const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
-    const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-
-    // The common case is an unlocked mutex, so we begin by trying to
-    // change the lock's state from unlocked to locked_uncontended.
-    // If exchanged successfully, An acquire fence is required to make
-    // all memory accesses made by other threads visible in current CPU.
-    int mvalue = unlocked;
-    if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue,
-                                                locked_uncontended,
-                                                memory_order_acquire,
-                                                memory_order_relaxed))) {
-        return;
+static inline int __pthread_normal_mutex_lock(atomic_int* mutex_value_ptr, int shared,
+                                              const timespec* abs_timeout_or_null, clockid_t clock) {
+    if (__predict_true(__pthread_normal_mutex_trylock(mutex_value_ptr, shared) == 0)) {
+        return 0;
     }
 
     ScopedTrace trace("Contending for pthread mutex");
 
+    const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
+    const int locked_contended   = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
+
     // We want to go to sleep until the mutex is available, which requires
     // promoting it to locked_contended. We need to swap in the new state
     // value and then wait until somebody wakes us up.
@@ -316,20 +321,29 @@
     // If it returns unlocked, we have acquired the lock, otherwise another
     // thread still holds the lock and we should wait again.
     // If lock is acquired, an acquire fence is needed to make all memory accesses
-    // made by other threads visible in current CPU.
-    const int locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
+    // made by other threads visible to the current CPU.
     while (atomic_exchange_explicit(mutex_value_ptr, locked_contended,
                                     memory_order_acquire) != unlocked) {
-
-        __futex_wait_ex(mutex_value_ptr, shared, locked_contended, NULL);
+        timespec ts;
+        timespec* rel_timeout = NULL;
+        if (abs_timeout_or_null != NULL) {
+            rel_timeout = &ts;
+            if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, clock)) {
+                return ETIMEDOUT;
+            }
+        }
+        if (__futex_wait_ex(mutex_value_ptr, shared, locked_contended, rel_timeout) == -ETIMEDOUT) {
+            return ETIMEDOUT;
+        }
     }
+    return 0;
 }
 
 /*
  * Release a mutex of type NORMAL.  The caller is responsible for determining
  * that we are in fact the owner of this lock.
  */
-static inline void _normal_mutex_unlock(atomic_int* mutex_value_ptr, int shared) {
+static inline void __pthread_normal_mutex_unlock(atomic_int* mutex_value_ptr, int shared) {
     const int unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
     const int locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
 
@@ -362,25 +376,13 @@
     }
 }
 
-/* This common inlined function is used to increment the counter of an
- * errorcheck or recursive mutex.
+/* This common inlined function is used to increment the counter of a recursive mutex.
  *
- * For errorcheck mutexes, it will return EDEADLK
- * If the counter overflows, it will return EAGAIN
- * Otherwise, it atomically increments the counter and returns 0
- * after providing an acquire barrier.
+ * If the counter overflows, it will return EAGAIN.
+ * Otherwise, it atomically increments the counter and returns 0.
  *
- * mtype is the current mutex type
- * mvalue is the current mutex value (already loaded)
- * mutex pointers to the mutex.
  */
-static inline __always_inline
-int _recursive_increment(atomic_int* mutex_value_ptr, int mvalue, int mtype) {
-    if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
-        // Trying to re-lock a mutex we already acquired.
-        return EDEADLK;
-    }
-
+static inline int __recursive_increment(atomic_int* mutex_value_ptr, int mvalue) {
     // Detect recursive lock overflow and return EAGAIN.
     // This is safe because only the owner thread can modify the
     // counter bits in the mutex value.
@@ -393,15 +395,13 @@
     // loop to update the counter. The counter will not overflow in the loop,
     // as only the owner thread can change it.
     // The mutex is still locked, so we don't need a release fence.
-    while (!atomic_compare_exchange_weak_explicit(mutex_value_ptr, &mvalue,
-                                                  mvalue + MUTEX_COUNTER_BITS_ONE,
-                                                  memory_order_relaxed,
-                                                  memory_order_relaxed)) { }
+    atomic_fetch_add_explicit(mutex_value_ptr, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
     return 0;
 }
 
-int pthread_mutex_lock(pthread_mutex_t* mutex) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+static int __pthread_mutex_lock_with_timeout(pthread_mutex_t* mutex,
+                                           const timespec* abs_timeout_or_null, clockid_t clock) {
+    atomic_int* mutex_value_ptr = get_mutex_value_pointer(mutex);
 
     int mvalue, mtype, tid, shared;
 
@@ -411,24 +411,28 @@
 
     // Handle common case first.
     if ( __predict_true(mtype == MUTEX_TYPE_BITS_NORMAL) ) {
-        _normal_mutex_lock(mutex_value_ptr, shared);
-        return 0;
+        return __pthread_normal_mutex_lock(mutex_value_ptr, shared, abs_timeout_or_null, clock);
     }
 
     // Do we already own this recursive or error-check mutex?
     tid = __get_thread()->tid;
-    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
-        return _recursive_increment(mutex_value_ptr, mvalue, mtype);
+    if (tid == MUTEX_OWNER_FROM_BITS(mvalue)) {
+        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
+            return EDEADLK;
+        }
+        return __recursive_increment(mutex_value_ptr, mvalue);
+    }
 
-    // Add in shared state to avoid extra 'or' operations below.
-    mtype |= shared;
+    const int unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
+    const int locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+    const int locked_contended   = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
 
     // First, if the mutex is unlocked, try to quickly acquire it.
     // In the optimistic case where this works, set the state to locked_uncontended.
-    if (mvalue == mtype) {
-        int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
+    if (mvalue == unlocked) {
+        int newval = MUTEX_OWNER_TO_BITS(tid) | locked_uncontended;
+        // If exchanged successfully, an acquire fence is required to make
+        // all memory accesses made by other threads visible to the current CPU.
         if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue,
                            newval, memory_order_acquire, memory_order_relaxed))) {
             return 0;
@@ -438,16 +442,14 @@
     ScopedTrace trace("Contending for pthread mutex");
 
     while (true) {
-        if (mvalue == mtype) {
-            // If the mutex is unlocked, its value should be 'mtype' and
-            // we try to acquire it by setting its owner and state atomically.
+        if (mvalue == unlocked) {
             // NOTE: We put the state to locked_contended since we _know_ there
             // is contention when we are in this loop. This ensures all waiters
             // will be unlocked.
 
-            int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
-            // If exchanged successfully, An acquire fence is required to make
-            // all memory accesses made by other threads visible in current CPU.
+            int newval = MUTEX_OWNER_TO_BITS(tid) | locked_contended;
+            // If exchanged successfully, an acquire fence is required to make
+            // all memory accesses made by other threads visible to the current CPU.
             if (__predict_true(atomic_compare_exchange_weak_explicit(mutex_value_ptr,
                                                                      &mvalue, newval,
                                                                      memory_order_acquire,
@@ -456,8 +458,7 @@
             }
             continue;
         } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
-            // The mutex is already locked by another thread, if the state is locked_uncontended,
-            // we should set it to locked_contended beforing going to sleep. This can make
+            // We should set it to locked_contended before going to sleep. This can make
             // sure waiters will be woken up eventually.
 
             int newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue);
@@ -470,14 +471,39 @@
             mvalue = newval;
         }
 
-        // We are in locked_contended state, sleep until someone wake us up.
-        __futex_wait_ex(mutex_value_ptr, shared, mvalue, NULL);
+        // We are in locked_contended state, sleep until someone wakes us up.
+        timespec ts;
+        timespec* rel_timeout = NULL;
+        if (abs_timeout_or_null != NULL) {
+            rel_timeout = &ts;
+            if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, clock)) {
+                return ETIMEDOUT;
+            }
+        }
+        if (__futex_wait_ex(mutex_value_ptr, shared, mvalue, rel_timeout) == -ETIMEDOUT) {
+            return ETIMEDOUT;
+        }
         mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
     }
 }
 
+int pthread_mutex_lock(pthread_mutex_t* mutex) {
+    atomic_int* mutex_value_ptr = get_mutex_value_pointer(mutex);
+
+    int mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
+    int mtype = (mvalue & MUTEX_TYPE_MASK);
+    int shared = (mvalue & MUTEX_SHARED_MASK);
+    // Avoid slowing down the fast path of a normal mutex lock operation.
+    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
+        if (__predict_true(__pthread_normal_mutex_trylock(mutex_value_ptr, shared) == 0)) {
+            return 0;
+        }
+    }
+    return __pthread_mutex_lock_with_timeout(mutex, NULL, CLOCK_REALTIME);
+}
+
 int pthread_mutex_unlock(pthread_mutex_t* mutex) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+    atomic_int* mutex_value_ptr = get_mutex_value_pointer(mutex);
 
     int mvalue, mtype, tid, shared;
 
@@ -487,7 +513,7 @@
 
     // Handle common case first.
     if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        _normal_mutex_unlock(mutex_value_ptr, shared);
+        __pthread_normal_mutex_unlock(mutex_value_ptr, shared);
         return 0;
     }
 
@@ -501,10 +527,7 @@
     // lower state bits), use a compare_exchange loop to do it.
     if (!MUTEX_COUNTER_BITS_IS_ZERO(mvalue)) {
         // We still own the mutex, so a release fence is not needed.
-        while (!atomic_compare_exchange_weak_explicit(mutex_value_ptr, &mvalue,
-                                                      mvalue - MUTEX_COUNTER_BITS_ONE,
-                                                      memory_order_relaxed,
-                                                      memory_order_relaxed)) { }
+        atomic_fetch_sub_explicit(mutex_value_ptr, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
         return 0;
     }
 
@@ -514,9 +537,8 @@
     // to awake.
     // A release fence is required to make previous stores visible to next
     // lock owner threads.
-    mvalue = atomic_exchange_explicit(mutex_value_ptr,
-                                      mtype | shared | MUTEX_STATE_BITS_UNLOCKED,
-                                      memory_order_release);
+    const int unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
+    mvalue = atomic_exchange_explicit(mutex_value_ptr, unlocked, memory_order_release);
     if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
         __futex_wake_ex(mutex_value_ptr, shared, 1);
     }
@@ -525,25 +547,18 @@
 }
 
 int pthread_mutex_trylock(pthread_mutex_t* mutex) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+    atomic_int* mutex_value_ptr = get_mutex_value_pointer(mutex);
 
     int mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
     int mtype  = (mvalue & MUTEX_TYPE_MASK);
     int shared = (mvalue & MUTEX_SHARED_MASK);
 
+    const int unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
+    const int locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+
     // Handle common case first.
     if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        mvalue = shared | MUTEX_STATE_BITS_UNLOCKED;
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
-        if (atomic_compare_exchange_strong_explicit(mutex_value_ptr,
-                                                    &mvalue,
-                                                    shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED,
-                                                    memory_order_acquire,
-                                                    memory_order_relaxed)) {
-            return 0;
-        }
-        return EBUSY;
+        return __pthread_normal_mutex_trylock(mutex_value_ptr, shared);
     }
 
     // Do we already own this recursive or error-check mutex?
@@ -552,19 +567,17 @@
         if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
             return EBUSY;
         }
-        return _recursive_increment(mutex_value_ptr, mvalue, mtype);
+        return __recursive_increment(mutex_value_ptr, mvalue);
     }
 
     // Same as pthread_mutex_lock, except that we don't want to wait, and
     // the only operation that can succeed is a single compare_exchange to acquire the
     // lock if it is released / not owned by anyone. No need for a complex loop.
-    // If exchanged successfully, An acquire fence is required to make
-    // all memory accesses made by other threads visible in current CPU.
-    mtype |= shared | MUTEX_STATE_BITS_UNLOCKED;
-    mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-
-    if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr,
-                                                               &mtype, mvalue,
+    // If exchanged successfully, an acquire fence is required to make
+    // all memory accesses made by other threads visible to the current CPU.
+    mvalue = unlocked;
+    int newval = MUTEX_OWNER_TO_BITS(tid) | locked_uncontended;
+    if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue, newval,
                                                                memory_order_acquire,
                                                                memory_order_relaxed))) {
         return 0;
@@ -572,112 +585,6 @@
     return EBUSY;
 }
 
-static int __pthread_mutex_timedlock(pthread_mutex_t* mutex, const timespec* abs_ts, clockid_t clock) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
-
-    timespec ts;
-
-    int mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
-    int mtype  = (mvalue & MUTEX_TYPE_MASK);
-    int shared = (mvalue & MUTEX_SHARED_MASK);
-
-    // Handle common case first.
-    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
-        const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-        const int locked_contended   = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
-
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
-        mvalue = unlocked;
-        if (atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue, locked_uncontended,
-                                                    memory_order_acquire, memory_order_relaxed)) {
-            return 0;
-        }
-
-        ScopedTrace trace("Contending for timed pthread mutex");
-
-        // Same as pthread_mutex_lock, except that we can only wait for a specified
-        // time interval. If lock is acquired, an acquire fence is needed to make
-        // all memory accesses made by other threads visible in current CPU.
-        while (atomic_exchange_explicit(mutex_value_ptr, locked_contended,
-                                        memory_order_acquire) != unlocked) {
-            if (!timespec_from_absolute_timespec(ts, *abs_ts, clock)) {
-                return ETIMEDOUT;
-            }
-            __futex_wait_ex(mutex_value_ptr, shared, locked_contended, &ts);
-        }
-
-        return 0;
-    }
-
-    // Do we already own this recursive or error-check mutex?
-    pid_t tid = __get_thread()->tid;
-    if (tid == MUTEX_OWNER_FROM_BITS(mvalue)) {
-        return _recursive_increment(mutex_value_ptr, mvalue, mtype);
-    }
-
-    mtype |= shared;
-
-    // First try a quick lock.
-    if (mvalue == mtype) {
-        int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
-        if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr,
-                                                                   &mvalue, newval,
-                                                                   memory_order_acquire,
-                                                                   memory_order_relaxed))) {
-            return 0;
-        }
-    }
-
-    ScopedTrace trace("Contending for timed pthread mutex");
-
-    // The following implements the same loop as pthread_mutex_lock,
-    // but adds checks to ensure that the operation never exceeds the
-    // absolute expiration time.
-    while (true) {
-        if (mvalue == mtype) { // Unlocked.
-            int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
-            // An acquire fence is needed for successful exchange.
-            if (!atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue, newval,
-                                                         memory_order_acquire,
-                                                         memory_order_relaxed)) {
-                goto check_time;
-            }
-
-            return 0;
-        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
-            // The value is locked. If the state is locked_uncontended, we need to switch
-            // it to locked_contended before sleep, so we can get woken up later.
-            int newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue);
-            if (!atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue, newval,
-                                                         memory_order_relaxed,
-                                                         memory_order_relaxed)) {
-                goto check_time;
-            }
-            mvalue = newval;
-        }
-
-        if (!timespec_from_absolute_timespec(ts, *abs_ts, clock)) {
-            return ETIMEDOUT;
-        }
-
-        if (__futex_wait_ex(mutex_value_ptr, shared, mvalue, &ts) == -ETIMEDOUT) {
-            return ETIMEDOUT;
-        }
-
-check_time:
-        if (!timespec_from_absolute_timespec(ts, *abs_ts, clock)) {
-            return ETIMEDOUT;
-        }
-        // After futex_wait or time costly timespec_from_absolte_timespec,
-        // we'd better read mvalue again in case it is changed.
-        mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
-    }
-}
-
 #if !defined(__LP64__)
 extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex, unsigned ms) {
     timespec abs_timeout;
@@ -689,7 +596,7 @@
         abs_timeout.tv_nsec -= NS_PER_S;
     }
 
-    int error = __pthread_mutex_timedlock(mutex, &abs_timeout, CLOCK_MONOTONIC);
+    int error = __pthread_mutex_lock_with_timeout(mutex, &abs_timeout, CLOCK_MONOTONIC);
     if (error == ETIMEDOUT) {
         error = EBUSY;
     }
@@ -698,7 +605,7 @@
 #endif
 
 int pthread_mutex_timedlock(pthread_mutex_t* mutex, const timespec* abs_timeout) {
-    return __pthread_mutex_timedlock(mutex, abs_timeout, CLOCK_REALTIME);
+    return __pthread_mutex_lock_with_timeout(mutex, abs_timeout, CLOCK_REALTIME);
 }
 
 int pthread_mutex_destroy(pthread_mutex_t* mutex) {
@@ -708,7 +615,7 @@
         return error;
     }
 
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+    atomic_int* mutex_value_ptr = get_mutex_value_pointer(mutex);
     atomic_store_explicit(mutex_value_ptr, 0xdead10cc, memory_order_relaxed);
     return 0;
 }
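
Folding __pthread_mutex_timedlock into __pthread_mutex_lock_with_timeout leaves pthread_mutex_timedlock with its POSIX semantics: the deadline is an absolute CLOCK_REALTIME time. A usage sketch:

    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

    timespec abs_timeout;
    clock_gettime(CLOCK_REALTIME, &abs_timeout);
    abs_timeout.tv_sec += 1;  // give up roughly one second from now

    int rc = pthread_mutex_timedlock(&m, &abs_timeout);
    if (rc == ETIMEDOUT) {
      // The deadline passed before the lock could be acquired.
    }
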
diff --git a/libc/include/paths.h b/libc/include/paths.h
index 33c2eee..82c2804 100644
--- a/libc/include/paths.h
+++ b/libc/include/paths.h
@@ -32,18 +32,13 @@
 #ifndef _PATHS_H_
 #define	_PATHS_H_
 
-/* Default search path. */
-#define	_PATH_DEFPATH	"/system/bin:/system/xbin"
-
 #define	_PATH_BSHELL	"/system/bin/sh"
 #define	_PATH_CONSOLE	"/dev/console"
+#define	_PATH_DEFPATH	"/sbin:/vendor/bin:/system/sbin:/system/bin:/system/xbin"
+#define	_PATH_DEV	"/dev/"
 #define	_PATH_DEVNULL	"/dev/null"
 #define	_PATH_KLOG	"/proc/kmsg"
-
 #define	_PATH_MOUNTED	"/proc/mounts"
 #define	_PATH_TTY	"/dev/tty"
 
-/* Provide trailing slash, since mostly used for building pathnames. */
-#define	_PATH_DEV	"/dev/"
-
 #endif /* !_PATHS_H_ */
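
_PATH_DEFPATH is the search path execvp() and friends fall back to when PATH is unset, so the expanded value makes binaries in /sbin and /vendor/bin reachable in that case. A sketch, using ls as an arbitrary example:

    unsetenv("PATH");
    char* const argv[] = { const_cast<char*>("ls"), NULL };
    execvp("ls", argv);  // the search uses _PATH_DEFPATH, not $PATH
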
diff --git a/tests/getauxval_test.cpp b/tests/getauxval_test.cpp
index b331150..6ce00f1 100644
--- a/tests/getauxval_test.cpp
+++ b/tests/getauxval_test.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <errno.h>
 #include <sys/cdefs.h>
 #include <gtest/gtest.h>
 
@@ -53,7 +54,9 @@
 
 TEST(getauxval, unexpected_values) {
 #if defined(GETAUXVAL_CAN_COMPILE)
+  errno = 0;
   ASSERT_EQ((unsigned long int) 0, getauxval(0xdeadbeef));
+  ASSERT_EQ(ENOENT, errno);
 #else
   GTEST_LOG_(INFO) << "This test does nothing.\n";
 #endif
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index d3b5332..de60f28 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -732,8 +732,10 @@
   pthread_t thread;
   ASSERT_EQ(0, pthread_create(&thread, NULL,
     reinterpret_cast<void* (*)(void*)>(pthread_rwlock_reader_wakeup_writer_helper), &wakeup_arg));
-  sleep(1);
-  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_WAITING, wakeup_arg.progress);
+  while (wakeup_arg.progress != RwlockWakeupHelperArg::LOCK_WAITING) {
+    usleep(5000);
+  }
+  usleep(5000);
   wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_RELEASED;
   ASSERT_EQ(0, pthread_rwlock_unlock(&wakeup_arg.lock));
 
@@ -763,8 +765,10 @@
   pthread_t thread;
   ASSERT_EQ(0, pthread_create(&thread, NULL,
     reinterpret_cast<void* (*)(void*)>(pthread_rwlock_writer_wakeup_reader_helper), &wakeup_arg));
-  sleep(1);
-  ASSERT_EQ(RwlockWakeupHelperArg::LOCK_WAITING, wakeup_arg.progress);
+  while (wakeup_arg.progress != RwlockWakeupHelperArg::LOCK_WAITING) {
+    usleep(5000);
+  }
+  usleep(5000);
   wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_RELEASED;
   ASSERT_EQ(0, pthread_rwlock_unlock(&wakeup_arg.lock));
 
@@ -1177,15 +1181,21 @@
   ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE));
   ASSERT_EQ(0, pthread_mutexattr_gettype(&attr, &attr_type));
   ASSERT_EQ(PTHREAD_MUTEX_RECURSIVE, attr_type);
+
+  ASSERT_EQ(0, pthread_mutexattr_destroy(&attr));
+}
+
+static void CreateMutex(pthread_mutex_t& mutex, int mutex_type) {
+  pthread_mutexattr_t attr;
+  ASSERT_EQ(0, pthread_mutexattr_init(&attr));
+  ASSERT_EQ(0, pthread_mutexattr_settype(&attr, mutex_type));
+  ASSERT_EQ(0, pthread_mutex_init(&mutex, &attr));
+  ASSERT_EQ(0, pthread_mutexattr_destroy(&attr));
 }
 
 TEST(pthread, pthread_mutex_lock_NORMAL) {
-  pthread_mutexattr_t attr;
-  ASSERT_EQ(0, pthread_mutexattr_init(&attr));
-  ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL));
-
   pthread_mutex_t lock;
-  ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
+  CreateMutex(lock, PTHREAD_MUTEX_NORMAL);
 
   ASSERT_EQ(0, pthread_mutex_lock(&lock));
   ASSERT_EQ(0, pthread_mutex_unlock(&lock));
@@ -1193,12 +1203,8 @@
 }
 
 TEST(pthread, pthread_mutex_lock_ERRORCHECK) {
-  pthread_mutexattr_t attr;
-  ASSERT_EQ(0, pthread_mutexattr_init(&attr));
-  ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
-
   pthread_mutex_t lock;
-  ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
+  CreateMutex(lock, PTHREAD_MUTEX_ERRORCHECK);
 
   ASSERT_EQ(0, pthread_mutex_lock(&lock));
   ASSERT_EQ(EDEADLK, pthread_mutex_lock(&lock));
@@ -1211,12 +1217,8 @@
 }
 
 TEST(pthread, pthread_mutex_lock_RECURSIVE) {
-  pthread_mutexattr_t attr;
-  ASSERT_EQ(0, pthread_mutexattr_init(&attr));
-  ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE));
-
   pthread_mutex_t lock;
-  ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
+  CreateMutex(lock, PTHREAD_MUTEX_RECURSIVE);
 
   ASSERT_EQ(0, pthread_mutex_lock(&lock));
   ASSERT_EQ(0, pthread_mutex_lock(&lock));
@@ -1228,6 +1230,66 @@
   ASSERT_EQ(0, pthread_mutex_destroy(&lock));
 }
 
+class MutexWakeupHelper {
+ private:
+  pthread_mutex_t mutex;
+  enum Progress {
+    LOCK_INITIALIZED,
+    LOCK_WAITING,
+    LOCK_RELEASED,
+    LOCK_ACCESSED
+  };
+  std::atomic<Progress> progress;
+
+  static void thread_fn(MutexWakeupHelper* helper) {
+    ASSERT_EQ(LOCK_INITIALIZED, helper->progress);
+    helper->progress = LOCK_WAITING;
+
+    ASSERT_EQ(0, pthread_mutex_lock(&helper->mutex));
+    ASSERT_EQ(LOCK_RELEASED, helper->progress);
+    ASSERT_EQ(0, pthread_mutex_unlock(&helper->mutex));
+
+    helper->progress = LOCK_ACCESSED;
+  }
+
+ public:
+  void test(int mutex_type) {
+    CreateMutex(mutex, mutex_type);
+    ASSERT_EQ(0, pthread_mutex_lock(&mutex));
+    progress = LOCK_INITIALIZED;
+
+    pthread_t thread;
+    ASSERT_EQ(0, pthread_create(&thread, NULL,
+      reinterpret_cast<void* (*)(void*)>(MutexWakeupHelper::thread_fn), this));
+
+    while (progress != LOCK_WAITING) {
+      usleep(5000);
+    }
+    usleep(5000);
+    progress = LOCK_RELEASED;
+    ASSERT_EQ(0, pthread_mutex_unlock(&mutex));
+
+    ASSERT_EQ(0, pthread_join(thread, NULL));
+    ASSERT_EQ(LOCK_ACCESSED, progress);
+    ASSERT_EQ(0, pthread_mutex_destroy(&mutex));
+  }
+};
+
+TEST(pthread, pthread_mutex_NORMAL_wakeup) {
+  MutexWakeupHelper helper;
+  helper.test(PTHREAD_MUTEX_NORMAL);
+}
+
+TEST(pthread, pthread_mutex_ERRORCHECK_wakeup) {
+  MutexWakeupHelper helper;
+  helper.test(PTHREAD_MUTEX_ERRORCHECK);
+}
+
+TEST(pthread, pthread_mutex_RECURSIVE_wakeup) {
+  MutexWakeupHelper helper;
+  helper.test(PTHREAD_MUTEX_RECURSIVE);
+}
+
 TEST(pthread, pthread_mutex_owner_tid_limit) {
   FILE* fp = fopen("/proc/sys/kernel/pid_max", "r");
   ASSERT_TRUE(fp != NULL);