Merge "Allow building libc long double code with clang/llvm."
diff --git a/README.md b/README.md
index 2c42b3b..79bb72a 100644
--- a/README.md
+++ b/README.md
@@ -208,19 +208,17 @@
 The host tests require that you have `lunch`ed either an x86 or x86_64 target.
 
     $ mma
-    # 64-bit tests for 64-bit targets, 32-bit otherwise.
-    $ mm bionic-unit-tests-run-on-host
-    # Only exists for 64-bit targets.
     $ mm bionic-unit-tests-run-on-host32
+    $ mm bionic-unit-tests-run-on-host64  # For 64-bit *targets* only.
 
 ### Against glibc
 
 As a way to check that our tests do in fact test the correct behavior (and not
 just the behavior we think is correct), it is possible to run the tests against
-the host's glibc.
+the host's glibc. The executables are already in your path.
 
     $ mma
-    $ bionic-unit-tests-glibc32 # already in your path
+    $ bionic-unit-tests-glibc32
     $ bionic-unit-tests-glibc64
 
 
diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk
index ae0541f..e1580fe 100644
--- a/benchmarks/Android.mk
+++ b/benchmarks/Android.mk
@@ -42,7 +42,7 @@
 LOCAL_CPPFLAGS := $(benchmark_cppflags)
 LOCAL_SRC_FILES := $(benchmarklib_src_files)
 LOCAL_C_INCLUDES := $(benchmark_c_includes)
-LOCAL_STATIC_LIBRARIES := libutils
+LOCAL_STATIC_LIBRARIES := libbase
 include $(BUILD_STATIC_LIBRARY)
 
 # Only supported on linux systems.
@@ -55,7 +55,7 @@
 LOCAL_SRC_FILES := $(benchmarklib_src_files)
 LOCAL_C_INCLUDES := $(benchmark_c_includes)
 LOCAL_MULTILIB := both
-LOCAL_STATIC_LIBRARIES := libutils
+LOCAL_STATIC_LIBRARIES := libbase
 include $(BUILD_HOST_STATIC_LIBRARY)
 
 endif
@@ -84,7 +84,7 @@
 LOCAL_CFLAGS := $(benchmark_cflags)
 LOCAL_CPPFLAGS := $(benchmark_cppflags)
 LOCAL_SRC_FILES := $(benchmark_src_files)
-LOCAL_STATIC_LIBRARIES := libbenchmark libutils
+LOCAL_STATIC_LIBRARIES := libbenchmark libbase
 include $(BUILD_EXECUTABLE)
 
 # We don't build a static benchmark executable because it's not usually
@@ -106,7 +106,7 @@
 LOCAL_CPPFLAGS := $(benchmark_cppflags)
 LOCAL_LDFLAGS := -lrt
 LOCAL_SRC_FILES := $(benchmark_src_files)
-LOCAL_STATIC_LIBRARIES := libbenchmark libutils
+LOCAL_STATIC_LIBRARIES := libbenchmark libbase
 include $(BUILD_HOST_EXECUTABLE)
 
 endif
diff --git a/benchmarks/Benchmark.cpp b/benchmarks/Benchmark.cpp
index 5ca1d47..ea6000f 100644
--- a/benchmarks/Benchmark.cpp
+++ b/benchmarks/Benchmark.cpp
@@ -24,7 +24,7 @@
 #include <string>
 #include <vector>
 
-#include <utils/stringprintf.h>
+#include <base/stringprintf.h>
 
 #include <benchmark/Benchmark.h>
 
@@ -108,7 +108,7 @@
 
 template <>
 std::string BenchmarkWithArg<double>::GetNameStr(double arg) {
-  return Name() + "/" + android::StringPrintf("%0.6f", arg);
+  return Name() + "/" + android::base::StringPrintf("%0.6f", arg);
 }
 
 template<typename T>
diff --git a/benchmarks/pthread_benchmark.cpp b/benchmarks/pthread_benchmark.cpp
index 2f6572d..ad31e7e 100644
--- a/benchmarks/pthread_benchmark.cpp
+++ b/benchmarks/pthread_benchmark.cpp
@@ -121,8 +121,8 @@
   StopBenchmarkTiming();
 }
 
-BENCHMARK_NO_ARG(BM_pthread_rw_lock_read);
-void BM_pthread_rw_lock_read::Run(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_rwlock_read);
+void BM_pthread_rwlock_read::Run(int iters) {
   StopBenchmarkTiming();
   pthread_rwlock_t lock;
   pthread_rwlock_init(&lock, NULL);
@@ -137,8 +137,8 @@
   pthread_rwlock_destroy(&lock);
 }
 
-BENCHMARK_NO_ARG(BM_pthread_rw_lock_write);
-void BM_pthread_rw_lock_write::Run(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_rwlock_write);
+void BM_pthread_rwlock_write::Run(int iters) {
   StopBenchmarkTiming();
   pthread_rwlock_t lock;
   pthread_rwlock_init(&lock, NULL);
diff --git a/libc/Android.mk b/libc/Android.mk
index 8dbdc75..4a199e7 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -63,6 +63,7 @@
     stdio/sprintf.c \
     stdio/stdio.c \
     stdio/stdio_ext.cpp \
+    stdlib/exit.c \
 
 # Fortify implementations of libc functions.
 libc_common_src_files += \
@@ -70,7 +71,11 @@
     bionic/__fgets_chk.cpp \
     bionic/__memmove_chk.cpp \
     bionic/__poll_chk.cpp \
+    bionic/__pread64_chk.cpp \
+    bionic/__pread_chk.cpp \
     bionic/__read_chk.cpp \
+    bionic/__readlink_chk.cpp \
+    bionic/__readlinkat_chk.cpp \
     bionic/__recvfrom_chk.cpp \
     bionic/__stpcpy_chk.cpp \
     bionic/__stpncpy_chk.cpp \
@@ -480,7 +485,6 @@
     upstream-openbsd/lib/libc/stdlib/atoi.c \
     upstream-openbsd/lib/libc/stdlib/atol.c \
     upstream-openbsd/lib/libc/stdlib/atoll.c \
-    upstream-openbsd/lib/libc/stdlib/exit.c \
     upstream-openbsd/lib/libc/stdlib/getenv.c \
     upstream-openbsd/lib/libc/stdlib/insque.c \
     upstream-openbsd/lib/libc/stdlib/lsearch.c \
@@ -521,7 +525,7 @@
     bionic/pthread_getcpuclockid.cpp \
     bionic/pthread_getschedparam.cpp \
     bionic/pthread_gettid_np.cpp \
-    bionic/pthread_internals.cpp \
+    bionic/pthread_internal.cpp \
     bionic/pthread_join.cpp \
     bionic/pthread_key.cpp \
     bionic/pthread_kill.cpp \
@@ -533,6 +537,9 @@
     bionic/pthread_setschedparam.cpp \
     bionic/pthread_sigmask.cpp \
 
+libc_thread_atexit_impl_src_files := \
+    bionic/__cxa_thread_atexit_impl.cpp \
+
 libc_arch_static_src_files := \
     bionic/dl_iterate_phdr_static.cpp \
 
@@ -987,6 +994,24 @@
 $(eval $(call patch-up-arch-specific-flags,LOCAL_SRC_FILES,libc_bionic_src_files))
 include $(BUILD_STATIC_LIBRARY)
 
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := $(libc_thread_atexit_impl_src_files)
+LOCAL_CFLAGS := $(libc_common_cflags) -Wframe-larger-than=2048
+
+LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
+LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast
+LOCAL_C_INCLUDES := $(libc_common_c_includes)
+LOCAL_MODULE := libc_thread_atexit_impl
+# TODO: Clang tries to use __tls_get_addr, which is not supported yet.
+# Remove this once it is implemented.
+LOCAL_CLANG := false
+LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
+LOCAL_CXX_STL := none
+LOCAL_SYSTEM_SHARED_LIBRARIES :=
+LOCAL_ADDRESS_SANITIZER := false
+LOCAL_NATIVE_COVERAGE := $(bionic_coverage)
+
+include $(BUILD_STATIC_LIBRARY)
 
 # ========================================================
 # libc_pthread.a - pthreads parts that previously lived in
@@ -1191,6 +1216,7 @@
     libc_pthread \
     libc_stack_protector \
     libc_syscalls \
+    libc_thread_atexit_impl \
     libc_tzcode \
 
 LOCAL_WHOLE_STATIC_LIBRARIES_arm := libc_aeabi
@@ -1310,9 +1336,6 @@
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
 
-# TODO: This is to work around b/19059885. Remove after root cause is fixed
-LOCAL_LDFLAGS_arm := -Wl,--hash-style=sysv
-
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
 LOCAL_SRC_FILES := \
     $(libc_arch_dynamic_src_files) \
@@ -1322,8 +1345,10 @@
 
 LOCAL_MODULE := libc
 LOCAL_CLANG := $(use_clang)
-LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
 LOCAL_REQUIRED_MODULES := tzdata
+LOCAL_ADDITIONAL_DEPENDENCIES := \
+    $(libc_common_additional_dependencies) \
+    $(LOCAL_PATH)/version_script.txt \
 
 # Leave the symbols in the shared library so that stack unwinders can produce
 # meaningful name resolution.
@@ -1342,10 +1367,17 @@
 LOCAL_CXX_STL := none
 LOCAL_SYSTEM_SHARED_LIBRARIES :=
 
+# Don't re-export new/delete and friends, even if the compiler really wants to.
+LOCAL_LDFLAGS := -Wl,--version-script,$(LOCAL_PATH)/version_script.txt
+
 # We'd really like to do this for all architectures, but since this wasn't done
 # before, these symbols must continue to be exported on LP32 for binary
 # compatibility.
-LOCAL_LDFLAGS_64 := -Wl,--exclude-libs,libgcc.a
+# TODO: disabled for http://b/20065774.
+#LOCAL_LDFLAGS_64 := -Wl,--exclude-libs,libgcc.a
+
+# TODO: This is to work around b/19059885. Remove after root cause is fixed
+LOCAL_LDFLAGS_arm := -Wl,--hash-style=sysv
 
 $(eval $(call patch-up-arch-specific-flags,LOCAL_CFLAGS,libc_common_cflags))
 $(eval $(call patch-up-arch-specific-flags,LOCAL_SRC_FILES,libc_arch_dynamic_src_files))
@@ -1394,7 +1426,9 @@
 
 LOCAL_MODULE := libc_malloc_debug_leak
 LOCAL_CLANG := $(use_clang)
-LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
+LOCAL_ADDITIONAL_DEPENDENCIES := \
+    $(libc_common_additional_dependencies) \
+    $(LOCAL_PATH)/version_script.txt \
 
 LOCAL_SHARED_LIBRARIES := libc libdl
 LOCAL_CXX_STL := none
@@ -1405,6 +1439,9 @@
 LOCAL_STATIC_LIBRARIES += libc++abi
 LOCAL_ALLOW_UNDEFINED_SYMBOLS := true
 
+# Don't re-export new/delete and friends, even if the compiler really wants to.
+LOCAL_LDFLAGS := -Wl,--version-script,$(LOCAL_PATH)/version_script.txt
+
 # Don't install on release build
 LOCAL_MODULE_TAGS := eng debug
 LOCAL_ADDRESS_SANITIZER := false
@@ -1434,12 +1471,17 @@
 
 LOCAL_MODULE := libc_malloc_debug_qemu
 LOCAL_CLANG := $(use_clang)
-LOCAL_ADDITIONAL_DEPENDENCIES := $(libc_common_additional_dependencies)
+LOCAL_ADDITIONAL_DEPENDENCIES := \
+    $(libc_common_additional_dependencies) \
+    $(LOCAL_PATH)/version_script.txt \
 
 LOCAL_SHARED_LIBRARIES := libc libdl
 LOCAL_CXX_STL := none
 LOCAL_SYSTEM_SHARED_LIBRARIES :=
 
+# Don't re-export new/delete and friends, even if the compiler really wants to.
+LOCAL_LDFLAGS := -Wl,--version-script,$(LOCAL_PATH)/version_script.txt
+
 # Don't install on release build
 LOCAL_MODULE_TAGS := eng debug
 LOCAL_ADDRESS_SANITIZER := false
@@ -1465,7 +1507,7 @@
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
 
 # TODO: This is to work around b/19059885. Remove after root cause is fixed
-LOCAL_LDFLAGS_arm := -Wl,--hash-style=both
+LOCAL_LDFLAGS_arm := -Wl,--hash-style=sysv
 
 LOCAL_SRC_FILES := $(libstdcxx_common_src_files)
 LOCAL_MODULE:= libstdc++
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 150dd14..b91f5bf 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -174,9 +174,9 @@
 int __fadvise64:fadvise64(int, off64_t, off64_t, int) arm64,mips,mips64,x86_64
 
 int __fstatfs64:fstatfs64(int, size_t, struct statfs*)  arm,mips,x86
-int fstatfs64|fstatfs:fstatfs(int, struct statfs*)  arm64,mips64,x86_64
+int __fstatfs:fstatfs(int, struct statfs*)  arm64,mips64,x86_64
 int __statfs64:statfs64(const char*, size_t, struct statfs*)  arm,mips,x86
-int statfs64|statfs:statfs(const char*, struct statfs*)  arm64,mips64,x86_64
+int __statfs:statfs(const char*, struct statfs*)  arm64,mips64,x86_64
 
 int     fstat64|fstat:fstat64(int, struct stat*)    arm,mips,x86
 int     fstat64|fstat:fstat(int, struct stat*)    arm64,x86_64
diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk
index d72a160..6ef81bb 100644
--- a/libc/arch-arm/arm.mk
+++ b/libc/arch-arm/arm.mk
@@ -39,6 +39,7 @@
     arch-arm/bionic/__bionic_clone.S \
     arch-arm/bionic/_exit_with_stack_teardown.S \
     arch-arm/bionic/libgcc_compat.c \
+    arch-arm/bionic/libgcc_protect_unwind.c \
     arch-arm/bionic/__restore.S \
     arch-arm/bionic/setjmp.S \
     arch-arm/bionic/syscall.S \
diff --git a/libc/arch-arm/bionic/libgcc_protect_unwind.c b/libc/arch-arm/bionic/libgcc_protect_unwind.c
new file mode 100644
index 0000000..6d758fc
--- /dev/null
+++ b/libc/arch-arm/bionic/libgcc_protect_unwind.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: This file should go away once the unwinder migration to libc++.so is complete.
+
+extern char _Unwind_Backtrace __attribute((visibility("protected")));
+extern char __gnu_Unwind_Find_exidx __attribute((visibility("protected")));
+extern char __gnu_Unwind_Restore_VFP_D __attribute((visibility("protected")));
+extern char __gnu_Unwind_Restore_VFP __attribute((visibility("protected")));
+extern char __gnu_Unwind_Restore_VFP_D_16_to_31 __attribute((visibility("protected")));
+extern char __gnu_Unwind_Restore_WMMXD __attribute((visibility("protected")));
+extern char __gnu_Unwind_Restore_WMMXC __attribute((visibility("protected")));
+extern char _Unwind_GetCFA __attribute((visibility("protected")));
+extern char __gnu_Unwind_RaiseException __attribute((visibility("protected")));
+extern char __gnu_Unwind_ForcedUnwind __attribute((visibility("protected")));
+extern char __gnu_Unwind_Resume __attribute((visibility("protected")));
+extern char __gnu_Unwind_Resume_or_Rethrow __attribute((visibility("protected")));
+extern char _Unwind_Complete __attribute((visibility("protected")));
+extern char _Unwind_DeleteException __attribute((visibility("protected")));
+extern char _Unwind_VRS_Get __attribute((visibility("protected")));
+extern char _Unwind_VRS_Set __attribute((visibility("protected")));
+extern char __gnu_Unwind_Backtrace __attribute((visibility("protected")));
+extern char _Unwind_VRS_Pop __attribute((visibility("protected")));
+extern char __gnu_Unwind_Save_VFP_D __attribute((visibility("protected")));
+extern char __gnu_Unwind_Save_VFP __attribute((visibility("protected")));
+extern char __gnu_Unwind_Save_VFP_D_16_to_31 __attribute((visibility("protected")));
+extern char __gnu_Unwind_Save_WMMXD __attribute((visibility("protected")));
+extern char __gnu_Unwind_Save_WMMXC __attribute((visibility("protected")));
+extern char ___Unwind_RaiseException __attribute((visibility("protected")));
+extern char _Unwind_RaiseException __attribute((visibility("protected")));
+extern char ___Unwind_Resume __attribute((visibility("protected")));
+extern char _Unwind_Resume __attribute((visibility("protected")));
+extern char ___Unwind_Resume_or_Rethrow __attribute((visibility("protected")));
+extern char _Unwind_Resume_or_Rethrow __attribute((visibility("protected")));
+extern char ___Unwind_ForcedUnwind __attribute((visibility("protected")));
+extern char _Unwind_ForcedUnwind __attribute((visibility("protected")));
+extern char ___Unwind_Backtrace __attribute((visibility("protected")));
+extern char _Unwind_GetRegionStart __attribute((visibility("protected")));
+extern char _Unwind_GetLanguageSpecificData __attribute((visibility("protected")));
+extern char _Unwind_GetDataRelBase __attribute((visibility("protected")));
+extern char _Unwind_GetTextRelBase __attribute((visibility("protected")));
+// Table of the addresses of the unwinder symbols declared above. NOTE(review): presumably exists only so this file references them - confirm.
+void* __bionic_libgcc_unwind_symbols[] = {
+    &_Unwind_Backtrace,
+    &__gnu_Unwind_Find_exidx,
+    &__gnu_Unwind_Restore_VFP_D,
+    &__gnu_Unwind_Restore_VFP,
+    &__gnu_Unwind_Restore_VFP_D_16_to_31,
+    &__gnu_Unwind_Restore_WMMXD,
+    &__gnu_Unwind_Restore_WMMXC,
+    &_Unwind_GetCFA,
+    &__gnu_Unwind_RaiseException,
+    &__gnu_Unwind_ForcedUnwind,
+    &__gnu_Unwind_Resume,
+    &__gnu_Unwind_Resume_or_Rethrow,
+    &_Unwind_Complete,
+    &_Unwind_DeleteException,
+    &_Unwind_VRS_Get,
+    &_Unwind_VRS_Set,
+    &__gnu_Unwind_Backtrace,
+    &_Unwind_VRS_Pop,
+    &__gnu_Unwind_Save_VFP_D,
+    &__gnu_Unwind_Save_VFP,
+    &__gnu_Unwind_Save_VFP_D_16_to_31,
+    &__gnu_Unwind_Save_WMMXD,
+    &__gnu_Unwind_Save_WMMXC,
+    &___Unwind_RaiseException,
+    &_Unwind_RaiseException,
+    &___Unwind_Resume,
+    &_Unwind_Resume,
+    &___Unwind_Resume_or_Rethrow,
+    &_Unwind_Resume_or_Rethrow,
+    &___Unwind_ForcedUnwind,
+    &_Unwind_ForcedUnwind,
+    &___Unwind_Backtrace,
+    &_Unwind_GetRegionStart,
+    &_Unwind_GetLanguageSpecificData,
+    &_Unwind_GetDataRelBase,
+    &_Unwind_GetTextRelBase,
+};
diff --git a/libc/arch-arm/bionic/setjmp.S b/libc/arch-arm/bionic/setjmp.S
index 8d7786c..8220c08 100644
--- a/libc/arch-arm/bionic/setjmp.S
+++ b/libc/arch-arm/bionic/setjmp.S
@@ -169,7 +169,5 @@
   bx lr
 END(siglongjmp)
 
-  .globl longjmp
-  .equ longjmp, siglongjmp
-  .globl _longjmp
-  .equ _longjmp, siglongjmp
+ALIAS_SYMBOL(longjmp, siglongjmp)
+ALIAS_SYMBOL(_longjmp, siglongjmp)
diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk
index cc502cf..6fa3270 100644
--- a/libc/arch-arm/cortex-a15/cortex-a15.mk
+++ b/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -1,5 +1,4 @@
 libc_bionic_src_files_arm += \
-    arch-arm/generic/bionic/memcmp.S \
     arch-arm/cortex-a15/bionic/memcpy.S \
     arch-arm/cortex-a15/bionic/memset.S \
     arch-arm/cortex-a15/bionic/stpcpy.S \
@@ -10,5 +9,8 @@
     arch-arm/cortex-a15/bionic/__strcpy_chk.S \
     arch-arm/cortex-a15/bionic/strlen.S \
 
-libc_openbsd_src_files_arm += \
-    upstream-openbsd/lib/libc/string/memmove.c \
+libc_bionic_src_files_arm += \
+    arch-arm/generic/bionic/memcmp.S \
+
+libc_bionic_src_files_arm += \
+    arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk
index 7570567..7b38de1 100644
--- a/libc/arch-arm/cortex-a9/cortex-a9.mk
+++ b/libc/arch-arm/cortex-a9/cortex-a9.mk
@@ -1,5 +1,4 @@
 libc_bionic_src_files_arm += \
-    arch-arm/generic/bionic/memcmp.S \
     arch-arm/cortex-a9/bionic/memcpy.S \
     arch-arm/cortex-a9/bionic/memset.S \
     arch-arm/cortex-a9/bionic/stpcpy.S \
@@ -10,5 +9,8 @@
     arch-arm/cortex-a9/bionic/__strcpy_chk.S \
     arch-arm/cortex-a9/bionic/strlen.S \
 
-libc_openbsd_src_files_arm += \
-    upstream-openbsd/lib/libc/string/memmove.c \
+libc_bionic_src_files_arm += \
+    arch-arm/generic/bionic/memcmp.S \
+
+libc_bionic_src_files_arm += \
+    arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk
index 8bd5e8b..88b4d66 100644
--- a/libc/arch-arm/krait/krait.mk
+++ b/libc/arch-arm/krait/krait.mk
@@ -1,5 +1,4 @@
 libc_bionic_src_files_arm += \
-    arch-arm/generic/bionic/memcmp.S \
     arch-arm/krait/bionic/memcpy.S \
     arch-arm/krait/bionic/memset.S \
     arch-arm/krait/bionic/strcmp.S \
@@ -13,5 +12,8 @@
     arch-arm/cortex-a15/bionic/strcpy.S \
     arch-arm/cortex-a15/bionic/strlen.S \
 
-libc_openbsd_src_files_arm += \
-    upstream-openbsd/lib/libc/string/memmove.c \
+libc_bionic_src_files_arm += \
+    arch-arm/generic/bionic/memcmp.S \
+
+libc_bionic_src_files_arm += \
+    arch-arm/denver/bionic/memmove.S \
diff --git a/libc/arch-arm/syscalls/_exit.S b/libc/arch-arm/syscalls/_exit.S
index 328a5ce..77da743 100644
--- a/libc/arch-arm/syscalls/_exit.S
+++ b/libc/arch-arm/syscalls/_exit.S
@@ -13,5 +13,4 @@
     b       __set_errno_internal
 END(_exit)
 
-    .globl _Exit
-    .equ _Exit, _exit
+ALIAS_SYMBOL(_Exit, _exit)
diff --git a/libc/arch-arm/syscalls/fstat64.S b/libc/arch-arm/syscalls/fstat64.S
index c60e7ee..798bba7 100644
--- a/libc/arch-arm/syscalls/fstat64.S
+++ b/libc/arch-arm/syscalls/fstat64.S
@@ -13,5 +13,4 @@
     b       __set_errno_internal
 END(fstat64)
 
-    .globl fstat
-    .equ fstat, fstat64
+ALIAS_SYMBOL(fstat, fstat64)
diff --git a/libc/arch-arm/syscalls/fstatat64.S b/libc/arch-arm/syscalls/fstatat64.S
index ce56c36..03e0052 100644
--- a/libc/arch-arm/syscalls/fstatat64.S
+++ b/libc/arch-arm/syscalls/fstatat64.S
@@ -13,5 +13,4 @@
     b       __set_errno_internal
 END(fstatat64)
 
-    .globl fstatat
-    .equ fstatat, fstatat64
+ALIAS_SYMBOL(fstatat, fstatat64)
diff --git a/libc/arch-arm64/arm64.mk b/libc/arch-arm64/arm64.mk
index 470a038..6a2f313 100644
--- a/libc/arch-arm64/arm64.mk
+++ b/libc/arch-arm64/arm64.mk
@@ -40,6 +40,8 @@
     arch-arm64/bionic/syscall.S \
     arch-arm64/bionic/vfork.S \
 
+# Workaround for http://b/20065774.
+libc_bionic_src_files_arm64 += arch-arm64/bionic/libgcc_compat.c
 
 libc_crt_target_cflags_arm64 := \
     -I$(LOCAL_PATH)/arch-arm64/include
diff --git a/libc/arch-arm64/bionic/libgcc_compat.c b/libc/arch-arm64/bionic/libgcc_compat.c
new file mode 100644
index 0000000..2dae3f5
--- /dev/null
+++ b/libc/arch-arm64/bionic/libgcc_compat.c
@@ -0,0 +1,15 @@
+/* STOPSHIP: remove this once the flounder blobs have been rebuilt (http://b/20065774). */
+
+#if !defined(__clang__)
+
+extern void __clear_cache(char*, char*);
+extern char _Unwind_Backtrace;
+extern char _Unwind_GetIP;
+/* Addresses of the symbols declared above; compiled only when not building with clang. NOTE(review): presumably just keeps them referenced from libc - confirm against the bug. */
+void* __bionic_libgcc_compat_symbols[] = {
+    &__clear_cache,
+    &_Unwind_Backtrace,
+    &_Unwind_GetIP,
+};
+
+#endif
diff --git a/libc/arch-arm64/bionic/setjmp.S b/libc/arch-arm64/bionic/setjmp.S
index 6e119dc..ba0a226 100644
--- a/libc/arch-arm64/bionic/setjmp.S
+++ b/libc/arch-arm64/bionic/setjmp.S
@@ -146,7 +146,5 @@
   ret
 END(siglongjmp)
 
-  .globl longjmp
-  .equ longjmp, siglongjmp
-  .globl _longjmp
-  .equ _longjmp, siglongjmp
+ALIAS_SYMBOL(longjmp, siglongjmp)
+ALIAS_SYMBOL(_longjmp, siglongjmp)
diff --git a/libc/arch-arm64/syscalls/fstatfs64.S b/libc/arch-arm64/syscalls/__fstatfs.S
similarity index 73%
rename from libc/arch-arm64/syscalls/fstatfs64.S
rename to libc/arch-arm64/syscalls/__fstatfs.S
index 2ca2dcd..7e350d6 100644
--- a/libc/arch-arm64/syscalls/fstatfs64.S
+++ b/libc/arch-arm64/syscalls/__fstatfs.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(fstatfs64)
+ENTRY(__fstatfs)
     mov     x8, __NR_fstatfs
     svc     #0
 
@@ -11,7 +11,5 @@
     b.hi    __set_errno_internal
 
     ret
-END(fstatfs64)
-
-    .globl fstatfs
-    .equ fstatfs, fstatfs64
+END(__fstatfs)
+.hidden __fstatfs
diff --git a/libc/arch-arm64/syscalls/statfs64.S b/libc/arch-arm64/syscalls/__statfs.S
similarity index 74%
rename from libc/arch-arm64/syscalls/statfs64.S
rename to libc/arch-arm64/syscalls/__statfs.S
index ec8c588..962c590 100644
--- a/libc/arch-arm64/syscalls/statfs64.S
+++ b/libc/arch-arm64/syscalls/__statfs.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(statfs64)
+ENTRY(__statfs)
     mov     x8, __NR_statfs
     svc     #0
 
@@ -11,7 +11,5 @@
     b.hi    __set_errno_internal
 
     ret
-END(statfs64)
-
-    .globl statfs
-    .equ statfs, statfs64
+END(__statfs)
+.hidden __statfs
diff --git a/libc/arch-arm64/syscalls/_exit.S b/libc/arch-arm64/syscalls/_exit.S
index edf6744..d50f38d 100644
--- a/libc/arch-arm64/syscalls/_exit.S
+++ b/libc/arch-arm64/syscalls/_exit.S
@@ -13,5 +13,4 @@
     ret
 END(_exit)
 
-    .globl _Exit
-    .equ _Exit, _exit
+ALIAS_SYMBOL(_Exit, _exit)
diff --git a/libc/arch-arm64/syscalls/fallocate.S b/libc/arch-arm64/syscalls/fallocate.S
index ef3d4a4..d42a0ba 100644
--- a/libc/arch-arm64/syscalls/fallocate.S
+++ b/libc/arch-arm64/syscalls/fallocate.S
@@ -13,5 +13,4 @@
     ret
 END(fallocate)
 
-    .globl fallocate64
-    .equ fallocate64, fallocate
+ALIAS_SYMBOL(fallocate64, fallocate)
diff --git a/libc/arch-arm64/syscalls/fstat64.S b/libc/arch-arm64/syscalls/fstat64.S
index 85a07f5..f7c9f54 100644
--- a/libc/arch-arm64/syscalls/fstat64.S
+++ b/libc/arch-arm64/syscalls/fstat64.S
@@ -13,5 +13,4 @@
     ret
 END(fstat64)
 
-    .globl fstat
-    .equ fstat, fstat64
+ALIAS_SYMBOL(fstat, fstat64)
diff --git a/libc/arch-arm64/syscalls/fstatat64.S b/libc/arch-arm64/syscalls/fstatat64.S
index dafd982..9f8f2c5 100644
--- a/libc/arch-arm64/syscalls/fstatat64.S
+++ b/libc/arch-arm64/syscalls/fstatat64.S
@@ -13,5 +13,4 @@
     ret
 END(fstatat64)
 
-    .globl fstatat
-    .equ fstatat, fstatat64
+ALIAS_SYMBOL(fstatat, fstatat64)
diff --git a/libc/arch-arm64/syscalls/ftruncate.S b/libc/arch-arm64/syscalls/ftruncate.S
index c6e99f5..c21e098 100644
--- a/libc/arch-arm64/syscalls/ftruncate.S
+++ b/libc/arch-arm64/syscalls/ftruncate.S
@@ -13,5 +13,4 @@
     ret
 END(ftruncate)
 
-    .globl ftruncate64
-    .equ ftruncate64, ftruncate
+ALIAS_SYMBOL(ftruncate64, ftruncate)
diff --git a/libc/arch-arm64/syscalls/getrlimit.S b/libc/arch-arm64/syscalls/getrlimit.S
index 518ab73..03ee9a8 100644
--- a/libc/arch-arm64/syscalls/getrlimit.S
+++ b/libc/arch-arm64/syscalls/getrlimit.S
@@ -13,5 +13,4 @@
     ret
 END(getrlimit)
 
-    .globl getrlimit64
-    .equ getrlimit64, getrlimit
+ALIAS_SYMBOL(getrlimit64, getrlimit)
diff --git a/libc/arch-arm64/syscalls/lseek.S b/libc/arch-arm64/syscalls/lseek.S
index de96df0..93afeb7 100644
--- a/libc/arch-arm64/syscalls/lseek.S
+++ b/libc/arch-arm64/syscalls/lseek.S
@@ -13,5 +13,4 @@
     ret
 END(lseek)
 
-    .globl lseek64
-    .equ lseek64, lseek
+ALIAS_SYMBOL(lseek64, lseek)
diff --git a/libc/arch-arm64/syscalls/mmap.S b/libc/arch-arm64/syscalls/mmap.S
index 64b955e..65371bc 100644
--- a/libc/arch-arm64/syscalls/mmap.S
+++ b/libc/arch-arm64/syscalls/mmap.S
@@ -13,5 +13,4 @@
     ret
 END(mmap)
 
-    .globl mmap64
-    .equ mmap64, mmap
+ALIAS_SYMBOL(mmap64, mmap)
diff --git a/libc/arch-arm64/syscalls/pread64.S b/libc/arch-arm64/syscalls/pread64.S
index eafc044..6c9f0e9 100644
--- a/libc/arch-arm64/syscalls/pread64.S
+++ b/libc/arch-arm64/syscalls/pread64.S
@@ -13,5 +13,4 @@
     ret
 END(pread64)
 
-    .globl pread
-    .equ pread, pread64
+ALIAS_SYMBOL(pread, pread64)
diff --git a/libc/arch-arm64/syscalls/prlimit64.S b/libc/arch-arm64/syscalls/prlimit64.S
index 2bece99..9c018ba 100644
--- a/libc/arch-arm64/syscalls/prlimit64.S
+++ b/libc/arch-arm64/syscalls/prlimit64.S
@@ -13,5 +13,4 @@
     ret
 END(prlimit64)
 
-    .globl prlimit
-    .equ prlimit, prlimit64
+ALIAS_SYMBOL(prlimit, prlimit64)
diff --git a/libc/arch-arm64/syscalls/pwrite64.S b/libc/arch-arm64/syscalls/pwrite64.S
index 6970954..1599c14 100644
--- a/libc/arch-arm64/syscalls/pwrite64.S
+++ b/libc/arch-arm64/syscalls/pwrite64.S
@@ -13,5 +13,4 @@
     ret
 END(pwrite64)
 
-    .globl pwrite
-    .equ pwrite, pwrite64
+ALIAS_SYMBOL(pwrite, pwrite64)
diff --git a/libc/arch-arm64/syscalls/sendfile.S b/libc/arch-arm64/syscalls/sendfile.S
index 17a0d46..50ac12d 100644
--- a/libc/arch-arm64/syscalls/sendfile.S
+++ b/libc/arch-arm64/syscalls/sendfile.S
@@ -13,5 +13,4 @@
     ret
 END(sendfile)
 
-    .globl sendfile64
-    .equ sendfile64, sendfile
+ALIAS_SYMBOL(sendfile64, sendfile)
diff --git a/libc/arch-arm64/syscalls/setrlimit.S b/libc/arch-arm64/syscalls/setrlimit.S
index 6cb6b98..52c75a1 100644
--- a/libc/arch-arm64/syscalls/setrlimit.S
+++ b/libc/arch-arm64/syscalls/setrlimit.S
@@ -13,5 +13,4 @@
     ret
 END(setrlimit)
 
-    .globl setrlimit64
-    .equ setrlimit64, setrlimit
+ALIAS_SYMBOL(setrlimit64, setrlimit)
diff --git a/libc/arch-arm64/syscalls/truncate.S b/libc/arch-arm64/syscalls/truncate.S
index 0e5a33e..e01cc7d 100644
--- a/libc/arch-arm64/syscalls/truncate.S
+++ b/libc/arch-arm64/syscalls/truncate.S
@@ -13,5 +13,4 @@
     ret
 END(truncate)
 
-    .globl truncate64
-    .equ truncate64, truncate
+ALIAS_SYMBOL(truncate64, truncate)
diff --git a/libc/arch-mips/bionic/setjmp.S b/libc/arch-mips/bionic/setjmp.S
index 1c26553..bed9562 100644
--- a/libc/arch-mips/bionic/setjmp.S
+++ b/libc/arch-mips/bionic/setjmp.S
@@ -352,12 +352,5 @@
 	jal	abort
 END(siglongjmp)
 
-
-	.globl	longjmp
-	.type	longjmp, @function
-	.equ	longjmp, siglongjmp	# alias for siglongjmp
-
-
-	.globl	_longjmp
-	.type	_longjmp, @function
-	.equ	_longjmp, siglongjmp	# alias for siglongjmp
+ALIAS_SYMBOL(longjmp, siglongjmp)
+ALIAS_SYMBOL(_longjmp, siglongjmp)
diff --git a/libc/arch-mips/string/memset.S b/libc/arch-mips/string/memset.S
index 3e630ca..09b756b 100644
--- a/libc/arch-mips/string/memset.S
+++ b/libc/arch-mips/string/memset.S
@@ -67,86 +67,6 @@
 #define DBG
 #endif
 
-/*
- * void _memset16(uint16_t* dst, uint16_t value, size_t size);
- */
-
-LEAF(_memset16,0)
-	.set noreorder
-DBG	/* Check parameters */
-DBG	andi	t0,a0,1			# a0 must be halfword aligned
-DBG	tne	t0,zero
-DBG	andi	t2,a2,1			# a2 must be even
-DBG	tne	t2,zero
-
-#ifdef FIXARGS
-	# ensure count is even
-#if (__mips==32) && (__mips_isa_rev>=2)
-	ins	a2,zero,0,1
-#else
-	ori	a2,1
-	xori	a2,1
-#endif
-#endif
-
-#if (__mips==32) && (__mips_isa_rev>=2)
-	ins	a1,a1,16,16
-#else
-	andi	a1,0xffff
-	sll	t3,a1,16
-	or	a1,t3
-#endif
-
-	beqz	a2,.Ldone
-	 andi	t1,a0,2
-	beqz	t1,.Lalignok
-	 addu	t0,a0,a2		# t0 is the "past the end" address
-	sh	a1,0(a0)		# store one halfword to get aligned
-	addu	a0,2
-	subu	a2,2
-.Lalignok:
-	slti	t1,a2,4			# .Laligned for 4 or more bytes
-	beqz	t1,.Laligned
-	 sne	t1,a2,2			# one more halfword?
-	bnez	t1,.Ldone
-	 nop
-	sh	a1,0(a0)
-.Ldone:
-	j	ra
-	 nop
-	.set reorder
-END(_memset16)
-
-/*
- * void _memset32(uint32_t* dst, uint32_t value, size_t size);
- */
-
-LEAF(_memset32,0)
-	.set noreorder
-DBG	/* Check parameters */
-DBG	andi	t0,a0,3			# a0 must be word aligned
-DBG	tne	t0,zero
-DBG	andi	t2,a2,3			# a2 must be a multiple of 4 bytes
-DBG	tne	t2,zero
-
-#ifdef FIXARGS
-	# ensure count is a multiple of 4
-#if (__mips==32) && (__mips_isa_rev>=2)
-	ins	$a2,$0,0,2
-#else
-	ori	a2,3
-	xori	a2,3
-#endif
-#endif
-
-	bnez	a2,.Laligned		# any work to do?
-	 addu	t0,a0,a2		# t0 is the "past the end" address
-
-	j	ra
-	 nop
-	.set reorder
-END(_memset32)
-
 LEAF(memset,0)
 
 	.set	noreorder
diff --git a/libc/arch-mips/syscalls/_exit.S b/libc/arch-mips/syscalls/_exit.S
index 5ac1324..6e97aac 100644
--- a/libc/arch-mips/syscalls/_exit.S
+++ b/libc/arch-mips/syscalls/_exit.S
@@ -18,5 +18,4 @@
     .set reorder
 END(_exit)
 
-    .globl _Exit
-    .equ _Exit, _exit
+ALIAS_SYMBOL(_Exit, _exit)
diff --git a/libc/arch-mips/syscalls/fstat64.S b/libc/arch-mips/syscalls/fstat64.S
index 525c23c..16e2791 100644
--- a/libc/arch-mips/syscalls/fstat64.S
+++ b/libc/arch-mips/syscalls/fstat64.S
@@ -18,5 +18,4 @@
     .set reorder
 END(fstat64)
 
-    .globl fstat
-    .equ fstat, fstat64
+ALIAS_SYMBOL(fstat, fstat64)
diff --git a/libc/arch-mips/syscalls/fstatat64.S b/libc/arch-mips/syscalls/fstatat64.S
index f7b8e1d..ebcf6b0 100644
--- a/libc/arch-mips/syscalls/fstatat64.S
+++ b/libc/arch-mips/syscalls/fstatat64.S
@@ -18,5 +18,4 @@
     .set reorder
 END(fstatat64)
 
-    .globl fstatat
-    .equ fstatat, fstatat64
+ALIAS_SYMBOL(fstatat, fstatat64)
diff --git a/libc/arch-mips64/string/memset.S b/libc/arch-mips64/string/memset.S
index 3e630ca..09b756b 100644
--- a/libc/arch-mips64/string/memset.S
+++ b/libc/arch-mips64/string/memset.S
@@ -67,86 +67,6 @@
 #define DBG
 #endif
 
-/*
- * void _memset16(uint16_t* dst, uint16_t value, size_t size);
- */
-
-LEAF(_memset16,0)
-	.set noreorder
-DBG	/* Check parameters */
-DBG	andi	t0,a0,1			# a0 must be halfword aligned
-DBG	tne	t0,zero
-DBG	andi	t2,a2,1			# a2 must be even
-DBG	tne	t2,zero
-
-#ifdef FIXARGS
-	# ensure count is even
-#if (__mips==32) && (__mips_isa_rev>=2)
-	ins	a2,zero,0,1
-#else
-	ori	a2,1
-	xori	a2,1
-#endif
-#endif
-
-#if (__mips==32) && (__mips_isa_rev>=2)
-	ins	a1,a1,16,16
-#else
-	andi	a1,0xffff
-	sll	t3,a1,16
-	or	a1,t3
-#endif
-
-	beqz	a2,.Ldone
-	 andi	t1,a0,2
-	beqz	t1,.Lalignok
-	 addu	t0,a0,a2		# t0 is the "past the end" address
-	sh	a1,0(a0)		# store one halfword to get aligned
-	addu	a0,2
-	subu	a2,2
-.Lalignok:
-	slti	t1,a2,4			# .Laligned for 4 or more bytes
-	beqz	t1,.Laligned
-	 sne	t1,a2,2			# one more halfword?
-	bnez	t1,.Ldone
-	 nop
-	sh	a1,0(a0)
-.Ldone:
-	j	ra
-	 nop
-	.set reorder
-END(_memset16)
-
-/*
- * void _memset32(uint32_t* dst, uint32_t value, size_t size);
- */
-
-LEAF(_memset32,0)
-	.set noreorder
-DBG	/* Check parameters */
-DBG	andi	t0,a0,3			# a0 must be word aligned
-DBG	tne	t0,zero
-DBG	andi	t2,a2,3			# a2 must be a multiple of 4 bytes
-DBG	tne	t2,zero
-
-#ifdef FIXARGS
-	# ensure count is a multiple of 4
-#if (__mips==32) && (__mips_isa_rev>=2)
-	ins	$a2,$0,0,2
-#else
-	ori	a2,3
-	xori	a2,3
-#endif
-#endif
-
-	bnez	a2,.Laligned		# any work to do?
-	 addu	t0,a0,a2		# t0 is the "past the end" address
-
-	j	ra
-	 nop
-	.set reorder
-END(_memset32)
-
 LEAF(memset,0)
 
 	.set	noreorder
diff --git a/libc/arch-mips64/syscalls/fstatfs64.S b/libc/arch-mips64/syscalls/__fstatfs.S
similarity index 81%
rename from libc/arch-mips64/syscalls/fstatfs64.S
rename to libc/arch-mips64/syscalls/__fstatfs.S
index 12e885c..8766e22 100644
--- a/libc/arch-mips64/syscalls/fstatfs64.S
+++ b/libc/arch-mips64/syscalls/__fstatfs.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(fstatfs64)
+ENTRY(__fstatfs)
     .set push
     .set noreorder
     li v0, __NR_fstatfs
@@ -22,7 +22,5 @@
     j t9
     move ra, t0
     .set pop
-END(fstatfs64)
-
-    .globl fstatfs
-    .equ fstatfs, fstatfs64
+END(__fstatfs)
+.hidden __fstatfs
diff --git a/libc/arch-mips64/syscalls/statfs64.S b/libc/arch-mips64/syscalls/__statfs.S
similarity index 82%
rename from libc/arch-mips64/syscalls/statfs64.S
rename to libc/arch-mips64/syscalls/__statfs.S
index 74351f7..52db4e2 100644
--- a/libc/arch-mips64/syscalls/statfs64.S
+++ b/libc/arch-mips64/syscalls/__statfs.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(statfs64)
+ENTRY(__statfs)
     .set push
     .set noreorder
     li v0, __NR_statfs
@@ -22,7 +22,5 @@
     j t9
     move ra, t0
     .set pop
-END(statfs64)
-
-    .globl statfs
-    .equ statfs, statfs64
+END(__statfs)
+.hidden __statfs
diff --git a/libc/arch-mips64/syscalls/_exit.S b/libc/arch-mips64/syscalls/_exit.S
index da5a2f7..37f8119 100644
--- a/libc/arch-mips64/syscalls/_exit.S
+++ b/libc/arch-mips64/syscalls/_exit.S
@@ -24,5 +24,4 @@
     .set pop
 END(_exit)
 
-    .globl _Exit
-    .equ _Exit, _exit
+ALIAS_SYMBOL(_Exit, _exit)
diff --git a/libc/arch-mips64/syscalls/fallocate.S b/libc/arch-mips64/syscalls/fallocate.S
index c1ef0ed..14e25a0 100644
--- a/libc/arch-mips64/syscalls/fallocate.S
+++ b/libc/arch-mips64/syscalls/fallocate.S
@@ -24,5 +24,4 @@
     .set pop
 END(fallocate)
 
-    .globl fallocate64
-    .equ fallocate64, fallocate
+ALIAS_SYMBOL(fallocate64, fallocate)
diff --git a/libc/arch-mips64/syscalls/ftruncate.S b/libc/arch-mips64/syscalls/ftruncate.S
index 58b847b..063e8f3 100644
--- a/libc/arch-mips64/syscalls/ftruncate.S
+++ b/libc/arch-mips64/syscalls/ftruncate.S
@@ -24,5 +24,4 @@
     .set pop
 END(ftruncate)
 
-    .globl ftruncate64
-    .equ ftruncate64, ftruncate
+ALIAS_SYMBOL(ftruncate64, ftruncate)
diff --git a/libc/arch-mips64/syscalls/getrlimit.S b/libc/arch-mips64/syscalls/getrlimit.S
index 7576c17..5e2a82a 100644
--- a/libc/arch-mips64/syscalls/getrlimit.S
+++ b/libc/arch-mips64/syscalls/getrlimit.S
@@ -24,5 +24,4 @@
     .set pop
 END(getrlimit)
 
-    .globl getrlimit64
-    .equ getrlimit64, getrlimit
+ALIAS_SYMBOL(getrlimit64, getrlimit)
diff --git a/libc/arch-mips64/syscalls/lseek.S b/libc/arch-mips64/syscalls/lseek.S
index 5c92d70..3bfc29d 100644
--- a/libc/arch-mips64/syscalls/lseek.S
+++ b/libc/arch-mips64/syscalls/lseek.S
@@ -24,5 +24,4 @@
     .set pop
 END(lseek)
 
-    .globl lseek64
-    .equ lseek64, lseek
+ALIAS_SYMBOL(lseek64, lseek)
diff --git a/libc/arch-mips64/syscalls/mmap.S b/libc/arch-mips64/syscalls/mmap.S
index 393271a..cc53eb2 100644
--- a/libc/arch-mips64/syscalls/mmap.S
+++ b/libc/arch-mips64/syscalls/mmap.S
@@ -24,5 +24,4 @@
     .set pop
 END(mmap)
 
-    .globl mmap64
-    .equ mmap64, mmap
+ALIAS_SYMBOL(mmap64, mmap)
diff --git a/libc/arch-mips64/syscalls/pread64.S b/libc/arch-mips64/syscalls/pread64.S
index 90e0612..7965ba9 100644
--- a/libc/arch-mips64/syscalls/pread64.S
+++ b/libc/arch-mips64/syscalls/pread64.S
@@ -24,5 +24,4 @@
     .set pop
 END(pread64)
 
-    .globl pread
-    .equ pread, pread64
+ALIAS_SYMBOL(pread, pread64)
diff --git a/libc/arch-mips64/syscalls/prlimit64.S b/libc/arch-mips64/syscalls/prlimit64.S
index 5f0ba1d..e04a5b6 100644
--- a/libc/arch-mips64/syscalls/prlimit64.S
+++ b/libc/arch-mips64/syscalls/prlimit64.S
@@ -24,5 +24,4 @@
     .set pop
 END(prlimit64)
 
-    .globl prlimit
-    .equ prlimit, prlimit64
+ALIAS_SYMBOL(prlimit, prlimit64)
diff --git a/libc/arch-mips64/syscalls/pwrite64.S b/libc/arch-mips64/syscalls/pwrite64.S
index e34f8db..97e0183 100644
--- a/libc/arch-mips64/syscalls/pwrite64.S
+++ b/libc/arch-mips64/syscalls/pwrite64.S
@@ -24,5 +24,4 @@
     .set pop
 END(pwrite64)
 
-    .globl pwrite
-    .equ pwrite, pwrite64
+ALIAS_SYMBOL(pwrite, pwrite64)
diff --git a/libc/arch-mips64/syscalls/sendfile.S b/libc/arch-mips64/syscalls/sendfile.S
index f330242..a50459e 100644
--- a/libc/arch-mips64/syscalls/sendfile.S
+++ b/libc/arch-mips64/syscalls/sendfile.S
@@ -24,5 +24,4 @@
     .set pop
 END(sendfile)
 
-    .globl sendfile64
-    .equ sendfile64, sendfile
+ALIAS_SYMBOL(sendfile64, sendfile)
diff --git a/libc/arch-mips64/syscalls/setrlimit.S b/libc/arch-mips64/syscalls/setrlimit.S
index 0e5e80e..be6fdc3 100644
--- a/libc/arch-mips64/syscalls/setrlimit.S
+++ b/libc/arch-mips64/syscalls/setrlimit.S
@@ -24,5 +24,4 @@
     .set pop
 END(setrlimit)
 
-    .globl setrlimit64
-    .equ setrlimit64, setrlimit
+ALIAS_SYMBOL(setrlimit64, setrlimit)
diff --git a/libc/arch-mips64/syscalls/truncate.S b/libc/arch-mips64/syscalls/truncate.S
index fb3b7eb..b832796 100644
--- a/libc/arch-mips64/syscalls/truncate.S
+++ b/libc/arch-mips64/syscalls/truncate.S
@@ -24,5 +24,4 @@
     .set pop
 END(truncate)
 
-    .globl truncate64
-    .equ truncate64, truncate
+ALIAS_SYMBOL(truncate64, truncate)
diff --git a/libc/arch-x86/bionic/__bionic_clone.S b/libc/arch-x86/bionic/__bionic_clone.S
index ef78aee..1a6f642 100644
--- a/libc/arch-x86/bionic/__bionic_clone.S
+++ b/libc/arch-x86/bionic/__bionic_clone.S
@@ -3,8 +3,14 @@
 // pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
 ENTRY(__bionic_clone)
         pushl   %ebx
+        .cfi_adjust_cfa_offset 4
+        .cfi_rel_offset ebx, 0
         pushl   %esi
+        .cfi_adjust_cfa_offset 4
+        .cfi_rel_offset esi, 0
         pushl   %edi
+        .cfi_adjust_cfa_offset 4
+        .cfi_rel_offset edi, 0
 
         # Load system call arguments into registers.
         movl    16(%esp), %ebx   # flags
@@ -46,8 +52,14 @@
         # We're the parent; nothing to do.
 .L_bc_return:
         popl    %edi
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore edi
         popl    %esi
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore esi
         popl    %ebx
+        .cfi_adjust_cfa_offset -4
+        .cfi_restore ebx
         ret
 END(__bionic_clone)
 .hidden __bionic_clone
diff --git a/libc/arch-x86/bionic/setjmp.S b/libc/arch-x86/bionic/setjmp.S
index 25a016d..18ad810 100644
--- a/libc/arch-x86/bionic/setjmp.S
+++ b/libc/arch-x86/bionic/setjmp.S
@@ -123,7 +123,5 @@
   ret
 END(siglongjmp)
 
-  .globl longjmp
-  .equ longjmp, siglongjmp
-  .globl _longjmp
-  .equ _longjmp, siglongjmp
+ALIAS_SYMBOL(longjmp, siglongjmp)
+ALIAS_SYMBOL(_longjmp, siglongjmp)
diff --git a/libc/arch-x86/bionic/syscall.S b/libc/arch-x86/bionic/syscall.S
index f85ec39..2a15102 100644
--- a/libc/arch-x86/bionic/syscall.S
+++ b/libc/arch-x86/bionic/syscall.S
@@ -15,9 +15,17 @@
 ENTRY(syscall)
     # Push the callee save registers.
     push    %ebx
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset ebx, 0
     push    %esi
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset esi, 0
     push    %edi
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset edi, 0
     push    %ebp
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset ebp, 0
 
     # Load all the arguments from the calling frame.
     # (Not all will be valid, depending on the syscall.)
@@ -43,8 +51,16 @@
 1:
     # Restore the callee save registers.
     pop    %ebp
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore ebp
     pop    %edi
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore edi
     pop    %esi
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore esi
     pop    %ebx
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore ebx
     ret
 END(syscall)
diff --git a/libc/arch-x86/bionic/vfork.S b/libc/arch-x86/bionic/vfork.S
index 6c02910..ca7af0f 100644
--- a/libc/arch-x86/bionic/vfork.S
+++ b/libc/arch-x86/bionic/vfork.S
@@ -32,6 +32,8 @@
 
 ENTRY(vfork)
   popl    %ecx  // Grab the return address.
+  .cfi_adjust_cfa_offset 4
+  .cfi_rel_offset ecx, 0
   movl    $__NR_vfork, %eax
   int     $0x80
   cmpl    $-MAX_ERRNO, %eax
diff --git a/libc/arch-x86/syscalls/_exit.S b/libc/arch-x86/syscalls/_exit.S
index 8528ee4..9945b35 100644
--- a/libc/arch-x86/syscalls/_exit.S
+++ b/libc/arch-x86/syscalls/_exit.S
@@ -20,5 +20,4 @@
     ret
 END(_exit)
 
-    .globl _Exit
-    .equ _Exit, _exit
+ALIAS_SYMBOL(_Exit, _exit)
diff --git a/libc/arch-x86/syscalls/fstat64.S b/libc/arch-x86/syscalls/fstat64.S
index fc16233..ba385a4 100644
--- a/libc/arch-x86/syscalls/fstat64.S
+++ b/libc/arch-x86/syscalls/fstat64.S
@@ -25,5 +25,4 @@
     ret
 END(fstat64)
 
-    .globl fstat
-    .equ fstat, fstat64
+ALIAS_SYMBOL(fstat, fstat64)
diff --git a/libc/arch-x86/syscalls/fstatat64.S b/libc/arch-x86/syscalls/fstatat64.S
index a3697e6..90e87b6 100644
--- a/libc/arch-x86/syscalls/fstatat64.S
+++ b/libc/arch-x86/syscalls/fstatat64.S
@@ -35,5 +35,4 @@
     ret
 END(fstatat64)
 
-    .globl fstatat
-    .equ fstatat, fstatat64
+ALIAS_SYMBOL(fstatat, fstatat64)
diff --git a/libc/arch-x86_64/bionic/setjmp.S b/libc/arch-x86_64/bionic/setjmp.S
index 28981fa..5559f54 100644
--- a/libc/arch-x86_64/bionic/setjmp.S
+++ b/libc/arch-x86_64/bionic/setjmp.S
@@ -129,7 +129,5 @@
   ret
 END(siglongjmp)
 
-  .globl longjmp
-  .equ longjmp, siglongjmp
-  .globl _longjmp
-  .equ _longjmp, siglongjmp
+ALIAS_SYMBOL(longjmp, siglongjmp)
+ALIAS_SYMBOL(_longjmp, siglongjmp)
diff --git a/libc/arch-x86_64/syscalls/fstatfs64.S b/libc/arch-x86_64/syscalls/__fstatfs.S
similarity index 75%
rename from libc/arch-x86_64/syscalls/fstatfs64.S
rename to libc/arch-x86_64/syscalls/__fstatfs.S
index f727350..b50e355 100644
--- a/libc/arch-x86_64/syscalls/fstatfs64.S
+++ b/libc/arch-x86_64/syscalls/__fstatfs.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(fstatfs64)
+ENTRY(__fstatfs)
     movl    $__NR_fstatfs, %eax
     syscall
     cmpq    $-MAX_ERRNO, %rax
@@ -12,7 +12,5 @@
     call    __set_errno_internal
 1:
     ret
-END(fstatfs64)
-
-    .globl fstatfs
-    .equ fstatfs, fstatfs64
+END(__fstatfs)
+.hidden __fstatfs
diff --git a/libc/arch-x86_64/syscalls/statfs64.S b/libc/arch-x86_64/syscalls/__statfs.S
similarity index 77%
rename from libc/arch-x86_64/syscalls/statfs64.S
rename to libc/arch-x86_64/syscalls/__statfs.S
index 16f6bdd..607a809 100644
--- a/libc/arch-x86_64/syscalls/statfs64.S
+++ b/libc/arch-x86_64/syscalls/__statfs.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(statfs64)
+ENTRY(__statfs)
     movl    $__NR_statfs, %eax
     syscall
     cmpq    $-MAX_ERRNO, %rax
@@ -12,7 +12,5 @@
     call    __set_errno_internal
 1:
     ret
-END(statfs64)
-
-    .globl statfs
-    .equ statfs, statfs64
+END(__statfs)
+.hidden __statfs
diff --git a/libc/arch-x86_64/syscalls/_exit.S b/libc/arch-x86_64/syscalls/_exit.S
index c79091d..1ab4d4f 100644
--- a/libc/arch-x86_64/syscalls/_exit.S
+++ b/libc/arch-x86_64/syscalls/_exit.S
@@ -14,5 +14,4 @@
     ret
 END(_exit)
 
-    .globl _Exit
-    .equ _Exit, _exit
+ALIAS_SYMBOL(_Exit, _exit)
diff --git a/libc/arch-x86_64/syscalls/fallocate.S b/libc/arch-x86_64/syscalls/fallocate.S
index 8307f7e..f6f891b 100644
--- a/libc/arch-x86_64/syscalls/fallocate.S
+++ b/libc/arch-x86_64/syscalls/fallocate.S
@@ -15,5 +15,4 @@
     ret
 END(fallocate)
 
-    .globl fallocate64
-    .equ fallocate64, fallocate
+ALIAS_SYMBOL(fallocate64, fallocate)
diff --git a/libc/arch-x86_64/syscalls/fstat64.S b/libc/arch-x86_64/syscalls/fstat64.S
index de57668..a0d4fa1 100644
--- a/libc/arch-x86_64/syscalls/fstat64.S
+++ b/libc/arch-x86_64/syscalls/fstat64.S
@@ -14,5 +14,4 @@
     ret
 END(fstat64)
 
-    .globl fstat
-    .equ fstat, fstat64
+ALIAS_SYMBOL(fstat, fstat64)
diff --git a/libc/arch-x86_64/syscalls/fstatat64.S b/libc/arch-x86_64/syscalls/fstatat64.S
index 47785bb..1984d68 100644
--- a/libc/arch-x86_64/syscalls/fstatat64.S
+++ b/libc/arch-x86_64/syscalls/fstatat64.S
@@ -15,5 +15,4 @@
     ret
 END(fstatat64)
 
-    .globl fstatat
-    .equ fstatat, fstatat64
+ALIAS_SYMBOL(fstatat, fstatat64)
diff --git a/libc/arch-x86_64/syscalls/ftruncate.S b/libc/arch-x86_64/syscalls/ftruncate.S
index 0365368..7917468 100644
--- a/libc/arch-x86_64/syscalls/ftruncate.S
+++ b/libc/arch-x86_64/syscalls/ftruncate.S
@@ -14,5 +14,4 @@
     ret
 END(ftruncate)
 
-    .globl ftruncate64
-    .equ ftruncate64, ftruncate
+ALIAS_SYMBOL(ftruncate64, ftruncate)
diff --git a/libc/arch-x86_64/syscalls/getrlimit.S b/libc/arch-x86_64/syscalls/getrlimit.S
index 2d272a1..00ed08a 100644
--- a/libc/arch-x86_64/syscalls/getrlimit.S
+++ b/libc/arch-x86_64/syscalls/getrlimit.S
@@ -14,5 +14,4 @@
     ret
 END(getrlimit)
 
-    .globl getrlimit64
-    .equ getrlimit64, getrlimit
+ALIAS_SYMBOL(getrlimit64, getrlimit)
diff --git a/libc/arch-x86_64/syscalls/lseek.S b/libc/arch-x86_64/syscalls/lseek.S
index 153b935..69d60c2 100644
--- a/libc/arch-x86_64/syscalls/lseek.S
+++ b/libc/arch-x86_64/syscalls/lseek.S
@@ -14,5 +14,4 @@
     ret
 END(lseek)
 
-    .globl lseek64
-    .equ lseek64, lseek
+ALIAS_SYMBOL(lseek64, lseek)
diff --git a/libc/arch-x86_64/syscalls/mmap.S b/libc/arch-x86_64/syscalls/mmap.S
index 8aa4780..0c25473 100644
--- a/libc/arch-x86_64/syscalls/mmap.S
+++ b/libc/arch-x86_64/syscalls/mmap.S
@@ -15,5 +15,4 @@
     ret
 END(mmap)
 
-    .globl mmap64
-    .equ mmap64, mmap
+ALIAS_SYMBOL(mmap64, mmap)
diff --git a/libc/arch-x86_64/syscalls/pread64.S b/libc/arch-x86_64/syscalls/pread64.S
index 3aa56e5..eaa47b1 100644
--- a/libc/arch-x86_64/syscalls/pread64.S
+++ b/libc/arch-x86_64/syscalls/pread64.S
@@ -15,5 +15,4 @@
     ret
 END(pread64)
 
-    .globl pread
-    .equ pread, pread64
+ALIAS_SYMBOL(pread, pread64)
diff --git a/libc/arch-x86_64/syscalls/prlimit64.S b/libc/arch-x86_64/syscalls/prlimit64.S
index 63ec492..737b863 100644
--- a/libc/arch-x86_64/syscalls/prlimit64.S
+++ b/libc/arch-x86_64/syscalls/prlimit64.S
@@ -15,5 +15,4 @@
     ret
 END(prlimit64)
 
-    .globl prlimit
-    .equ prlimit, prlimit64
+ALIAS_SYMBOL(prlimit, prlimit64)
diff --git a/libc/arch-x86_64/syscalls/pwrite64.S b/libc/arch-x86_64/syscalls/pwrite64.S
index 2779fb4..edb60af 100644
--- a/libc/arch-x86_64/syscalls/pwrite64.S
+++ b/libc/arch-x86_64/syscalls/pwrite64.S
@@ -15,5 +15,4 @@
     ret
 END(pwrite64)
 
-    .globl pwrite
-    .equ pwrite, pwrite64
+ALIAS_SYMBOL(pwrite, pwrite64)
diff --git a/libc/arch-x86_64/syscalls/sendfile.S b/libc/arch-x86_64/syscalls/sendfile.S
index 117b0aa..c0fa4ee 100644
--- a/libc/arch-x86_64/syscalls/sendfile.S
+++ b/libc/arch-x86_64/syscalls/sendfile.S
@@ -15,5 +15,4 @@
     ret
 END(sendfile)
 
-    .globl sendfile64
-    .equ sendfile64, sendfile
+ALIAS_SYMBOL(sendfile64, sendfile)
diff --git a/libc/arch-x86_64/syscalls/setrlimit.S b/libc/arch-x86_64/syscalls/setrlimit.S
index ef03068..3843ff9 100644
--- a/libc/arch-x86_64/syscalls/setrlimit.S
+++ b/libc/arch-x86_64/syscalls/setrlimit.S
@@ -14,5 +14,4 @@
     ret
 END(setrlimit)
 
-    .globl setrlimit64
-    .equ setrlimit64, setrlimit
+ALIAS_SYMBOL(setrlimit64, setrlimit)
diff --git a/libc/arch-x86_64/syscalls/truncate.S b/libc/arch-x86_64/syscalls/truncate.S
index 2ecd05b..4b953a3 100644
--- a/libc/arch-x86_64/syscalls/truncate.S
+++ b/libc/arch-x86_64/syscalls/truncate.S
@@ -14,5 +14,4 @@
     ret
 END(truncate)
 
-    .globl truncate64
-    .equ truncate64, truncate
+ALIAS_SYMBOL(truncate64, truncate)
diff --git a/libc/bionic/NetdClient.cpp b/libc/bionic/NetdClient.cpp
index 5b0f4fd..b117d72 100644
--- a/libc/bionic/NetdClient.cpp
+++ b/libc/bionic/NetdClient.cpp
@@ -34,7 +34,7 @@
 }
 
 static void netdClientInitImpl() {
-    void* netdClientHandle = dlopen("libnetd_client.so", RTLD_LAZY);
+    void* netdClientHandle = dlopen("libnetd_client.so", RTLD_NOW);
     if (netdClientHandle == NULL) {
         // If the library is not available, it's not an error. We'll just use
         // default implementations of functions that it would've overridden.
diff --git a/libc/bionic/__cxa_thread_atexit_impl.cpp b/libc/bionic/__cxa_thread_atexit_impl.cpp
new file mode 100644
index 0000000..0e427d3
--- /dev/null
+++ b/libc/bionic/__cxa_thread_atexit_impl.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <sys/cdefs.h>
+
+struct thread_local_dtor {  // One registered callback; nodes form a singly linked per-thread list.
+  void (*func) (void *);  // Callback that __cxa_thread_finalize invokes.
+  void *arg;  // Passed to func as its only argument.
+  void *dso_handle; // Stored at registration; never read by the code in this file.
+  thread_local_dtor* next;  // Node that was the list head when this one was pushed (may be nullptr).
+};
+// Head of the calling thread's list (__thread storage); the newest registration is first.
+static __thread thread_local_dtor* thread_local_dtors = nullptr;
+// Pushes a node holding func/arg/dso_handle onto the calling thread's list for __cxa_thread_finalize.
+extern "C" int __cxa_thread_atexit_impl(void (*func) (void *), void *arg, void *dso_handle) {
+  thread_local_dtor* dtor = new thread_local_dtor();
+  // Copy the registration details into the freshly allocated node.
+  dtor->func = func;
+  dtor->arg = arg;
+  dtor->dso_handle = dso_handle;
+  dtor->next = thread_local_dtors;
+  // Make the new node the head, so it will be the first one run.
+  thread_local_dtors = dtor;
+  // No failure path is reported: the return value is always 0.
+  return 0;
+}
+// Calls and frees every node on the calling thread's list, newest registration first.
+extern "C" __LIBC_HIDDEN__ void __cxa_thread_finalize() {
+  while (thread_local_dtors != nullptr) {
+    thread_local_dtor* current = thread_local_dtors;
+    thread_local_dtors = current->next;
+    // The node is already unlinked here, so a registration made by the callback is picked up next.
+    current->func(current->arg);
+    delete current;
+  }
+}
diff --git a/libc/bionic/__pread64_chk.cpp b/libc/bionic/__pread64_chk.cpp
new file mode 100644
index 0000000..5d6ad2d
--- /dev/null
+++ b/libc/bionic/__pread64_chk.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#undef _FORTIFY_SOURCE
+#include <unistd.h>
+#include "private/libc_logging.h"
+// Checks count against buf_size and SSIZE_MAX (calling __fortify_chk_fail on violation), then calls pread64().
+extern "C" ssize_t __pread64_chk(int fd, void* buf, size_t count, off64_t offset, size_t buf_size) {
+  if (__predict_false(count > buf_size)) {
+    __fortify_chk_fail("pread64: prevented write past end of buffer", 0);
+  }
+  // A count above SSIZE_MAX could not be reported through the ssize_t return value.
+  if (__predict_false(count > SSIZE_MAX)) {
+    __fortify_chk_fail("pread64: count > SSIZE_MAX", 0);
+  }
+  // fd, buf, count and offset are forwarded unchanged; buf_size is used only by the check above.
+  return pread64(fd, buf, count, offset);
+}
diff --git a/libc/bionic/__pread_chk.cpp b/libc/bionic/__pread_chk.cpp
new file mode 100644
index 0000000..7109ce6
--- /dev/null
+++ b/libc/bionic/__pread_chk.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#undef _FORTIFY_SOURCE
+#include <unistd.h>
+#include "private/libc_logging.h"
+// Checks count against buf_size and SSIZE_MAX (calling __fortify_chk_fail on violation), then calls pread().
+extern "C" ssize_t __pread_chk(int fd, void* buf, size_t count, off_t offset, size_t buf_size) {
+  if (__predict_false(count > buf_size)) {
+    __fortify_chk_fail("pread: prevented write past end of buffer", 0);
+  }
+  // A count above SSIZE_MAX could not be reported through the ssize_t return value.
+  if (__predict_false(count > SSIZE_MAX)) {
+    __fortify_chk_fail("pread: count > SSIZE_MAX", 0);
+  }
+  // fd, buf, count and offset are forwarded unchanged; buf_size is used only by the check above.
+  return pread(fd, buf, count, offset);
+}
diff --git a/libc/bionic/__readlink_chk.cpp b/libc/bionic/__readlink_chk.cpp
new file mode 100644
index 0000000..f19f917
--- /dev/null
+++ b/libc/bionic/__readlink_chk.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#undef _FORTIFY_SOURCE
+#include <unistd.h>
+#include "private/libc_logging.h"
+// Checks size against buf_size and SSIZE_MAX (calling __fortify_chk_fail on violation), then calls readlink().
+extern "C" ssize_t __readlink_chk(const char* path, char* buf, size_t size, size_t buf_size) {
+  if (__predict_false(size > buf_size)) {
+    __fortify_chk_fail("readlink: prevented write past end of buffer", 0);
+  }
+  // A size above SSIZE_MAX could not be reported through the ssize_t return value.
+  if (__predict_false(size > SSIZE_MAX)) {
+    __fortify_chk_fail("readlink: size > SSIZE_MAX", 0);
+  }
+  // path, buf and size are forwarded unchanged; buf_size is used only by the check above.
+  return readlink(path, buf, size);
+}
diff --git a/libc/bionic/__readlinkat_chk.cpp b/libc/bionic/__readlinkat_chk.cpp
new file mode 100644
index 0000000..a11db8e
--- /dev/null
+++ b/libc/bionic/__readlinkat_chk.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#undef _FORTIFY_SOURCE
+#include <unistd.h>
+#include "private/libc_logging.h"
+// Checks size against buf_size and SSIZE_MAX (calling __fortify_chk_fail on violation), then calls readlinkat().
+extern "C" ssize_t __readlinkat_chk(int dirfd, const char* path, char* buf, size_t size, size_t buf_size) {
+  if (__predict_false(size > buf_size)) {
+    __fortify_chk_fail("readlinkat: prevented write past end of buffer", 0);
+  }
+  // A size above SSIZE_MAX could not be reported through the ssize_t return value.
+  if (__predict_false(size > SSIZE_MAX)) {
+    __fortify_chk_fail("readlinkat: size > SSIZE_MAX", 0);
+  }
+  // dirfd, path, buf and size are forwarded unchanged; buf_size is used only by the check above.
+  return readlinkat(dirfd, path, buf, size);
+}
diff --git a/libc/bionic/getauxval.cpp b/libc/bionic/getauxval.cpp
index bc41824..22922b9 100644
--- a/libc/bionic/getauxval.cpp
+++ b/libc/bionic/getauxval.cpp
@@ -31,6 +31,7 @@
 #include <sys/auxv.h>
 #include <private/bionic_auxv.h>
 #include <elf.h>
+#include <errno.h>
 
 __LIBC_HIDDEN__ ElfW(auxv_t)* __libc_auxv = NULL;
 
@@ -40,5 +41,6 @@
       return v->a_un.a_val;
     }
   }
+  errno = ENOENT;
   return 0;
 }
diff --git a/libc/bionic/legacy_32_bit_support.cpp b/libc/bionic/legacy_32_bit_support.cpp
index 73f77be..a107664 100644
--- a/libc/bionic/legacy_32_bit_support.cpp
+++ b/libc/bionic/legacy_32_bit_support.cpp
@@ -26,6 +26,8 @@
  * SUCH DAMAGE.
  */
 
+#undef _FORTIFY_SOURCE
+
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
@@ -40,9 +42,7 @@
 
 // System calls we need.
 extern "C" int __fcntl64(int, int, void*);
-extern "C" int __fstatfs64(int, size_t, struct statfs*);
 extern "C" int __llseek(int, unsigned long, unsigned long, off64_t*, int);
-extern "C" int __statfs64(const char*, size_t, struct statfs*);
 
 // For fcntl we use the fcntl64 system call to signal that we're using struct flock64.
 int fcntl(int fd, int cmd, ...) {
@@ -55,18 +55,6 @@
   return __fcntl64(fd, cmd, arg);
 }
 
-// For fstatfs we need to add the extra argument giving the kernel the size of the buffer.
-int fstatfs(int fd, struct statfs* stat) {
-  return __fstatfs64(fd, sizeof(*stat), stat);
-}
-__strong_alias(fstatfs64, fstatfs);
-
-// For statfs we need to add the extra argument giving the kernel the size of the buffer.
-int statfs(const char* path, struct statfs* stat) {
-  return __statfs64(path, sizeof(*stat), stat);
-}
-__strong_alias(statfs64, statfs);
-
 // For lseek64 we need to use the llseek system call which splits the off64_t in two and
 // returns the off64_t result via a pointer because 32-bit kernels can't return 64-bit results.
 off64_t lseek64(int fd, off64_t off, int whence) {
diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp
index 52ca0f2..36dc085 100644
--- a/libc/bionic/libc_init_common.cpp
+++ b/libc/bionic/libc_init_common.cpp
@@ -92,7 +92,7 @@
   main_thread.attr.stack_size = 0; // User code should never see this; we'll compute it when asked.
   // TODO: the main thread's sched_policy and sched_priority need to be queried.
 
-  __init_thread(&main_thread, false);
+  __init_thread(&main_thread);
   __init_tls(&main_thread);
   __set_tls(main_thread.tls);
   main_thread.tls[TLS_SLOT_BIONIC_PREINIT] = &args;
@@ -113,7 +113,7 @@
 
   // Get the main thread from TLS and add it to the thread list.
   pthread_internal_t* main_thread = __get_thread();
-  _pthread_internal_add(main_thread);
+  __pthread_internal_add(main_thread);
 
   __system_properties_init(); // Requires 'environ'.
 
diff --git a/libc/bionic/libc_logging.cpp b/libc/bionic/libc_logging.cpp
index 2eb9d68..7ad21c4 100644
--- a/libc/bionic/libc_logging.cpp
+++ b/libc/bionic/libc_logging.cpp
@@ -427,7 +427,7 @@
 }
 
 static int __libc_write_stderr(const char* tag, const char* msg) {
-  int fd = TEMP_FAILURE_RETRY(open("/dev/stderr", O_CLOEXEC | O_WRONLY));
+  int fd = TEMP_FAILURE_RETRY(open("/dev/stderr", O_CLOEXEC | O_WRONLY | O_APPEND));
   if (fd == -1) {
     return -1;
   }
diff --git a/libc/bionic/libgen.cpp b/libc/bionic/libgen.cpp
index b98f504..2f29d7b 100644
--- a/libc/bionic/libgen.cpp
+++ b/libc/bionic/libgen.cpp
@@ -36,6 +36,9 @@
 
 #include "private/ThreadLocalBuffer.h"
 
+static ThreadLocalBuffer<char, MAXPATHLEN> g_basename_tls_buffer;
+static ThreadLocalBuffer<char, MAXPATHLEN> g_dirname_tls_buffer;
+
 __LIBC64_HIDDEN__ int basename_r(const char* path, char* buffer, size_t buffer_size) {
   const char* startp = NULL;
   const char* endp = NULL;
@@ -147,17 +150,14 @@
   return result;
 }
 
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(basename);
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(dirname);
-
 char* basename(const char* path) {
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, basename, MAXPATHLEN);
-  int rc = basename_r(path, basename_tls_buffer, basename_tls_buffer_size);
-  return (rc < 0) ? NULL : basename_tls_buffer;
+  char* buf = g_basename_tls_buffer.get();  // Presumably this thread's MAXPATHLEN-char buffer -- confirm in ThreadLocalBuffer.h.
+  int rc = basename_r(path, buf, g_basename_tls_buffer.size());  // basename_r writes its result into buf.
+  return (rc < 0) ? NULL : buf;  // A negative rc from basename_r is reported to the caller as NULL.
 }
 
 char* dirname(const char* path) {
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, dirname, MAXPATHLEN);
-  int rc = dirname_r(path, dirname_tls_buffer, dirname_tls_buffer_size);
-  return (rc < 0) ? NULL : dirname_tls_buffer;
+  char* buf = g_dirname_tls_buffer.get();  // Presumably this thread's MAXPATHLEN-char buffer -- confirm in ThreadLocalBuffer.h.
+  int rc = dirname_r(path, buf, g_dirname_tls_buffer.size());  // dirname_r writes its result into buf.
+  return (rc < 0) ? NULL : buf;  // A negative rc from dirname_r is reported to the caller as NULL.
 }
diff --git a/libc/bionic/malloc_debug_common.cpp b/libc/bionic/malloc_debug_common.cpp
index 1a2765a..ee796c6 100644
--- a/libc/bionic/malloc_debug_common.cpp
+++ b/libc/bionic/malloc_debug_common.cpp
@@ -402,7 +402,7 @@
   }
 
   // Load .so that implements the required malloc debugging functionality.
-  void* malloc_impl_handle = dlopen(so_name, RTLD_LAZY);
+  void* malloc_impl_handle = dlopen(so_name, RTLD_NOW);
   if (malloc_impl_handle == NULL) {
     error_log("%s: Missing module %s required for malloc debug level %d: %s",
               getprogname(), so_name, g_malloc_debug_level, dlerror());
diff --git a/libc/bionic/mntent.cpp b/libc/bionic/mntent.cpp
index 4afacda..d169e29 100644
--- a/libc/bionic/mntent.cpp
+++ b/libc/bionic/mntent.cpp
@@ -31,14 +31,13 @@
 
 #include "private/ThreadLocalBuffer.h"
 
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(getmntent_mntent);
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(getmntent_strings);
+static ThreadLocalBuffer<mntent> g_getmntent_mntent_tls_buffer;
+static ThreadLocalBuffer<char, BUFSIZ> g_getmntent_strings_tls_buffer;
 
 mntent* getmntent(FILE* fp) {
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(mntent*, getmntent_mntent, sizeof(mntent));
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, getmntent_strings, BUFSIZ);
-  return getmntent_r(fp, getmntent_mntent_tls_buffer,
-                     getmntent_strings_tls_buffer, getmntent_strings_tls_buffer_size);
+  return getmntent_r(fp, g_getmntent_mntent_tls_buffer.get(),  // Passed as getmntent_r's mntent* result argument.
+                     g_getmntent_strings_tls_buffer.get(),  // Passed as getmntent_r's char* buf argument.
+                     g_getmntent_strings_tls_buffer.size());  // Passed as buf_len for that string buffer.
 }
 
 mntent* getmntent_r(FILE* fp, struct mntent* e, char* buf, int buf_len) {
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index 109c523..8b34495 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -346,6 +346,14 @@
   return malloc(size);
 }
 
+#define __get_thread __real_get_thread
+#include "pthread_internal.h"
+#undef __get_thread
+// Various third-party apps contain a backport of our pthread_rwlock implementation that uses this.
+extern "C" pthread_internal_t* __get_thread() {
+  return __real_get_thread();
+}
+
 #endif // !defined(__LP64__)
 
 // This is never implemented in bionic, only needed for ABI compatibility with the NDK.
diff --git a/libc/bionic/posix_timers.cpp b/libc/bionic/posix_timers.cpp
index 9991573..bc3aeb2 100644
--- a/libc/bionic/posix_timers.cpp
+++ b/libc/bionic/posix_timers.cpp
@@ -26,14 +26,15 @@
  * SUCH DAMAGE.
  */
 
-#include "pthread_internal.h"
-#include "private/bionic_futex.h"
 #include "private/kernel_sigset_t.h"
 
 #include <errno.h>
 #include <malloc.h>
+#include <pthread.h>
+#include <stdatomic.h>
 #include <stdio.h>
 #include <string.h>
+#include <time.h>
 
 // System calls.
 extern "C" int __rt_sigtimedwait(const sigset_t*, siginfo_t*, const struct timespec*, size_t);
@@ -59,11 +60,11 @@
 
   int sigev_notify;
 
-  // These fields are only needed for a SIGEV_THREAD timer.
+  // The fields below are only needed for a SIGEV_THREAD timer.
   pthread_t callback_thread;
   void (*callback)(sigval_t);
   sigval_t callback_argument;
-  volatile bool armed;
+  atomic_bool deleted;  // Set when the timer is deleted, to prevent further calling of callback.
 };
 
 static __kernel_timer_t to_kernel_timer_id(timer_t timer) {
@@ -85,8 +86,13 @@
       continue;
     }
 
-    if (si.si_code == SI_TIMER && timer->armed) {
+    if (si.si_code == SI_TIMER) {
       // This signal was sent because a timer fired, so call the callback.
+
+      // All events to the callback will be ignored when the timer is deleted.
+      if (atomic_load(&timer->deleted) == true) {
+        continue;
+      }
       timer->callback(timer->callback_argument);
     } else if (si.si_code == SI_TKILL) {
       // This signal was sent because someone wants us to exit.
@@ -97,9 +103,7 @@
 }
 
 static void __timer_thread_stop(PosixTimer* timer) {
-  // Immediately mark the timer as disarmed so even if some events
-  // continue to happen, the callback won't be called.
-  timer->armed = false;
+  atomic_store(&timer->deleted, true);
   pthread_kill(timer->callback_thread, TIMER_SIGNAL);
 }
 
@@ -126,7 +130,7 @@
   // Otherwise, this must be SIGEV_THREAD timer...
   timer->callback = evp->sigev_notify_function;
   timer->callback_argument = evp->sigev_value;
-  timer->armed = false;
+  atomic_init(&timer->deleted, false);
 
   // Check arguments that the kernel doesn't care about but we do.
   if (timer->callback == NULL) {
@@ -199,25 +203,19 @@
   return 0;
 }
 
-// http://pubs.opengroup.org/onlinepubs/9699919799/functions/timer_getoverrun.html
+// http://pubs.opengroup.org/onlinepubs/9699919799/functions/timer_gettime.html
 int timer_gettime(timer_t id, itimerspec* ts) {
   return __timer_gettime(to_kernel_timer_id(id), ts);
 }
 
-// http://pubs.opengroup.org/onlinepubs/9699919799/functions/timer_getoverrun.html
+// http://pubs.opengroup.org/onlinepubs/9699919799/functions/timer_settime.html
+// When timer_settime is used to disarm a repeating SIGEV_THREAD timer with a very small
+// period (for example, below 1ms), the kernel may continue to send events to the callback
+// thread a few more times. This behavior is fine because POSIX states that the effect of
+// disarming or resetting a timer with pending expiration notifications is unspecified.
 int timer_settime(timer_t id, int flags, const itimerspec* ts, itimerspec* ots) {
   PosixTimer* timer= reinterpret_cast<PosixTimer*>(id);
-  int rc = __timer_settime(timer->kernel_timer_id, flags, ts, ots);
-  if (rc == 0) {
-    // Mark the timer as either being armed or disarmed. This avoids the
-    // callback being called after the disarm for SIGEV_THREAD timers only.
-    if (ts->it_value.tv_sec != 0 || ts->it_value.tv_nsec != 0) {
-      timer->armed = true;
-    } else {
-      timer->armed = false;
-    }
-  }
-  return rc;
+  return __timer_settime(timer->kernel_timer_id, flags, ts, ots);
 }
 
 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/timer_getoverrun.html
diff --git a/libc/bionic/pthread_accessor.h b/libc/bionic/pthread_accessor.h
deleted file mode 100644
index df4a5a2..0000000
--- a/libc/bionic/pthread_accessor.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PTHREAD_ACCESSOR_H
-#define PTHREAD_ACCESSOR_H
-
-#include <pthread.h>
-
-#include "private/bionic_macros.h"
-#include "pthread_internal.h"
-
-class pthread_accessor {
- public:
-  explicit pthread_accessor(pthread_t desired_thread) {
-    Lock();
-    for (thread_ = g_thread_list; thread_ != NULL; thread_ = thread_->next) {
-      if (thread_ == reinterpret_cast<pthread_internal_t*>(desired_thread)) {
-        break;
-      }
-    }
-  }
-
-  ~pthread_accessor() {
-    Unlock();
-  }
-
-  void Unlock() {
-    if (is_locked_) {
-      is_locked_ = false;
-      thread_ = NULL;
-      pthread_mutex_unlock(&g_thread_list_lock);
-    }
-  }
-
-  pthread_internal_t& operator*() const { return *thread_; }
-  pthread_internal_t* operator->() const { return thread_; }
-  pthread_internal_t* get() const { return thread_; }
-
- private:
-  pthread_internal_t* thread_;
-  bool is_locked_;
-
-  void Lock() {
-    pthread_mutex_lock(&g_thread_list_lock);
-    is_locked_ = true;
-  }
-
-  DISALLOW_COPY_AND_ASSIGN(pthread_accessor);
-};
-
-#endif // PTHREAD_ACCESSOR_H
diff --git a/libc/bionic/pthread_attr.cpp b/libc/bionic/pthread_attr.cpp
index be1c252..7ad3431 100644
--- a/libc/bionic/pthread_attr.cpp
+++ b/libc/bionic/pthread_attr.cpp
@@ -170,6 +170,11 @@
 int pthread_getattr_np(pthread_t t, pthread_attr_t* attr) {
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(t);
   *attr = thread->attr;
+  // We prefer reading join_state here to setting thread->attr.flags in pthread_detach,
+  // because a data race exists in the latter case.
+  if (atomic_load(&thread->join_state) == THREAD_DETACHED) {
+    attr->flags |= PTHREAD_ATTR_FLAG_DETACHED;
+  }
   // The main thread's stack information is not stored in thread->attr, and we need to
   // collect that at runtime.
   if (thread->tid == getpid()) {
diff --git a/libc/bionic/pthread_cond.cpp b/libc/bionic/pthread_cond.cpp
index 5542c59..4a69da5 100644
--- a/libc/bionic/pthread_cond.cpp
+++ b/libc/bionic/pthread_cond.cpp
@@ -41,6 +41,13 @@
 #include "private/bionic_time_conversions.h"
 #include "private/bionic_tls.h"
 
+// XXX *technically* there is a race condition that could allow
+// XXX a signal to be missed.  If thread A is preempted in _wait()
+// XXX after unlocking the mutex and before waiting, and if other
+// XXX threads call signal or broadcast UINT_MAX/2 times (exactly),
+// XXX before thread A is scheduled again and calls futex_wait(),
+// XXX then the signal will be lost.
+
 // We use one bit in pthread_condattr_t (long) values as the 'shared' flag
 // and one bit for the clock type (CLOCK_REALTIME is ((clockid_t) 1), and
 // CLOCK_MONOTONIC is ((clockid_t) 0).). The rest of the bits are a counter.
@@ -57,7 +64,6 @@
 #define COND_GET_CLOCK(c) (((c) & COND_CLOCK_MASK) >> 1)
 #define COND_SET_CLOCK(attr, c) ((attr) | (c << 1))
 
-
 int pthread_condattr_init(pthread_condattr_t* attr) {
   *attr = 0;
   *attr |= PTHREAD_PROCESS_PRIVATE;
@@ -98,47 +104,56 @@
   return 0;
 }
 
-static inline atomic_uint* COND_TO_ATOMIC_POINTER(pthread_cond_t* cond) {
-  static_assert(sizeof(atomic_uint) == sizeof(cond->value),
-                "cond->value should actually be atomic_uint in implementation.");
+struct pthread_cond_internal_t {
+  atomic_uint state;
 
-  // We prefer casting to atomic_uint instead of declaring cond->value to be atomic_uint directly.
-  // Because using the second method pollutes pthread.h, and causes an error when compiling libcxx.
-  return reinterpret_cast<atomic_uint*>(&cond->value);
+  bool process_shared() {
+    return COND_IS_SHARED(atomic_load_explicit(&state, memory_order_relaxed));
+  }
+
+  int get_clock() {
+    return COND_GET_CLOCK(atomic_load_explicit(&state, memory_order_relaxed));
+  }
+
+#if defined(__LP64__)
+  char __reserved[44];
+#endif
+};
+
+static_assert(sizeof(pthread_cond_t) == sizeof(pthread_cond_internal_t),
+              "pthread_cond_t should actually be pthread_cond_internal_t in implementation.");
+
+// For binary compatibility with old versions of pthread_cond_t, we can't use stricter
+// alignment than 4-byte alignment.
+static_assert(alignof(pthread_cond_t) == 4,
+              "pthread_cond_t should fulfill the alignment requirement of pthread_cond_internal_t.");
+
+static pthread_cond_internal_t* __get_internal_cond(pthread_cond_t* cond_interface) {
+  return reinterpret_cast<pthread_cond_internal_t*>(cond_interface);
 }
 
-// XXX *technically* there is a race condition that could allow
-// XXX a signal to be missed.  If thread A is preempted in _wait()
-// XXX after unlocking the mutex and before waiting, and if other
-// XXX threads call signal or broadcast UINT_MAX/2 times (exactly),
-// XXX before thread A is scheduled again and calls futex_wait(),
-// XXX then the signal will be lost.
+int pthread_cond_init(pthread_cond_t* cond_interface, const pthread_condattr_t* attr) {
+  pthread_cond_internal_t* cond = __get_internal_cond(cond_interface);
 
-int pthread_cond_init(pthread_cond_t* cond, const pthread_condattr_t* attr) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  unsigned int init_value = 0;
-
+  unsigned int init_state = 0;
   if (attr != NULL) {
-    init_value = (*attr & COND_FLAGS_MASK);
+    init_state = (*attr & COND_FLAGS_MASK);
   }
-  atomic_init(cond_value_ptr, init_value);
+  atomic_init(&cond->state, init_state);
 
   return 0;
 }
 
-int pthread_cond_destroy(pthread_cond_t* cond) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  atomic_store_explicit(cond_value_ptr, 0xdeadc04d, memory_order_relaxed);
+int pthread_cond_destroy(pthread_cond_t* cond_interface) {
+  pthread_cond_internal_t* cond = __get_internal_cond(cond_interface);
+  atomic_store_explicit(&cond->state, 0xdeadc04d, memory_order_relaxed);
   return 0;
 }
 
 // This function is used by pthread_cond_broadcast and
 // pthread_cond_signal to atomically decrement the counter
 // then wake up thread_count threads.
-static int __pthread_cond_pulse(atomic_uint* cond_value_ptr, int thread_count) {
-  unsigned int old_value = atomic_load_explicit(cond_value_ptr, memory_order_relaxed);
-  bool shared = COND_IS_SHARED(old_value);
-
+static int __pthread_cond_pulse(pthread_cond_internal_t* cond, int thread_count) {
   // We don't use a release/seq_cst fence here. Because pthread_cond_wait/signal can't be
   // used as a method for memory synchronization by itself. It should always be used with
   // pthread mutexes. Note that Spurious wakeups from pthread_cond_wait/timedwait may occur,
@@ -149,20 +164,18 @@
   // synchronization. And it doesn't help even if we use any fence here.
 
   // The increase of value should leave flags alone, even if the value can overflows.
-  atomic_fetch_add_explicit(cond_value_ptr, COND_COUNTER_STEP, memory_order_relaxed);
+  atomic_fetch_add_explicit(&cond->state, COND_COUNTER_STEP, memory_order_relaxed);
 
-  __futex_wake_ex(cond_value_ptr, shared, thread_count);
+  __futex_wake_ex(&cond->state, cond->process_shared(), thread_count);
   return 0;
 }
 
-__LIBC_HIDDEN__
-int __pthread_cond_timedwait_relative(atomic_uint* cond_value_ptr, pthread_mutex_t* mutex,
-                                      const timespec* reltime) {
-  unsigned int old_value = atomic_load_explicit(cond_value_ptr, memory_order_relaxed);
-  bool shared = COND_IS_SHARED(old_value);
+static int __pthread_cond_timedwait_relative(pthread_cond_internal_t* cond, pthread_mutex_t* mutex,
+                                             const timespec* rel_timeout_or_null) {
+  unsigned int old_state = atomic_load_explicit(&cond->state, memory_order_relaxed);
 
   pthread_mutex_unlock(mutex);
-  int status = __futex_wait_ex(cond_value_ptr, shared, old_value, reltime);
+  int status = __futex_wait_ex(&cond->state, cond->process_shared(), old_state, rel_timeout_or_null);
   pthread_mutex_lock(mutex);
 
   if (status == -ETIMEDOUT) {
@@ -171,67 +184,68 @@
   return 0;
 }
 
-__LIBC_HIDDEN__
-int __pthread_cond_timedwait(atomic_uint* cond_value_ptr, pthread_mutex_t* mutex,
-                             const timespec* abs_ts, clockid_t clock) {
+static int __pthread_cond_timedwait(pthread_cond_internal_t* cond, pthread_mutex_t* mutex,
+                                    const timespec* abs_timeout_or_null, clockid_t clock) {
   timespec ts;
-  timespec* tsp;
+  timespec* rel_timeout = NULL;
 
-  if (abs_ts != NULL) {
-    if (!timespec_from_absolute_timespec(ts, *abs_ts, clock)) {
+  if (abs_timeout_or_null != NULL) {
+    rel_timeout = &ts;
+    if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, clock)) {
       return ETIMEDOUT;
     }
-    tsp = &ts;
-  } else {
-    tsp = NULL;
   }
 
-  return __pthread_cond_timedwait_relative(cond_value_ptr, mutex, tsp);
+  return __pthread_cond_timedwait_relative(cond, mutex, rel_timeout);
 }
 
-int pthread_cond_broadcast(pthread_cond_t* cond) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_pulse(cond_value_ptr, INT_MAX);
+int pthread_cond_broadcast(pthread_cond_t* cond_interface) {
+  return __pthread_cond_pulse(__get_internal_cond(cond_interface), INT_MAX);
 }
 
-int pthread_cond_signal(pthread_cond_t* cond) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_pulse(cond_value_ptr, 1);
+int pthread_cond_signal(pthread_cond_t* cond_interface) {
+  return __pthread_cond_pulse(__get_internal_cond(cond_interface), 1);
 }
 
-int pthread_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_timedwait(cond_value_ptr, mutex, NULL,
-           COND_GET_CLOCK(atomic_load_explicit(cond_value_ptr, memory_order_relaxed)));
+int pthread_cond_wait(pthread_cond_t* cond_interface, pthread_mutex_t* mutex) {
+  pthread_cond_internal_t* cond = __get_internal_cond(cond_interface);
+  return __pthread_cond_timedwait(cond, mutex, NULL, cond->get_clock());
 }
 
-int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t * mutex, const timespec *abstime) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_timedwait(cond_value_ptr, mutex, abstime,
-           COND_GET_CLOCK(atomic_load_explicit(cond_value_ptr, memory_order_relaxed)));
+int pthread_cond_timedwait(pthread_cond_t *cond_interface, pthread_mutex_t * mutex,
+                           const timespec *abstime) {
+
+  pthread_cond_internal_t* cond = __get_internal_cond(cond_interface);
+  return __pthread_cond_timedwait(cond, mutex, abstime, cond->get_clock());
 }
 
 #if !defined(__LP64__)
 // TODO: this exists only for backward binary compatibility on 32 bit platforms.
-extern "C" int pthread_cond_timedwait_monotonic(pthread_cond_t* cond, pthread_mutex_t* mutex, const timespec* abstime) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_timedwait(cond_value_ptr, mutex, abstime, CLOCK_MONOTONIC);
+extern "C" int pthread_cond_timedwait_monotonic(pthread_cond_t* cond_interface,
+                                                pthread_mutex_t* mutex,
+                                                const timespec* abs_timeout) {
+
+  return __pthread_cond_timedwait(__get_internal_cond(cond_interface), mutex, abs_timeout,
+                                  CLOCK_MONOTONIC);
 }
 
-extern "C" int pthread_cond_timedwait_monotonic_np(pthread_cond_t* cond, pthread_mutex_t* mutex, const timespec* abstime) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_timedwait(cond_value_ptr, mutex, abstime, CLOCK_MONOTONIC);
+extern "C" int pthread_cond_timedwait_monotonic_np(pthread_cond_t* cond_interface,
+                                                   pthread_mutex_t* mutex,
+                                                   const timespec* abs_timeout) {
+  return pthread_cond_timedwait_monotonic(cond_interface, mutex, abs_timeout);
 }
 
-extern "C" int pthread_cond_timedwait_relative_np(pthread_cond_t* cond, pthread_mutex_t* mutex, const timespec* reltime) {
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_timedwait_relative(cond_value_ptr, mutex, reltime);
+extern "C" int pthread_cond_timedwait_relative_np(pthread_cond_t* cond_interface,
+                                                  pthread_mutex_t* mutex,
+                                                  const timespec* rel_timeout) {
+
+  return __pthread_cond_timedwait_relative(__get_internal_cond(cond_interface), mutex, rel_timeout);
 }
 
-extern "C" int pthread_cond_timeout_np(pthread_cond_t* cond, pthread_mutex_t* mutex, unsigned ms) {
+extern "C" int pthread_cond_timeout_np(pthread_cond_t* cond_interface,
+                                       pthread_mutex_t* mutex, unsigned ms) {
   timespec ts;
   timespec_from_ms(ts, ms);
-  atomic_uint* cond_value_ptr = COND_TO_ATOMIC_POINTER(cond);
-  return __pthread_cond_timedwait_relative(cond_value_ptr, mutex, &ts);
+  return pthread_cond_timedwait_relative_np(cond_interface, mutex, &ts);
 }
 #endif // !defined(__LP64__)
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 2bca43f..dbdb180 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -69,13 +69,20 @@
 
 void __init_alternate_signal_stack(pthread_internal_t* thread) {
   // Create and set an alternate signal stack.
-  stack_t ss;
-  ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-  if (ss.ss_sp != MAP_FAILED) {
-    ss.ss_size = SIGSTKSZ;
+  void* stack_base = mmap(NULL, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  if (stack_base != MAP_FAILED) {
+
+    // Create a guard page to catch stack overflows in signal handlers.
+    if (mprotect(stack_base, PAGE_SIZE, PROT_NONE) == -1) {
+      munmap(stack_base, SIGNAL_STACK_SIZE);
+      return;
+    }
+    stack_t ss;
+    ss.ss_sp = reinterpret_cast<uint8_t*>(stack_base) + PAGE_SIZE;
+    ss.ss_size = SIGNAL_STACK_SIZE - PAGE_SIZE;
     ss.ss_flags = 0;
     sigaltstack(&ss, NULL);
-    thread->alternate_signal_stack = ss.ss_sp;
+    thread->alternate_signal_stack = stack_base;
 
     // We can only use const static allocated string for mapped region name, as Android kernel
     // uses the string pointer directly when dumping /proc/pid/maps.
@@ -83,9 +90,15 @@
   }
 }
 
-int __init_thread(pthread_internal_t* thread, bool add_to_thread_list) {
+int __init_thread(pthread_internal_t* thread) {
   int error = 0;
 
+  if (__predict_true((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) == 0)) {
+    atomic_init(&thread->join_state, THREAD_NOT_JOINED);
+  } else {
+    atomic_init(&thread->join_state, THREAD_DETACHED);
+  }
+
   // Set the scheduling policy/priority of the thread.
   if (thread->attr.sched_policy != SCHED_NORMAL) {
     sched_param param;
@@ -102,10 +115,6 @@
 
   thread->cleanup_stack = NULL;
 
-  if (add_to_thread_list) {
-    _pthread_internal_add(thread);
-  }
-
   return error;
 }
 
@@ -156,15 +165,16 @@
   }
 
   // Mapped space(or user allocated stack) is used for:
-  //   thread_internal_t
+  //   pthread_internal_t
   //   thread stack (including guard page)
-  stack_top -= sizeof(pthread_internal_t);
+
+  // To safely access the pthread_internal_t and the thread stack, round stack_top down to a 16-byte boundary.
+  stack_top = reinterpret_cast<uint8_t*>(
+                (reinterpret_cast<uintptr_t>(stack_top) - sizeof(pthread_internal_t)) & ~0xf);
+
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
   attr->stack_size = stack_top - reinterpret_cast<uint8_t*>(attr->stack_base);
 
-  // No need to check stack_top alignment. The size of pthread_internal_t is 16-bytes aligned,
-  // and user allocated stack is guaranteed by pthread_attr_setstack.
-
   thread->mmap_size = mmap_size;
   thread->attr = *attr;
   __init_tls(thread);
@@ -259,18 +269,19 @@
     return clone_errno;
   }
 
-  int init_errno = __init_thread(thread, true);
+  int init_errno = __init_thread(thread);
   if (init_errno != 0) {
     // Mark the thread detached and replace its start_routine with a no-op.
     // Letting the thread run is the easiest way to clean up its resources.
-    thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
+    atomic_store(&thread->join_state, THREAD_DETACHED);
+    __pthread_internal_add(thread);
     thread->start_routine = __do_nothing;
     pthread_mutex_unlock(&thread->startup_handshake_mutex);
     return init_errno;
   }
 
   // Publish the pthread_t and unlock the mutex to let the new thread start running.
-  *thread_out = reinterpret_cast<pthread_t>(thread);
+  *thread_out = __pthread_internal_add(thread);
   pthread_mutex_unlock(&thread->startup_handshake_mutex);
 
   return 0;
diff --git a/libc/bionic/pthread_detach.cpp b/libc/bionic/pthread_detach.cpp
index c800660..fb8e0dd 100644
--- a/libc/bionic/pthread_detach.cpp
+++ b/libc/bionic/pthread_detach.cpp
@@ -29,30 +29,24 @@
 #include <errno.h>
 #include <pthread.h>
 
-#include "pthread_accessor.h"
+#include "pthread_internal.h"
 
 int pthread_detach(pthread_t t) {
-  {
-    pthread_accessor thread(t);
-    if (thread.get() == NULL) {
-      return ESRCH;
-    }
-
-    if (thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) {
-      return EINVAL; // Already detached.
-    }
-
-    if (thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) {
-      return 0; // Already being joined; silently do nothing, like glibc.
-    }
-
-    // If the thread has not exited, we can detach it safely.
-    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_ZOMBIE) == 0) {
-      thread->attr.flags |= PTHREAD_ATTR_FLAG_DETACHED;
-      return 0;
-    }
+  pthread_internal_t* thread = __pthread_internal_find(t);
+  if (thread == NULL) {
+    return ESRCH;
   }
 
-  // The thread is in zombie state, use pthread_join to clean it up.
-  return pthread_join(t, NULL);
+  ThreadJoinState old_state = THREAD_NOT_JOINED;
+  while (old_state == THREAD_NOT_JOINED &&
+         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_DETACHED)) {
+  }
+
+  if (old_state == THREAD_NOT_JOINED) {
+    return 0;
+  } else if (old_state == THREAD_EXITED_NOT_JOINED) {
+    // Use pthread_join to clean it up.
+    return pthread_join(t, NULL);
+  }
+  return EINVAL;
 }
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index d0d64b0..ceda931 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -37,6 +37,7 @@
 extern "C" __noreturn void _exit_with_stack_teardown(void*, size_t);
 extern "C" __noreturn void __exit(int);
 extern "C" int __set_tid_address(int*);
+extern "C" void __cxa_thread_finalize();
 
 /* CAVEAT: our implementation of pthread_cleanup_push/pop doesn't support C++ exceptions
  *         and thread cancelation
@@ -59,10 +60,13 @@
 }
 
 void pthread_exit(void* return_value) {
+  // Call dtors for thread_local objects first.
+  __cxa_thread_finalize();
+
   pthread_internal_t* thread = __get_thread();
   thread->return_value = return_value;
 
-  // Call the cleanup handlers first.
+  // Call the cleanup handlers.
   while (thread->cleanup_stack) {
     __pthread_cleanup_t* c = thread->cleanup_stack;
     thread->cleanup_stack = c->__cleanup_prev;
@@ -83,13 +87,16 @@
     sigaltstack(&ss, NULL);
 
     // Free it.
-    munmap(thread->alternate_signal_stack, SIGSTKSZ);
+    munmap(thread->alternate_signal_stack, SIGNAL_STACK_SIZE);
     thread->alternate_signal_stack = NULL;
   }
 
-  bool free_mapped_space = false;
-  pthread_mutex_lock(&g_thread_list_lock);
-  if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
+  ThreadJoinState old_state = THREAD_NOT_JOINED;
+  while (old_state == THREAD_NOT_JOINED &&
+         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
+  }
+
+  if (old_state == THREAD_DETACHED) {
     // The thread is detached, no one will use pthread_internal_t after pthread_exit.
     // So we can free mapped space, which includes pthread_internal_t and thread stack.
     // First make sure that the kernel does not try to clear the tid field
@@ -97,28 +104,23 @@
     __set_tid_address(NULL);
 
     // pthread_internal_t is freed below with stack, not here.
-    _pthread_internal_remove_locked(thread, false);
-    free_mapped_space = true;
-  } else {
-    // Mark the thread as exiting without freeing pthread_internal_t.
-    thread->attr.flags |= PTHREAD_ATTR_FLAG_ZOMBIE;
+    __pthread_internal_remove(thread);
+
+    if (thread->mmap_size != 0) {
+      // We need to free mapped space for detached threads when they exit.
+      // That's not something we can do in C.
+
+      // We don't want to take a signal after we've unmapped the stack.
+      // That's one last thing we can handle in C.
+      sigset_t mask;
+      sigfillset(&mask);
+      sigprocmask(SIG_SETMASK, &mask, NULL);
+
+      _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
+    }
   }
-  pthread_mutex_unlock(&g_thread_list_lock);
 
-  if (free_mapped_space && thread->mmap_size != 0) {
-    // We need to free mapped space for detached threads when they exit.
-    // That's not something we can do in C.
-
-    // We don't want to take a signal after we've unmapped the stack.
-    // That's one last thing we can handle in C.
-    sigset_t mask;
-    sigfillset(&mask);
-    sigprocmask(SIG_SETMASK, &mask, NULL);
-
-    _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
-  } else {
-    // No need to free mapped space. Either there was no space mapped, or it is left for
-    // the pthread_join caller to clean up.
-    __exit(0);
-  }
+  // No need to free mapped space. Either there was no space mapped, or it is left for
+  // the pthread_join caller to clean up.
+  __exit(0);
 }
diff --git a/libc/bionic/pthread_getcpuclockid.cpp b/libc/bionic/pthread_getcpuclockid.cpp
index d11f56a..2bf2004 100644
--- a/libc/bionic/pthread_getcpuclockid.cpp
+++ b/libc/bionic/pthread_getcpuclockid.cpp
@@ -28,11 +28,11 @@
 
 #include <errno.h>
 
-#include "pthread_accessor.h"
+#include "pthread_internal.h"
 
 int pthread_getcpuclockid(pthread_t t, clockid_t* clockid) {
-  pthread_accessor thread(t);
-  if (thread.get() == NULL) {
+  pthread_internal_t* thread = __pthread_internal_find(t);
+  if (thread == NULL) {
     return ESRCH;
   }
 
diff --git a/libc/bionic/pthread_getschedparam.cpp b/libc/bionic/pthread_getschedparam.cpp
index 2cdc11a..052fb05 100644
--- a/libc/bionic/pthread_getschedparam.cpp
+++ b/libc/bionic/pthread_getschedparam.cpp
@@ -29,13 +29,13 @@
 #include <errno.h>
 
 #include "private/ErrnoRestorer.h"
-#include "pthread_accessor.h"
+#include "pthread_internal.h"
 
 int pthread_getschedparam(pthread_t t, int* policy, sched_param* param) {
   ErrnoRestorer errno_restorer;
 
-  pthread_accessor thread(t);
-  if (thread.get() == NULL) {
+  pthread_internal_t* thread = __pthread_internal_find(t);
+  if (thread == NULL) {
     return ESRCH;
   }
 
diff --git a/libc/bionic/pthread_internals.cpp b/libc/bionic/pthread_internal.cpp
similarity index 70%
rename from libc/bionic/pthread_internals.cpp
rename to libc/bionic/pthread_internal.cpp
index 14061d1..1967ccf 100644
--- a/libc/bionic/pthread_internals.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -38,26 +38,10 @@
 #include "private/libc_logging.h"
 #include "private/ScopedPthreadMutexLocker.h"
 
-pthread_internal_t* g_thread_list = NULL;
-pthread_mutex_t g_thread_list_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_internal_t* g_thread_list = NULL;
+static pthread_mutex_t g_thread_list_lock = PTHREAD_MUTEX_INITIALIZER;
 
-void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread) {
-  if (thread->next != NULL) {
-    thread->next->prev = thread->prev;
-  }
-  if (thread->prev != NULL) {
-    thread->prev->next = thread->next;
-  } else {
-    g_thread_list = thread->next;
-  }
-
-  if (free_thread && thread->mmap_size != 0) {
-    // Free mapped space, including thread stack and pthread_internal_t.
-    munmap(thread->attr.stack_base, thread->mmap_size);
-  }
-}
-
-void _pthread_internal_add(pthread_internal_t* thread) {
+pthread_t __pthread_internal_add(pthread_internal_t* thread) {
   ScopedPthreadMutexLocker locker(&g_thread_list_lock);
 
   // We insert at the head.
@@ -67,8 +51,42 @@
     thread->next->prev = thread;
   }
   g_thread_list = thread;
+  return reinterpret_cast<pthread_t>(thread);
 }
 
-pthread_internal_t* __get_thread(void) {
-  return reinterpret_cast<pthread_internal_t*>(__get_tls()[TLS_SLOT_THREAD_ID]);
+void __pthread_internal_remove(pthread_internal_t* thread) {
+  ScopedPthreadMutexLocker locker(&g_thread_list_lock);
+
+  if (thread->next != NULL) {
+    thread->next->prev = thread->prev;
+  }
+  if (thread->prev != NULL) {
+    thread->prev->next = thread->next;
+  } else {
+    g_thread_list = thread->next;
+  }
+}
+
+static void __pthread_internal_free(pthread_internal_t* thread) {
+  if (thread->mmap_size != 0) {
+    // Free mapped space, including thread stack and pthread_internal_t.
+    munmap(thread->attr.stack_base, thread->mmap_size);
+  }
+}
+
+void __pthread_internal_remove_and_free(pthread_internal_t* thread) {
+  __pthread_internal_remove(thread);
+  __pthread_internal_free(thread);
+}
+
+pthread_internal_t* __pthread_internal_find(pthread_t thread_id) {
+  pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(thread_id);
+  ScopedPthreadMutexLocker locker(&g_thread_list_lock);
+
+  for (pthread_internal_t* t = g_thread_list; t != NULL; t = t->next) {
+    if (t == thread) {
+      return thread;
+    }
+  }
+  return NULL;
 }
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 6ace301..3b91e6a 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -29,6 +29,7 @@
 #define _PTHREAD_INTERNAL_H_
 
 #include <pthread.h>
+#include <stdatomic.h>
 
 #include "private/bionic_tls.h"
 
@@ -38,14 +39,18 @@
 /* Has the thread been joined by another thread? */
 #define PTHREAD_ATTR_FLAG_JOINED 0x00000002
 
-/* Did the thread exit without freeing pthread_internal_t? */
-#define PTHREAD_ATTR_FLAG_ZOMBIE 0x00000004
-
 struct pthread_key_data_t {
   uintptr_t seq; // Use uintptr_t just for alignment, as we use pointer below.
   void* data;
 };
 
+enum ThreadJoinState {
+  THREAD_NOT_JOINED,
+  THREAD_EXITED_NOT_JOINED,
+  THREAD_JOINED,
+  THREAD_DETACHED
+};
+
 struct pthread_internal_t {
   struct pthread_internal_t* next;
   struct pthread_internal_t* prev;
@@ -74,6 +79,8 @@
 
   pthread_attr_t attr;
 
+  _Atomic(ThreadJoinState) join_state;
+
   __pthread_cleanup_t* cleanup_stack;
 
   void* (*start_routine)(void*);
@@ -96,18 +103,23 @@
    */
 #define __BIONIC_DLERROR_BUFFER_SIZE 512
   char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
-} __attribute__((aligned(16))); // Align it as thread stack top below it should be aligned.
+};
 
-__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list);
+__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*);
-__LIBC_HIDDEN__ void _pthread_internal_add(pthread_internal_t* thread);
 
-/* Various third-party apps contain a backport of our pthread_rwlock implementation that uses this. */
-extern "C" __LIBC64_HIDDEN__ pthread_internal_t* __get_thread(void);
+__LIBC_HIDDEN__ pthread_t           __pthread_internal_add(pthread_internal_t* thread);
+__LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id);
+__LIBC_HIDDEN__ void                __pthread_internal_remove(pthread_internal_t* thread);
+__LIBC_HIDDEN__ void                __pthread_internal_remove_and_free(pthread_internal_t* thread);
+
+// Make __get_thread() inline for performance reasons. See http://b/19825434.
+static inline __always_inline pthread_internal_t* __get_thread() {
+  return reinterpret_cast<pthread_internal_t*>(__get_tls()[TLS_SLOT_THREAD_ID]);
+}
 
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
-__LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread);
 
 /*
  * Traditionally we gave threads a 1MiB stack. When we started
@@ -118,8 +130,8 @@
  */
 #define PTHREAD_STACK_SIZE_DEFAULT ((1 * 1024 * 1024) - SIGSTKSZ)
 
-__LIBC_HIDDEN__ extern pthread_internal_t* g_thread_list;
-__LIBC_HIDDEN__ extern pthread_mutex_t g_thread_list_lock;
+/* Leave room for a guard page in the internally created signal stacks. */
+#define SIGNAL_STACK_SIZE (SIGSTKSZ + PAGE_SIZE)
 
 /* Needed by fork. */
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_prepare();
diff --git a/libc/bionic/pthread_join.cpp b/libc/bionic/pthread_join.cpp
index e3350ef..4d852cb 100644
--- a/libc/bionic/pthread_join.cpp
+++ b/libc/bionic/pthread_join.cpp
@@ -29,36 +29,31 @@
 #include <errno.h>
 
 #include "private/bionic_futex.h"
-#include "pthread_accessor.h"
+#include "pthread_internal.h"
 
 int pthread_join(pthread_t t, void** return_value) {
   if (t == pthread_self()) {
     return EDEADLK;
   }
 
-  pid_t tid;
-  volatile int* tid_ptr;
-  {
-    pthread_accessor thread(t);
-    if (thread.get() == NULL) {
-      return ESRCH;
-    }
-
-    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
-      return EINVAL;
-    }
-
-    if ((thread->attr.flags & PTHREAD_ATTR_FLAG_JOINED) != 0) {
-      return EINVAL;
-    }
-
-    // Okay, looks like we can signal our intention to join.
-    thread->attr.flags |= PTHREAD_ATTR_FLAG_JOINED;
-    tid = thread->tid;
-    tid_ptr = &thread->tid;
+  pthread_internal_t* thread = __pthread_internal_find(t);
+  if (thread == NULL) {
+    return ESRCH;
   }
 
-  // We set the PTHREAD_ATTR_FLAG_JOINED flag with the lock held,
+  ThreadJoinState old_state = THREAD_NOT_JOINED;
+  while ((old_state == THREAD_NOT_JOINED || old_state == THREAD_EXITED_NOT_JOINED) &&
+         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_JOINED)) {
+  }
+
+  if (old_state == THREAD_DETACHED || old_state == THREAD_JOINED) {
+    return EINVAL;
+  }
+
+  pid_t tid = thread->tid;
+  volatile int* tid_ptr = &thread->tid;
+
+  // We set thread->join_state to THREAD_JOINED with an atomic operation,
   // so no one is going to remove this thread except us.
 
   // Wait for the thread to actually exit, if it hasn't already.
@@ -66,14 +61,10 @@
     __futex_wait(tid_ptr, tid, NULL);
   }
 
-  // Take the lock again so we can pull the thread's return value
-  // and remove the thread from the list.
-  pthread_accessor thread(t);
-
   if (return_value) {
     *return_value = thread->return_value;
   }
 
-  _pthread_internal_remove_locked(thread.get(), true);
+  __pthread_internal_remove_and_free(thread);
   return 0;
 }
diff --git a/libc/bionic/pthread_key.cpp b/libc/bionic/pthread_key.cpp
index 65e0879..6d77afa 100644
--- a/libc/bionic/pthread_key.cpp
+++ b/libc/bionic/pthread_key.cpp
@@ -57,8 +57,15 @@
   return seq & (1 << SEQ_KEY_IN_USE_BIT);
 }
 
+#define KEY_VALID_FLAG (1 << 31)
+
+static_assert(sizeof(pthread_key_t) == sizeof(int) && static_cast<pthread_key_t>(-1) < 0,
+              "pthread_key_t should be typedef to int");
+
 static inline bool KeyInValidRange(pthread_key_t key) {
-  return key >= 0 && key < BIONIC_PTHREAD_KEY_COUNT;
+  // key < 0 means bit 31 is set.
+  // Then key < (2^31 | BIONIC_PTHREAD_KEY_COUNT) means the index part of key < BIONIC_PTHREAD_KEY_COUNT.
+  return (key < (KEY_VALID_FLAG | BIONIC_PTHREAD_KEY_COUNT));
 }
 
 // Called from pthread_exit() to remove all pthread keys. This must call the destructor of
@@ -114,7 +121,7 @@
     while (!SeqOfKeyInUse(seq)) {
       if (atomic_compare_exchange_weak(&key_map[i].seq, &seq, seq + SEQ_INCREMENT_STEP)) {
         atomic_store(&key_map[i].key_destructor, reinterpret_cast<uintptr_t>(key_destructor));
-        *key = i;
+        *key = i | KEY_VALID_FLAG;
         return 0;
       }
     }
@@ -127,9 +134,10 @@
 // responsibility of the caller to properly dispose of the corresponding data
 // and resources, using any means it finds suitable.
 int pthread_key_delete(pthread_key_t key) {
-  if (!KeyInValidRange(key)) {
+  if (__predict_false(!KeyInValidRange(key))) {
     return EINVAL;
   }
+  key &= ~KEY_VALID_FLAG;
   // Increase seq to invalidate values in all threads.
   uintptr_t seq = atomic_load_explicit(&key_map[key].seq, memory_order_relaxed);
   if (SeqOfKeyInUse(seq)) {
@@ -141,9 +149,10 @@
 }
 
 void* pthread_getspecific(pthread_key_t key) {
-  if (!KeyInValidRange(key)) {
+  if (__predict_false(!KeyInValidRange(key))) {
     return NULL;
   }
+  key &= ~KEY_VALID_FLAG;
   uintptr_t seq = atomic_load_explicit(&key_map[key].seq, memory_order_relaxed);
   pthread_key_data_t* data = &(__get_thread()->key_data[key]);
   // It is user's responsibility to synchornize between the creation and use of pthread keys,
@@ -151,16 +160,19 @@
   if (__predict_true(SeqOfKeyInUse(seq) && data->seq == seq)) {
     return data->data;
   }
+  // We arrive here when the current thread holds the seq of a deleted pthread key. So the
+  // data is for the deleted pthread key, and should be cleared.
   data->data = NULL;
   return NULL;
 }
 
 int pthread_setspecific(pthread_key_t key, const void* ptr) {
-  if (!KeyInValidRange(key)) {
+  if (__predict_false(!KeyInValidRange(key))) {
     return EINVAL;
   }
+  key &= ~KEY_VALID_FLAG;
   uintptr_t seq = atomic_load_explicit(&key_map[key].seq, memory_order_relaxed);
-  if (SeqOfKeyInUse(seq)) {
+  if (__predict_true(SeqOfKeyInUse(seq))) {
     pthread_key_data_t* data = &(__get_thread()->key_data[key]);
     data->seq = seq;
     data->data = const_cast<void*>(ptr);
diff --git a/libc/bionic/pthread_kill.cpp b/libc/bionic/pthread_kill.cpp
index 163317e..93513fa 100644
--- a/libc/bionic/pthread_kill.cpp
+++ b/libc/bionic/pthread_kill.cpp
@@ -30,26 +30,17 @@
 #include <unistd.h>
 
 #include "private/ErrnoRestorer.h"
-#include "pthread_accessor.h"
+#include "pthread_internal.h"
 
 extern "C" int tgkill(int tgid, int tid, int sig);
 
 int pthread_kill(pthread_t t, int sig) {
   ErrnoRestorer errno_restorer;
 
-  pthread_accessor thread(t);
-  if (thread.get() == NULL) {
+  pthread_internal_t* thread = __pthread_internal_find(t);
+  if (thread == NULL) {
     return ESRCH;
   }
 
-  // There's a race here, but it's one we share with all other C libraries.
-  pid_t tid = thread->tid;
-  thread.Unlock();
-
-  int rc = tgkill(getpid(), tid, sig);
-  if (rc == -1) {
-    return errno;
-  }
-
-  return 0;
+  return (tgkill(getpid(), thread->tid, sig) == -1) ? errno : 0;
 }
diff --git a/libc/bionic/pthread_mutex.cpp b/libc/bionic/pthread_mutex.cpp
index 83d6b54..4fec753 100644
--- a/libc/bionic/pthread_mutex.cpp
+++ b/libc/bionic/pthread_mutex.cpp
@@ -31,6 +31,7 @@
 #include <errno.h>
 #include <limits.h>
 #include <stdatomic.h>
+#include <string.h>
 #include <sys/cdefs.h>
 #include <sys/mman.h>
 #include <unistd.h>
@@ -43,130 +44,6 @@
 #include "private/bionic_time_conversions.h"
 #include "private/bionic_tls.h"
 
-/* a mutex is implemented as a 32-bit integer holding the following fields
- *
- * bits:     name     description
- * 31-16     tid      owner thread's tid (recursive and errorcheck only)
- * 15-14     type     mutex type
- * 13        shared   process-shared flag
- * 12-2      counter  counter of recursive mutexes
- * 1-0       state    lock state (0, 1 or 2)
- */
-
-/* Convenience macro, creates a mask of 'bits' bits that starts from
- * the 'shift'-th least significant bit in a 32-bit word.
- *
- * Examples: FIELD_MASK(0,4)  -> 0xf
- *           FIELD_MASK(16,9) -> 0x1ff0000
- */
-#define  FIELD_MASK(shift,bits)           (((1 << (bits))-1) << (shift))
-
-/* This one is used to create a bit pattern from a given field value */
-#define  FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits))-1)) << (shift))
-
-/* And this one does the opposite, i.e. extract a field's value from a bit pattern */
-#define  FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))
-
-/* Mutex state:
- *
- * 0 for unlocked
- * 1 for locked, no waiters
- * 2 for locked, maybe waiters
- */
-#define  MUTEX_STATE_SHIFT      0
-#define  MUTEX_STATE_LEN        2
-
-#define  MUTEX_STATE_MASK           FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
-#define  MUTEX_STATE_FROM_BITS(v)   FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
-#define  MUTEX_STATE_TO_BITS(v)     FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
-
-#define  MUTEX_STATE_UNLOCKED            0   /* must be 0 to match __PTHREAD_MUTEX_INIT_VALUE */
-#define  MUTEX_STATE_LOCKED_UNCONTENDED  1   /* must be 1 due to atomic dec in unlock operation */
-#define  MUTEX_STATE_LOCKED_CONTENDED    2   /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */
-
-#define  MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
-#define  MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
-#define  MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)
-
-/* return true iff the mutex if locked with no waiters */
-#define  MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)
-
-/* return true iff the mutex if locked with maybe waiters */
-#define  MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)   (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)
-
-/* used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
-#define  MUTEX_STATE_BITS_FLIP_CONTENTION(v)      ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))
-
-/* Mutex counter:
- *
- * We need to check for overflow before incrementing, and we also need to
- * detect when the counter is 0
- */
-#define  MUTEX_COUNTER_SHIFT         2
-#define  MUTEX_COUNTER_LEN           11
-#define  MUTEX_COUNTER_MASK          FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)
-
-#define  MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)    (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
-#define  MUTEX_COUNTER_BITS_IS_ZERO(v)          (((v) & MUTEX_COUNTER_MASK) == 0)
-
-/* Used to increment the counter directly after overflow has been checked */
-#define  MUTEX_COUNTER_BITS_ONE      FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT,MUTEX_COUNTER_LEN)
-
-/* Mutex shared bit flag
- *
- * This flag is set to indicate that the mutex is shared among processes.
- * This changes the futex opcode we use for futex wait/wake operations
- * (non-shared operations are much faster).
- */
-#define  MUTEX_SHARED_SHIFT    13
-#define  MUTEX_SHARED_MASK     FIELD_MASK(MUTEX_SHARED_SHIFT,1)
-
-/* Mutex type:
- *
- * We support normal, recursive and errorcheck mutexes.
- *
- * The constants defined here *cannot* be changed because they must match
- * the C library ABI which defines the following initialization values in
- * <pthread.h>:
- *
- *   __PTHREAD_MUTEX_INIT_VALUE
- *   __PTHREAD_RECURSIVE_MUTEX_VALUE
- *   __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE
- */
-#define  MUTEX_TYPE_SHIFT      14
-#define  MUTEX_TYPE_LEN        2
-#define  MUTEX_TYPE_MASK       FIELD_MASK(MUTEX_TYPE_SHIFT,MUTEX_TYPE_LEN)
-
-#define  MUTEX_TYPE_NORMAL          0  /* Must be 0 to match __PTHREAD_MUTEX_INIT_VALUE */
-#define  MUTEX_TYPE_RECURSIVE       1
-#define  MUTEX_TYPE_ERRORCHECK      2
-
-#define  MUTEX_TYPE_TO_BITS(t)       FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)
-
-#define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(MUTEX_TYPE_NORMAL)
-#define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(MUTEX_TYPE_RECURSIVE)
-#define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(MUTEX_TYPE_ERRORCHECK)
-
-/* Mutex owner field:
- *
- * This is only used for recursive and errorcheck mutexes. It holds the
- * tid of the owning thread. We use 16 bits to represent tid here,
- * so the highest tid is 65535. There is a test to check /proc/sys/kernel/pid_max
- * to make sure it will not exceed our limit.
- */
-#define  MUTEX_OWNER_SHIFT     16
-#define  MUTEX_OWNER_LEN       16
-
-#define  MUTEX_OWNER_FROM_BITS(v)    FIELD_FROM_BITS(v,MUTEX_OWNER_SHIFT,MUTEX_OWNER_LEN)
-#define  MUTEX_OWNER_TO_BITS(v)      FIELD_TO_BITS(v,MUTEX_OWNER_SHIFT,MUTEX_OWNER_LEN)
-
-/* Convenience macros.
- *
- * These are used to form or modify the bit pattern of a given mutex value
- */
-
-
-
 /* a mutex attribute holds the following fields
  *
  * bits:     name       description
@@ -176,7 +53,6 @@
 #define  MUTEXATTR_TYPE_MASK   0x000f
 #define  MUTEXATTR_SHARED_MASK 0x0010
 
-
 int pthread_mutexattr_init(pthread_mutexattr_t *attr)
 {
     *attr = PTHREAD_MUTEX_DEFAULT;
@@ -237,46 +113,174 @@
     return 0;
 }
 
-static inline atomic_int* MUTEX_TO_ATOMIC_POINTER(pthread_mutex_t* mutex) {
-    static_assert(sizeof(atomic_int) == sizeof(mutex->value),
-                  "mutex->value should actually be atomic_int in implementation.");
+/* A mutex contains a state value and an owner_tid.
+ * The state value is implemented as a 16-bit integer holding the following fields:
+ *
+ * bits:     name     description
+ * 15-14     type     mutex type
+ * 13        shared   process-shared flag
+ * 12-2      counter  counter of recursive mutexes
+ * 1-0       state    lock state (0, 1 or 2)
+ *
+ * The owner_tid is used only in recursive and errorcheck mutexes to hold the owner thread's tid.
+ */
 
-    // We prefer casting to atomic_int instead of declaring mutex->value to be atomic_int directly.
-    // Because using the second method pollutes pthread.h, and causes an error when compiling libcxx.
-    return reinterpret_cast<atomic_int*>(&mutex->value);
+/* Convenience macro, creates a mask of 'bits' bits that starts from
+ * the 'shift'-th least significant bit in a 32-bit word.
+ *
+ * Examples: FIELD_MASK(0,4)  -> 0xf
+ *           FIELD_MASK(16,9) -> 0x1ff0000
+ */
+#define  FIELD_MASK(shift,bits)           (((1 << (bits))-1) << (shift))
+
+/* This one is used to create a bit pattern from a given field value */
+#define  FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits))-1)) << (shift))
+
+/* And this one does the opposite, i.e. extract a field's value from a bit pattern */
+#define  FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))
+
+
+/* Convenience macros.
+ *
+ * These are used to form or modify the bit pattern of a given mutex value
+ */
+
+/* Mutex state:
+ *
+ * 0 for unlocked
+ * 1 for locked, no waiters
+ * 2 for locked, maybe waiters
+ */
+#define  MUTEX_STATE_SHIFT      0
+#define  MUTEX_STATE_LEN        2
+
+#define  MUTEX_STATE_MASK           FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
+#define  MUTEX_STATE_FROM_BITS(v)   FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
+#define  MUTEX_STATE_TO_BITS(v)     FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
+
+#define  MUTEX_STATE_UNLOCKED            0   /* must be 0 to match PTHREAD_MUTEX_INITIALIZER */
+#define  MUTEX_STATE_LOCKED_UNCONTENDED  1   /* must be 1 due to atomic dec in unlock operation */
+#define  MUTEX_STATE_LOCKED_CONTENDED    2   /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */
+
+#define  MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
+#define  MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
+#define  MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)
+
+/* return true iff the mutex is locked with no waiters */
+#define  MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)
+
+/* return true iff the mutex is locked with maybe waiters */
+#define  MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)   (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)
+
+/* used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
+#define  MUTEX_STATE_BITS_FLIP_CONTENTION(v)      ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))
+
+/* Mutex counter:
+ *
+ * We need to check for overflow before incrementing, and we also need to
+ * detect when the counter is 0
+ */
+#define  MUTEX_COUNTER_SHIFT         2
+#define  MUTEX_COUNTER_LEN           11
+#define  MUTEX_COUNTER_MASK          FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)
+
+#define  MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)    (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
+#define  MUTEX_COUNTER_BITS_IS_ZERO(v)          (((v) & MUTEX_COUNTER_MASK) == 0)
+
+/* Used to increment the counter directly after overflow has been checked */
+#define  MUTEX_COUNTER_BITS_ONE      FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT,MUTEX_COUNTER_LEN)
+
+/* Mutex shared bit flag
+ *
+ * This flag is set to indicate that the mutex is shared among processes.
+ * This changes the futex opcode we use for futex wait/wake operations
+ * (non-shared operations are much faster).
+ */
+#define  MUTEX_SHARED_SHIFT    13
+#define  MUTEX_SHARED_MASK     FIELD_MASK(MUTEX_SHARED_SHIFT,1)
+
+/* Mutex type:
+ * We support normal, recursive and errorcheck mutexes.
+ */
+#define  MUTEX_TYPE_SHIFT      14
+#define  MUTEX_TYPE_LEN        2
+#define  MUTEX_TYPE_MASK       FIELD_MASK(MUTEX_TYPE_SHIFT,MUTEX_TYPE_LEN)
+
+#define  MUTEX_TYPE_TO_BITS(t)       FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)
+
+#define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL)
+#define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE)
+#define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK)
+
+struct pthread_mutex_internal_t {
+  _Atomic(uint16_t) state;
+#if defined(__LP64__)
+  uint16_t __pad;
+  atomic_int owner_tid;
+  char __reserved[32];
+#else
+  _Atomic(uint16_t) owner_tid;
+#endif
+} __attribute__((aligned(4)));
+
+static_assert(sizeof(pthread_mutex_t) == sizeof(pthread_mutex_internal_t),
+              "pthread_mutex_t should actually be pthread_mutex_internal_t in implementation.");
+
+// For binary compatibility with old versions of pthread_mutex_t, we can't use stricter alignment
+// than 4-byte alignment.
+static_assert(alignof(pthread_mutex_t) == 4,
+              "pthread_mutex_t should fulfill the alignment of pthread_mutex_internal_t.");
+
+static inline pthread_mutex_internal_t* __get_internal_mutex(pthread_mutex_t* mutex_interface) {
+  return reinterpret_cast<pthread_mutex_internal_t*>(mutex_interface);
 }
 
-int pthread_mutex_init(pthread_mutex_t* mutex, const pthread_mutexattr_t* attr) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+int pthread_mutex_init(pthread_mutex_t* mutex_interface, const pthread_mutexattr_t* attr) {
+    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
+
+    memset(mutex, 0, sizeof(pthread_mutex_internal_t));
 
     if (__predict_true(attr == NULL)) {
-        atomic_init(mutex_value_ptr, MUTEX_TYPE_BITS_NORMAL);
+        atomic_init(&mutex->state, MUTEX_TYPE_BITS_NORMAL);
         return 0;
     }
 
-    int value = 0;
+    uint16_t state = 0;
     if ((*attr & MUTEXATTR_SHARED_MASK) != 0) {
-        value |= MUTEX_SHARED_MASK;
+        state |= MUTEX_SHARED_MASK;
     }
 
     switch (*attr & MUTEXATTR_TYPE_MASK) {
     case PTHREAD_MUTEX_NORMAL:
-        value |= MUTEX_TYPE_BITS_NORMAL;
-        break;
+      state |= MUTEX_TYPE_BITS_NORMAL;
+      break;
     case PTHREAD_MUTEX_RECURSIVE:
-        value |= MUTEX_TYPE_BITS_RECURSIVE;
-        break;
+      state |= MUTEX_TYPE_BITS_RECURSIVE;
+      break;
     case PTHREAD_MUTEX_ERRORCHECK:
-        value |= MUTEX_TYPE_BITS_ERRORCHECK;
-        break;
+      state |= MUTEX_TYPE_BITS_ERRORCHECK;
+      break;
     default:
         return EINVAL;
     }
 
-    atomic_init(mutex_value_ptr, value);
+    atomic_init(&mutex->state, state);
+    atomic_init(&mutex->owner_tid, 0);
     return 0;
 }
 
+static inline __always_inline int __pthread_normal_mutex_trylock(pthread_mutex_internal_t* mutex,
+                                                                 uint16_t shared) {
+    const uint16_t unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
+    const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+
+    uint16_t old_state = unlocked;
+    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
+                         locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
+        return 0;
+    }
+    return EBUSY;
+}
 
 /*
  * Lock a mutex of type NORMAL.
@@ -290,55 +294,59 @@
  * "type" value is zero, so the only bits that will be set are the ones in
  * the lock state field.
  */
-static inline void _normal_mutex_lock(atomic_int* mutex_value_ptr, int shared) {
-    /* convenience shortcuts */
-    const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
-    const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-
-    // The common case is an unlocked mutex, so we begin by trying to
-    // change the lock's state from unlocked to locked_uncontended.
-    // If exchanged successfully, An acquire fence is required to make
-    // all memory accesses made by other threads visible in current CPU.
-    int mvalue = unlocked;
-    if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue,
-                                                locked_uncontended,
-                                                memory_order_acquire,
-                                                memory_order_relaxed))) {
-        return;
+static inline __always_inline int __pthread_normal_mutex_lock(pthread_mutex_internal_t* mutex,
+                                                              uint16_t shared,
+                                                              const timespec* abs_timeout_or_null,
+                                                              clockid_t clock) {
+    if (__predict_true(__pthread_normal_mutex_trylock(mutex, shared) == 0)) {
+        return 0;
     }
 
     ScopedTrace trace("Contending for pthread mutex");
 
+    const uint16_t unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
+    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
+
     // We want to go to sleep until the mutex is available, which requires
     // promoting it to locked_contended. We need to swap in the new state
-    // value and then wait until somebody wakes us up.
+    // and then wait until somebody wakes us up.
     // An atomic_exchange is used to compete with other threads for the lock.
     // If it returns unlocked, we have acquired the lock, otherwise another
     // thread still holds the lock and we should wait again.
     // If lock is acquired, an acquire fence is needed to make all memory accesses
-    // made by other threads visible in current CPU.
-    const int locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
-    while (atomic_exchange_explicit(mutex_value_ptr, locked_contended,
+    // made by other threads visible to the current CPU.
+    while (atomic_exchange_explicit(&mutex->state, locked_contended,
                                     memory_order_acquire) != unlocked) {
-
-        __futex_wait_ex(mutex_value_ptr, shared, locked_contended, NULL);
+        timespec ts;
+        timespec* rel_timeout = NULL;
+        if (abs_timeout_or_null != NULL) {
+            rel_timeout = &ts;
+            if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, clock)) {
+                return ETIMEDOUT;
+            }
+        }
+        if (__futex_wait_ex(&mutex->state, shared, locked_contended, rel_timeout) == -ETIMEDOUT) {
+            return ETIMEDOUT;
+        }
     }
+    return 0;
 }
 
 /*
- * Release a mutex of type NORMAL.  The caller is responsible for determining
+ * Release a normal mutex.  The caller is responsible for determining
  * that we are in fact the owner of this lock.
  */
-static inline void _normal_mutex_unlock(atomic_int* mutex_value_ptr, int shared) {
-    const int unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
-    const int locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
+static inline __always_inline void __pthread_normal_mutex_unlock(pthread_mutex_internal_t* mutex,
+                                                                 uint16_t shared) {
+    const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
+    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
 
     // We use an atomic_exchange to release the lock. If locked_contended state
     // is returned, some threads is waiting for the lock and we need to wake up
     // one of them.
     // A release fence is required to make previous stores visible to next
     // lock owner threads.
-    if (atomic_exchange_explicit(mutex_value_ptr, unlocked,
+    if (atomic_exchange_explicit(&mutex->state, unlocked,
                                  memory_order_release) == locked_contended) {
         // Wake up one waiting thread. We don't know which thread will be
         // woken or when it'll start executing -- futexes make no guarantees
@@ -358,79 +366,88 @@
         // we call wake, the thread we eventually wake will find an unlocked mutex
         // and will execute. Either way we have correct behavior and nobody is
         // orphaned on the wait queue.
-        __futex_wake_ex(mutex_value_ptr, shared, 1);
+        __futex_wake_ex(&mutex->state, shared, 1);
     }
 }
 
-/* This common inlined function is used to increment the counter of an
- * errorcheck or recursive mutex.
+/* This common inlined function is used to increment the counter of a recursive mutex.
  *
- * For errorcheck mutexes, it will return EDEADLK
- * If the counter overflows, it will return EAGAIN
- * Otherwise, it atomically increments the counter and returns 0
- * after providing an acquire barrier.
+ * If the counter overflows, it will return EAGAIN.
+ * Otherwise, it atomically increments the counter and returns 0.
  *
- * mtype is the current mutex type
- * mvalue is the current mutex value (already loaded)
- * mutex pointers to the mutex.
  */
-static inline __always_inline
-int _recursive_increment(atomic_int* mutex_value_ptr, int mvalue, int mtype) {
-    if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
-        // Trying to re-lock a mutex we already acquired.
-        return EDEADLK;
-    }
-
+static inline __always_inline int __recursive_increment(pthread_mutex_internal_t* mutex,
+                                                        uint16_t old_state) {
     // Detect recursive lock overflow and return EAGAIN.
     // This is safe because only the owner thread can modify the
     // counter bits in the mutex value.
-    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(mvalue)) {
+    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(old_state)) {
         return EAGAIN;
     }
 
-    // We own the mutex, but other threads are able to change the lower bits
-    // (e.g. promoting it to "contended"), so we need to use an atomic exchange
-    // loop to update the counter. The counter will not overflow in the loop,
-    // as only the owner thread can change it.
-    // The mutex is still locked, so we don't need a release fence.
-    while (!atomic_compare_exchange_weak_explicit(mutex_value_ptr, &mvalue,
-                                                  mvalue + MUTEX_COUNTER_BITS_ONE,
-                                                  memory_order_relaxed,
-                                                  memory_order_relaxed)) { }
+    // Other threads are able to change the lower bits (e.g. promoting it to "contended"),
+    // but the mutex counter will not overflow, so we use an atomic_fetch_add operation here.
+    // The mutex is still locked by the current thread, so we don't need a release fence.
+    atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
     return 0;
 }
 
-int pthread_mutex_lock(pthread_mutex_t* mutex) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+static inline __always_inline int __recursive_or_errorcheck_mutex_wait(
+                                                      pthread_mutex_internal_t* mutex,
+                                                      uint16_t shared,
+                                                      uint16_t old_state,
+                                                      const timespec* rel_timeout) {
+// __futex_wait always waits on a 32-bit value, but state is 16-bit. For a normal mutex, the owner_tid
+// field in the mutex is not used. On 64-bit devices, the __pad field in the mutex is not used.
+// But when a recursive or errorcheck mutex is used on 32-bit devices, we need to include the
+// owner_tid value in the value argument for __futex_wait; otherwise we may always get an EAGAIN error.
 
-    int mvalue, mtype, tid, shared;
+#if defined(__LP64__)
+  return __futex_wait_ex(&mutex->state, shared, old_state, rel_timeout);
 
-    mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
-    mtype = (mvalue & MUTEX_TYPE_MASK);
-    shared = (mvalue & MUTEX_SHARED_MASK);
+#else
+  // This implementation works only when the layout of pthread_mutex_internal_t matches the expectation below.
+  // It also relies on the assumption that Android always runs on little-endian devices.
+  static_assert(offsetof(pthread_mutex_internal_t, state) == 0, "");
+  static_assert(offsetof(pthread_mutex_internal_t, owner_tid) == 2, "");
+
+  uint32_t owner_tid = atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed);
+  return __futex_wait_ex(&mutex->state, shared, (owner_tid << 16) | old_state, rel_timeout);
+#endif
+}
+
+static int __pthread_mutex_lock_with_timeout(pthread_mutex_internal_t* mutex,
+                                           const timespec* abs_timeout_or_null, clockid_t clock) {
+    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
+    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
+    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
 
     // Handle common case first.
     if ( __predict_true(mtype == MUTEX_TYPE_BITS_NORMAL) ) {
-        _normal_mutex_lock(mutex_value_ptr, shared);
-        return 0;
+        return __pthread_normal_mutex_lock(mutex, shared, abs_timeout_or_null, clock);
     }
 
     // Do we already own this recursive or error-check mutex?
-    tid = __get_thread()->tid;
-    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
-        return _recursive_increment(mutex_value_ptr, mvalue, mtype);
+    pid_t tid = __get_thread()->tid;
+    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
+        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
+            return EDEADLK;
+        }
+        return __recursive_increment(mutex, old_state);
+    }
 
-    // Add in shared state to avoid extra 'or' operations below.
-    mtype |= shared;
+    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
+    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+    const uint16_t locked_contended   = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
 
     // First, if the mutex is unlocked, try to quickly acquire it.
     // In the optimistic case where this works, set the state to locked_uncontended.
-    if (mvalue == mtype) {
-        int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
-        if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue,
-                           newval, memory_order_acquire, memory_order_relaxed))) {
+    if (old_state == unlocked) {
+        // If exchanged successfully, an acquire fence is required to make
+        // all memory accesses made by other threads visible to the current CPU.
+        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
+                             locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
+            atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
             return 0;
         }
     }
@@ -438,73 +455,91 @@
     ScopedTrace trace("Contending for pthread mutex");
 
     while (true) {
-        if (mvalue == mtype) {
-            // If the mutex is unlocked, its value should be 'mtype' and
-            // we try to acquire it by setting its owner and state atomically.
+        if (old_state == unlocked) {
             // NOTE: We put the state to locked_contended since we _know_ there
             // is contention when we are in this loop. This ensures all waiters
             // will be unlocked.
 
-            int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
-            // If exchanged successfully, An acquire fence is required to make
-            // all memory accesses made by other threads visible in current CPU.
-            if (__predict_true(atomic_compare_exchange_weak_explicit(mutex_value_ptr,
-                                                                     &mvalue, newval,
+            // If exchanged successfully, an acquire fence is required to make
+            // all memory accesses made by other threads visible to the current CPU.
+            if (__predict_true(atomic_compare_exchange_weak_explicit(&mutex->state,
+                                                                     &old_state, locked_contended,
                                                                      memory_order_acquire,
                                                                      memory_order_relaxed))) {
+                atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
                 return 0;
             }
             continue;
-        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
-            // The mutex is already locked by another thread, if the state is locked_uncontended,
-            // we should set it to locked_contended beforing going to sleep. This can make
+        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(old_state)) {
+            // We should set it to locked_contended before going to sleep. This can make
             // sure waiters will be woken up eventually.
 
-            int newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue);
-            if (__predict_false(!atomic_compare_exchange_weak_explicit(mutex_value_ptr,
-                                                                       &mvalue, newval,
+            int new_state = MUTEX_STATE_BITS_FLIP_CONTENTION(old_state);
+            if (__predict_false(!atomic_compare_exchange_weak_explicit(&mutex->state,
+                                                                       &old_state, new_state,
                                                                        memory_order_relaxed,
                                                                        memory_order_relaxed))) {
                 continue;
             }
-            mvalue = newval;
+            old_state = new_state;
         }
 
-        // We are in locked_contended state, sleep until someone wake us up.
-        __futex_wait_ex(mutex_value_ptr, shared, mvalue, NULL);
-        mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
+        // We are in locked_contended state, sleep until someone wakes us up.
+        timespec ts;
+        timespec* rel_timeout = NULL;
+        if (abs_timeout_or_null != NULL) {
+            rel_timeout = &ts;
+            if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, clock)) {
+                return ETIMEDOUT;
+            }
+        }
+        if (__recursive_or_errorcheck_mutex_wait(mutex, shared, old_state, rel_timeout) == -ETIMEDOUT) {
+            return ETIMEDOUT;
+        }
+        old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
     }
 }
 
-int pthread_mutex_unlock(pthread_mutex_t* mutex) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+int pthread_mutex_lock(pthread_mutex_t* mutex_interface) {
+    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
 
+    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
+    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
+    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
+    // Fast path: for a normal mutex, try a single trylock before entering the general lock routine.
+    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
+      if (__predict_true(__pthread_normal_mutex_trylock(mutex, shared) == 0)) {
+        return 0;  // Acquired without contention.
+      }
+    }
+    return __pthread_mutex_lock_with_timeout(mutex, NULL, 0);  // NULL: no timeout is supplied.
+}
 
-    mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
-    mtype  = (mvalue & MUTEX_TYPE_MASK);
-    shared = (mvalue & MUTEX_SHARED_MASK);
+int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) {
+    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
+
+    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
+    uint16_t mtype  = (old_state & MUTEX_TYPE_MASK);
+    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
 
     // Handle common case first.
     if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        _normal_mutex_unlock(mutex_value_ptr, shared);
+        __pthread_normal_mutex_unlock(mutex, shared);
         return 0;
     }
 
     // Do we already own this recursive or error-check mutex?
-    tid = __get_thread()->tid;
-    if ( tid != MUTEX_OWNER_FROM_BITS(mvalue) )
+    pid_t tid = __get_thread()->tid;
+    if ( tid != atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed) ) {
         return EPERM;
+    }
 
     // If the counter is > 0, we can simply decrement it atomically.
     // Since other threads can mutate the lower state bits (and only the
     // lower state bits), use a compare_exchange loop to do it.
-    if (!MUTEX_COUNTER_BITS_IS_ZERO(mvalue)) {
+    if (!MUTEX_COUNTER_BITS_IS_ZERO(old_state)) {
         // We still own the mutex, so a release fence is not needed.
-        while (!atomic_compare_exchange_weak_explicit(mutex_value_ptr, &mvalue,
-                                                      mvalue - MUTEX_COUNTER_BITS_ONE,
-                                                      memory_order_relaxed,
-                                                      memory_order_relaxed)) { }
+        atomic_fetch_sub_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
         return 0;
     }
 
@@ -514,172 +549,58 @@
     // to awake.
     // A release fence is required to make previous stores visible to next
     // lock owner threads.
-    mvalue = atomic_exchange_explicit(mutex_value_ptr,
-                                      mtype | shared | MUTEX_STATE_BITS_UNLOCKED,
-                                      memory_order_release);
-    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
-        __futex_wake_ex(mutex_value_ptr, shared, 1);
+    atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
+    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
+    old_state = atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release);
+    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(old_state)) {
+        __futex_wake_ex(&mutex->state, shared, 1);
     }
 
     return 0;
 }
 
-int pthread_mutex_trylock(pthread_mutex_t* mutex) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
+int pthread_mutex_trylock(pthread_mutex_t* mutex_interface) {
+    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
 
-    int mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
-    int mtype  = (mvalue & MUTEX_TYPE_MASK);
-    int shared = (mvalue & MUTEX_SHARED_MASK);
+    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
+    uint16_t mtype  = (old_state & MUTEX_TYPE_MASK);
+    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
+
+    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
+    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
 
     // Handle common case first.
     if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        mvalue = shared | MUTEX_STATE_BITS_UNLOCKED;
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
-        if (atomic_compare_exchange_strong_explicit(mutex_value_ptr,
-                                                    &mvalue,
-                                                    shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED,
-                                                    memory_order_acquire,
-                                                    memory_order_relaxed)) {
-            return 0;
-        }
-        return EBUSY;
+        return __pthread_normal_mutex_trylock(mutex, shared);
     }
 
     // Do we already own this recursive or error-check mutex?
     pid_t tid = __get_thread()->tid;
-    if (tid == MUTEX_OWNER_FROM_BITS(mvalue)) {
+    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
         if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
             return EBUSY;
         }
-        return _recursive_increment(mutex_value_ptr, mvalue, mtype);
+        return __recursive_increment(mutex, old_state);
     }
 
     // Same as pthread_mutex_lock, except that we don't want to wait, and
     // the only operation that can succeed is a single compare_exchange to acquire the
     // lock if it is released / not owned by anyone. No need for a complex loop.
-    // If exchanged successfully, An acquire fence is required to make
-    // all memory accesses made by other threads visible in current CPU.
-    mtype |= shared | MUTEX_STATE_BITS_UNLOCKED;
-    mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-
-    if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr,
-                                                               &mtype, mvalue,
+    // If exchanged successfully, an acquire fence is required to make
+    // all memory accesses made by other threads visible to the current CPU.
+    old_state = unlocked;
+    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
+                                                               locked_uncontended,
                                                                memory_order_acquire,
                                                                memory_order_relaxed))) {
+        atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
         return 0;
     }
     return EBUSY;
 }
 
-static int __pthread_mutex_timedlock(pthread_mutex_t* mutex, const timespec* abs_ts, clockid_t clock) {
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
-
-    timespec ts;
-
-    int mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
-    int mtype  = (mvalue & MUTEX_TYPE_MASK);
-    int shared = (mvalue & MUTEX_SHARED_MASK);
-
-    // Handle common case first.
-    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
-        const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
-        const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-        const int locked_contended   = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
-
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
-        mvalue = unlocked;
-        if (atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue, locked_uncontended,
-                                                    memory_order_acquire, memory_order_relaxed)) {
-            return 0;
-        }
-
-        ScopedTrace trace("Contending for timed pthread mutex");
-
-        // Same as pthread_mutex_lock, except that we can only wait for a specified
-        // time interval. If lock is acquired, an acquire fence is needed to make
-        // all memory accesses made by other threads visible in current CPU.
-        while (atomic_exchange_explicit(mutex_value_ptr, locked_contended,
-                                        memory_order_acquire) != unlocked) {
-            if (!timespec_from_absolute_timespec(ts, *abs_ts, clock)) {
-                return ETIMEDOUT;
-            }
-            __futex_wait_ex(mutex_value_ptr, shared, locked_contended, &ts);
-        }
-
-        return 0;
-    }
-
-    // Do we already own this recursive or error-check mutex?
-    pid_t tid = __get_thread()->tid;
-    if (tid == MUTEX_OWNER_FROM_BITS(mvalue)) {
-        return _recursive_increment(mutex_value_ptr, mvalue, mtype);
-    }
-
-    mtype |= shared;
-
-    // First try a quick lock.
-    if (mvalue == mtype) {
-        int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
-        // If exchanged successfully, An acquire fence is required to make
-        // all memory accesses made by other threads visible in current CPU.
-        if (__predict_true(atomic_compare_exchange_strong_explicit(mutex_value_ptr,
-                                                                   &mvalue, newval,
-                                                                   memory_order_acquire,
-                                                                   memory_order_relaxed))) {
-            return 0;
-        }
-    }
-
-    ScopedTrace trace("Contending for timed pthread mutex");
-
-    // The following implements the same loop as pthread_mutex_lock,
-    // but adds checks to ensure that the operation never exceeds the
-    // absolute expiration time.
-    while (true) {
-        if (mvalue == mtype) { // Unlocked.
-            int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
-            // An acquire fence is needed for successful exchange.
-            if (!atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue, newval,
-                                                         memory_order_acquire,
-                                                         memory_order_relaxed)) {
-                goto check_time;
-            }
-
-            return 0;
-        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
-            // The value is locked. If the state is locked_uncontended, we need to switch
-            // it to locked_contended before sleep, so we can get woken up later.
-            int newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue);
-            if (!atomic_compare_exchange_strong_explicit(mutex_value_ptr, &mvalue, newval,
-                                                         memory_order_relaxed,
-                                                         memory_order_relaxed)) {
-                goto check_time;
-            }
-            mvalue = newval;
-        }
-
-        if (!timespec_from_absolute_timespec(ts, *abs_ts, clock)) {
-            return ETIMEDOUT;
-        }
-
-        if (__futex_wait_ex(mutex_value_ptr, shared, mvalue, &ts) == -ETIMEDOUT) {
-            return ETIMEDOUT;
-        }
-
-check_time:
-        if (!timespec_from_absolute_timespec(ts, *abs_ts, clock)) {
-            return ETIMEDOUT;
-        }
-        // After futex_wait or time costly timespec_from_absolte_timespec,
-        // we'd better read mvalue again in case it is changed.
-        mvalue = atomic_load_explicit(mutex_value_ptr, memory_order_relaxed);
-    }
-}
-
 #if !defined(__LP64__)
-extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex, unsigned ms) {
+extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex_interface, unsigned ms) {
     timespec abs_timeout;
     clock_gettime(CLOCK_MONOTONIC, &abs_timeout);
     abs_timeout.tv_sec  += ms / 1000;
@@ -689,7 +610,8 @@
         abs_timeout.tv_nsec -= NS_PER_S;
     }
 
-    int error = __pthread_mutex_timedlock(mutex, &abs_timeout, CLOCK_MONOTONIC);
+    int error = __pthread_mutex_lock_with_timeout(__get_internal_mutex(mutex_interface),
+                                                  &abs_timeout, CLOCK_MONOTONIC);
     if (error == ETIMEDOUT) {
         error = EBUSY;
     }
@@ -697,18 +619,16 @@
 }
 #endif
 
-int pthread_mutex_timedlock(pthread_mutex_t* mutex, const timespec* abs_timeout) {
-    return __pthread_mutex_timedlock(mutex, abs_timeout, CLOCK_REALTIME);
+int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const timespec* abs_timeout) {
+    return __pthread_mutex_lock_with_timeout(__get_internal_mutex(mutex_interface),
+                                             abs_timeout, CLOCK_REALTIME);
 }
 
-int pthread_mutex_destroy(pthread_mutex_t* mutex) {
+int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {
     // Use trylock to ensure that the mutex is valid and not already locked.
-    int error = pthread_mutex_trylock(mutex);
+    int error = pthread_mutex_trylock(mutex_interface);
     if (error != 0) {
         return error;
     }
-
-    atomic_int* mutex_value_ptr = MUTEX_TO_ATOMIC_POINTER(mutex);
-    atomic_store_explicit(mutex_value_ptr, 0xdead10cc, memory_order_relaxed);
     return 0;
 }
diff --git a/libc/bionic/pthread_rwlock.cpp b/libc/bionic/pthread_rwlock.cpp
index f089940..934210e 100644
--- a/libc/bionic/pthread_rwlock.cpp
+++ b/libc/bionic/pthread_rwlock.cpp
@@ -28,9 +28,11 @@
 
 #include <errno.h>
 #include <stdatomic.h>
+#include <string.h>
 
 #include "pthread_internal.h"
 #include "private/bionic_futex.h"
+#include "private/bionic_lock.h"
 #include "private/bionic_time_conversions.h"
 
 /* Technical note:
@@ -53,18 +55,39 @@
  *  - This implementation will return EDEADLK in "write after write" and "read after
  *    write" cases and will deadlock in write after read case.
  *
- * TODO: As it stands now, pending_readers and pending_writers could be merged into a
- * a single waiters variable.  Keeping them separate adds a bit of clarity and keeps
- * the door open for a writer-biased implementation.
- *
  */
 
-#define RWLOCKATTR_DEFAULT     0
-#define RWLOCKATTR_SHARED_MASK 0x0010
+// A rwlockattr is implemented as a 32-bit integer which has the following fields:
+//  bits    name              description
+//   1      rwlock_kind       the rwlock preference, e.g. PTHREAD_RWLOCK_PREFER_READER_NP.
+//   0      process_shared    set to 1 if the rwlock is shared between processes.
+
+#define RWLOCKATTR_PSHARED_SHIFT 0
+#define RWLOCKATTR_KIND_SHIFT    1
+
+#define RWLOCKATTR_PSHARED_MASK  1
+#define RWLOCKATTR_KIND_MASK     2
+#define RWLOCKATTR_RESERVED_MASK (~3)
+
+static inline __always_inline bool __rwlockattr_getpshared(const pthread_rwlockattr_t* attr) {
+  return (*attr & RWLOCKATTR_PSHARED_MASK) >> RWLOCKATTR_PSHARED_SHIFT;  // True iff the pshared bit is set.
+}
+
+static inline __always_inline void __rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
+  *attr = (*attr & ~RWLOCKATTR_PSHARED_MASK) | (pshared << RWLOCKATTR_PSHARED_SHIFT);  // Replaces only the pshared bit.
+}
+
+static inline __always_inline int __rwlockattr_getkind(const pthread_rwlockattr_t* attr) {
+  return (*attr & RWLOCKATTR_KIND_MASK) >> RWLOCKATTR_KIND_SHIFT;
+}
+
+static inline __always_inline void __rwlockattr_setkind(pthread_rwlockattr_t* attr, int kind) {
+  *attr = (*attr & ~RWLOCKATTR_KIND_MASK) | (kind << RWLOCKATTR_KIND_SHIFT);
+}
 
 
 int pthread_rwlockattr_init(pthread_rwlockattr_t* attr) {
-  *attr = PTHREAD_PROCESS_PRIVATE;
+  *attr = 0;
   return 0;
 }
 
@@ -73,67 +96,158 @@
   return 0;
 }
 
+int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
+  if (__rwlockattr_getpshared(attr)) {
+    *pshared = PTHREAD_PROCESS_SHARED;
+  } else {
+    *pshared = PTHREAD_PROCESS_PRIVATE;
+  }
+  return 0;
+}
+
 int pthread_rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
   switch (pshared) {
     case PTHREAD_PROCESS_PRIVATE:
+      __rwlockattr_setpshared(attr, 0);
+      return 0;
     case PTHREAD_PROCESS_SHARED:
-      *attr = pshared;
+      __rwlockattr_setpshared(attr, 1);
       return 0;
     default:
       return EINVAL;
   }
 }
 
-int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
-  *pshared = *attr;
+int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t* attr, int* pref) {
+  *pref = __rwlockattr_getkind(attr);
   return 0;
 }
 
-struct pthread_rwlock_internal_t {
-  atomic_int state; // 0=unlock, -1=writer lock, +n=reader lock
-  atomic_int writer_thread_id;
-  atomic_uint pending_readers;
-  atomic_uint pending_writers;
-  int32_t attr;
-
-  bool process_shared() const {
-    return attr == PTHREAD_PROCESS_SHARED;
+int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t* attr, int pref) {
+  switch (pref) {
+    case PTHREAD_RWLOCK_PREFER_READER_NP:   // Fall through.
+    case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
+      __rwlockattr_setkind(attr, pref);
+      return 0;
+    default:
+      return EINVAL;
   }
+}
+
+// A rwlock state is implemented as a 32-bit integer which has the following fields:
+//  bits      name                              description
+//   31      owned_by_writer_flag              set to 1 if the lock is owned by a writer now.
+//  30-2     reader_count                      the count of readers holding the lock.
+//   1       have_pending_writers              set to 1 if there are pending writers.
+//   0       have_pending_readers              set to 1 if there are pending readers.
+
+#define STATE_HAVE_PENDING_READERS_SHIFT    0
+#define STATE_HAVE_PENDING_WRITERS_SHIFT    1
+#define STATE_READER_COUNT_SHIFT            2
+#define STATE_OWNED_BY_WRITER_SHIFT        31
+
+#define STATE_HAVE_PENDING_READERS_FLAG     (1 << STATE_HAVE_PENDING_READERS_SHIFT)
+#define STATE_HAVE_PENDING_WRITERS_FLAG     (1 << STATE_HAVE_PENDING_WRITERS_SHIFT)
+#define STATE_READER_COUNT_CHANGE_STEP  (1 << STATE_READER_COUNT_SHIFT)
+#define STATE_OWNED_BY_WRITER_FLAG      (1 << STATE_OWNED_BY_WRITER_SHIFT)
+
+#define STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG \
+          (STATE_HAVE_PENDING_READERS_FLAG | STATE_HAVE_PENDING_WRITERS_FLAG)
+
+struct pthread_rwlock_internal_t {
+  atomic_int state;  // Bit field; see the STATE_* definitions above.
+  atomic_int writer_tid;  // Set to the owner's tid on write-lock acquisition; compared for EDEADLK.
+
+  bool pshared;  // Copied from the attr's process_shared bit in pthread_rwlock_init.
+  bool writer_nonrecursive_preferred;  // True for PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP.
+  uint16_t __pad;
+
+// When a reader thread plans to suspend on the rwlock, it sets STATE_HAVE_PENDING_READERS_FLAG
+// in state, increases pending_reader_count, and waits on pending_reader_wakeup_serial. After being
+// woken up, the reader thread decreases pending_reader_count, and the last pending reader thread
+// clears STATE_HAVE_PENDING_READERS_FLAG in state. A pending writer thread works in a similar way,
+// except that it uses the flag and members for writer threads.
+
+  Lock pending_lock;  // All pending members below are protected by pending_lock.
+  uint32_t pending_reader_count;  // Count of pending reader threads.
+  uint32_t pending_writer_count;  // Count of pending writer threads.
+  uint32_t pending_reader_wakeup_serial;  // Pending reader threads wait on this address by futex_wait.
+  uint32_t pending_writer_wakeup_serial;  // Pending writer threads wait on this address by futex_wait.
 
 #if defined(__LP64__)
-  char __reserved[36];
-#else
   char __reserved[20];
+#else
+  char __reserved[4];
 #endif
 };
 
-static inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) {
-  static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t),
-                "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation.");
+static inline __always_inline bool __state_owned_by_writer(int state) {
+  return state < 0;
+}
+
+static inline __always_inline bool __state_owned_by_readers(int state) {
+  // If state >= 0, the owned_by_writer_flag is not set.
+  // And if state >= STATE_READER_COUNT_CHANGE_STEP, the reader_count field is not empty.
+  return state >= STATE_READER_COUNT_CHANGE_STEP;
+}
+
+static inline __always_inline bool __state_owned_by_readers_or_writer(int state) {
+  return state < 0 || state >= STATE_READER_COUNT_CHANGE_STEP;
+}
+
+static inline __always_inline int __state_add_writer_flag(int state) {
+  return state | STATE_OWNED_BY_WRITER_FLAG;
+}
+
+static inline __always_inline bool __state_is_last_reader(int state) {
+  return (state >> STATE_READER_COUNT_SHIFT) == 1;
+}
+
+static inline __always_inline bool __state_have_pending_writers(int state) {
+  return state & STATE_HAVE_PENDING_WRITERS_FLAG;
+}
+
+static inline __always_inline bool __state_have_pending_readers_or_writers(int state) {
+  return state & STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG;
+}
+
+static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t),
+              "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation.");
+
+// For binary compatibility with old version of pthread_rwlock_t, we can't use more strict
+// alignment than 4-byte alignment.
+static_assert(alignof(pthread_rwlock_t) == 4,
+             "pthread_rwlock_t should fulfill the alignment requirement of pthread_rwlock_internal_t.");
+
+static inline __always_inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) {
   return reinterpret_cast<pthread_rwlock_internal_t*>(rwlock_interface);
 }
 
 int pthread_rwlock_init(pthread_rwlock_t* rwlock_interface, const pthread_rwlockattr_t* attr) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
 
-  if (__predict_true(attr == NULL)) {
-    rwlock->attr = 0;
-  } else {
-    switch (*attr) {
-      case PTHREAD_PROCESS_SHARED:
-      case PTHREAD_PROCESS_PRIVATE:
-        rwlock->attr= *attr;
+  memset(rwlock, 0, sizeof(pthread_rwlock_internal_t));
+
+  if (__predict_false(attr != NULL)) {
+    rwlock->pshared = __rwlockattr_getpshared(attr);
+    int kind = __rwlockattr_getkind(attr);
+    switch (kind) {
+      case PTHREAD_RWLOCK_PREFER_READER_NP:
+        rwlock->writer_nonrecursive_preferred = false;
+        break;
+      case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
+        rwlock->writer_nonrecursive_preferred = true;
         break;
       default:
         return EINVAL;
     }
+    if ((*attr & RWLOCKATTR_RESERVED_MASK) != 0) {
+      return EINVAL;
+    }
   }
 
   atomic_init(&rwlock->state, 0);
-  atomic_init(&rwlock->writer_thread_id, 0);
-  atomic_init(&rwlock->pending_readers, 0);
-  atomic_init(&rwlock->pending_writers, 0);
-
+  rwlock->pending_lock.init(rwlock->pshared);
   return 0;
 }
 
@@ -146,105 +260,173 @@
   return 0;
 }
 
+static inline __always_inline bool __can_acquire_read_lock(int old_state,
+                                                             bool writer_nonrecursive_preferred) {
+  // If writer is preferred with nonrecursive reader, we prevent further readers from acquiring
+  // the lock when there are writers waiting for the lock.
+  bool cannot_apply = __state_owned_by_writer(old_state) ||
+                      (writer_nonrecursive_preferred && __state_have_pending_writers(old_state));
+  return !cannot_apply;
+}
+
+static inline __always_inline int __pthread_rwlock_tryrdlock(pthread_rwlock_internal_t* rwlock) {
+  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
+
+  while (__predict_true(__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred))) {
+
+    int new_state = old_state + STATE_READER_COUNT_CHANGE_STEP;  // Add one reader to the count field.
+    if (__predict_false(!__state_owned_by_readers(new_state))) { // Happens when reader count overflows.
+      return EAGAIN;
+    }
+    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, new_state,
+                                              memory_order_acquire, memory_order_relaxed))) {
+      return 0;  // Read lock acquired.
+    }
+  }
+  return EBUSY;  // Owned by a writer, or writers are pending and preferred.
+}
+
 static int __pthread_rwlock_timedrdlock(pthread_rwlock_internal_t* rwlock,
                                         const timespec* abs_timeout_or_null) {
 
-  if (__predict_false(__get_thread()->tid == atomic_load_explicit(&rwlock->writer_thread_id,
-                                                                  memory_order_relaxed))) {
+  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
     return EDEADLK;
   }
 
   while (true) {
+    int ret = __pthread_rwlock_tryrdlock(rwlock);
+    if (ret == 0 || ret == EAGAIN) {
+      return ret;
+    }
+
     int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-    if (__predict_true(old_state >= 0)) {
-      if (atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, old_state + 1,
-                                                memory_order_acquire, memory_order_relaxed)) {
-        return 0;
-      }
-    } else {
-      timespec ts;
-      timespec* rel_timeout = NULL;
+    if (__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
+      continue;
+    }
 
-      if (abs_timeout_or_null != NULL) {
-        rel_timeout = &ts;
-        if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
-          return ETIMEDOUT;
-        }
-      }
+    timespec ts;
+    timespec* rel_timeout = NULL;
 
-      // To avoid losing wake ups, the pending_readers increment should be observed before
-      // futex_wait by all threads. A seq_cst fence instead of a seq_cst operation is used
-      // here. Because only a seq_cst fence can ensure sequential consistency for non-atomic
-      // operations in futex_wait.
-      atomic_fetch_add_explicit(&rwlock->pending_readers, 1, memory_order_relaxed);
-
-      atomic_thread_fence(memory_order_seq_cst);
-
-      int ret = __futex_wait_ex(&rwlock->state, rwlock->process_shared(), old_state,
-                                rel_timeout);
-
-      atomic_fetch_sub_explicit(&rwlock->pending_readers, 1, memory_order_relaxed);
-
-      if (ret == -ETIMEDOUT) {
+    if (abs_timeout_or_null != NULL) {
+      rel_timeout = &ts;
+      if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
         return ETIMEDOUT;
       }
     }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_reader_count++;
+
+    // We rely on the fact that all atomic exchange operations on the same object (here it is
+    // rwlock->state) always appear to occur in a single total order. If the pending flag is added
+    // before unlocking, the unlocking thread will wakeup the waiter. Otherwise, we will see the
+    // state is unlocked and will not wait anymore.
+    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_READERS_FLAG,
+                                         memory_order_relaxed);
+
+    int old_serial = rwlock->pending_reader_wakeup_serial;
+    rwlock->pending_lock.unlock();
+
+    int futex_ret = 0;
+    if (!__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
+      futex_ret = __futex_wait_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared,
+                                  old_serial, rel_timeout);
+    }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_reader_count--;
+    if (rwlock->pending_reader_count == 0) {
+      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_READERS_FLAG,
+                                memory_order_relaxed);
+    }
+    rwlock->pending_lock.unlock();
+
+    if (futex_ret == -ETIMEDOUT) {
+      return ETIMEDOUT;
+    }
   }
 }
 
+static inline __always_inline bool __can_acquire_write_lock(int old_state) {
+  return !__state_owned_by_readers_or_writer(old_state);
+}
+
+static inline __always_inline int __pthread_rwlock_trywrlock(pthread_rwlock_internal_t* rwlock) {
+  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
+
+  while (__predict_true(__can_acquire_write_lock(old_state))) {
+    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
+          __state_add_writer_flag(old_state), memory_order_acquire, memory_order_relaxed))) {
+
+      atomic_store_explicit(&rwlock->writer_tid, __get_thread()->tid, memory_order_relaxed);  // Record the owner.
+      return 0;
+    }
+  }
+  return EBUSY;  // Currently owned by readers or by a writer.
+}
+
 static int __pthread_rwlock_timedwrlock(pthread_rwlock_internal_t* rwlock,
                                         const timespec* abs_timeout_or_null) {
 
-  if (__predict_false(__get_thread()->tid == atomic_load_explicit(&rwlock->writer_thread_id,
-                                                                  memory_order_relaxed))) {
+  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
     return EDEADLK;
   }
-
   while (true) {
+    int ret = __pthread_rwlock_trywrlock(rwlock);
+    if (ret == 0) {
+      return ret;
+    }
+
     int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-    if (__predict_true(old_state == 0)) {
-      if (atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, -1,
-                                                memory_order_acquire, memory_order_relaxed)) {
-        // writer_thread_id is protected by rwlock and can only be modified in rwlock write
-        // owner thread. Other threads may read it for EDEADLK error checking, atomic operation
-        // is safe enough for it.
-        atomic_store_explicit(&rwlock->writer_thread_id, __get_thread()->tid, memory_order_relaxed);
-        return 0;
-      }
-    } else {
-      timespec ts;
-      timespec* rel_timeout = NULL;
+    if (__can_acquire_write_lock(old_state)) {
+      continue;
+    }
 
-      if (abs_timeout_or_null != NULL) {
-        rel_timeout = &ts;
-        if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
-          return ETIMEDOUT;
-        }
-      }
+    timespec ts;
+    timespec* rel_timeout = NULL;
 
-      // To avoid losing wake ups, the pending_writers increment should be observed before
-      // futex_wait by all threads. A seq_cst fence instead of a seq_cst operation is used
-      // here. Because only a seq_cst fence can ensure sequential consistency for non-atomic
-      // operations in futex_wait.
-      atomic_fetch_add_explicit(&rwlock->pending_writers, 1, memory_order_relaxed);
-
-      atomic_thread_fence(memory_order_seq_cst);
-
-      int ret = __futex_wait_ex(&rwlock->state, rwlock->process_shared(), old_state,
-                                rel_timeout);
-
-      atomic_fetch_sub_explicit(&rwlock->pending_writers, 1, memory_order_relaxed);
-
-      if (ret == -ETIMEDOUT) {
+    if (abs_timeout_or_null != NULL) {
+      rel_timeout = &ts;
+      if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
         return ETIMEDOUT;
       }
     }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_writer_count++;
+
+    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_WRITERS_FLAG,
+                                         memory_order_relaxed);
+
+    int old_serial = rwlock->pending_writer_wakeup_serial;
+    rwlock->pending_lock.unlock();
+
+    int futex_ret = 0;
+    if (!__can_acquire_write_lock(old_state)) {
+      futex_ret = __futex_wait_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared,
+                                  old_serial, rel_timeout);
+    }
+
+    rwlock->pending_lock.lock();
+    rwlock->pending_writer_count--;
+    if (rwlock->pending_writer_count == 0) {
+      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_WRITERS_FLAG,
+                                memory_order_relaxed);
+    }
+    rwlock->pending_lock.unlock();
+
+    if (futex_ret == -ETIMEDOUT) {
+      return ETIMEDOUT;
+    }
   }
 }
 
 int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock_interface) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
+  // Avoid slowing down fast path of rdlock.
+  if (__predict_true(__pthread_rwlock_tryrdlock(rwlock) == 0)) {
+    return 0;
+  }
   return __pthread_rwlock_timedrdlock(rwlock, NULL);
 }
 
@@ -255,19 +437,15 @@
 }
 
 int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock_interface) {
-  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
-  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-
-  while (old_state >= 0 && !atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
-                             old_state + 1, memory_order_acquire, memory_order_relaxed)) {
-  }
-  return (old_state >= 0) ? 0 : EBUSY;
+  return __pthread_rwlock_tryrdlock(__get_internal_rwlock(rwlock_interface));
 }
 
 int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock_interface) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
+  // Avoid slowing down fast path of wrlock.
+  if (__predict_true(__pthread_rwlock_trywrlock(rwlock) == 0)) {
+    return 0;
+  }
   return __pthread_rwlock_timedwrlock(rwlock, NULL);
 }
 
@@ -278,65 +456,52 @@
 }
 
 int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock_interface) {
-  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
-
-  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-
-  while (old_state == 0 && !atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, -1,
-                                              memory_order_acquire, memory_order_relaxed)) {
-  }
-  if (old_state == 0) {
-    atomic_store_explicit(&rwlock->writer_thread_id, __get_thread()->tid, memory_order_relaxed);
-    return 0;
-  }
-  return EBUSY;
+  return __pthread_rwlock_trywrlock(__get_internal_rwlock(rwlock_interface));
 }
 
-
 int pthread_rwlock_unlock(pthread_rwlock_t* rwlock_interface) {
   pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
 
   int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
-  if (__predict_false(old_state == 0)) {
-    return EPERM;
-  } else if (old_state == -1) {
-    if (atomic_load_explicit(&rwlock->writer_thread_id, memory_order_relaxed) != __get_thread()->tid) {
+  if (__state_owned_by_writer(old_state)) {
+    if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) != __get_thread()->tid) {
       return EPERM;
     }
-    // We're no longer the owner.
-    atomic_store_explicit(&rwlock->writer_thread_id, 0, memory_order_relaxed);
-    // Change state from -1 to 0.
-    atomic_store_explicit(&rwlock->state, 0, memory_order_release);
-
-  } else { // old_state > 0
-    // Reduce state by 1.
-    while (old_state > 0 && !atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
-                               old_state - 1, memory_order_release, memory_order_relaxed)) {
-    }
-
-    if (old_state <= 0) {
-      return EPERM;
-    } else if (old_state > 1) {
+    atomic_store_explicit(&rwlock->writer_tid, 0, memory_order_relaxed);
+    old_state = atomic_fetch_and_explicit(&rwlock->state, ~STATE_OWNED_BY_WRITER_FLAG,
+                                          memory_order_release);
+    if (!__state_have_pending_readers_or_writers(old_state)) {
       return 0;
     }
-    // old_state = 1, which means the last reader calling unlock. It has to wake up waiters.
+
+  } else if (__state_owned_by_readers(old_state)) {
+    old_state = atomic_fetch_sub_explicit(&rwlock->state, STATE_READER_COUNT_CHANGE_STEP,
+                                          memory_order_release);
+    if (!__state_is_last_reader(old_state) || !__state_have_pending_readers_or_writers(old_state)) {
+      return 0;
+    }
+
+  } else {
+    return EPERM;
   }
 
-  // If having waiters, wake up them.
-  // To avoid losing wake ups, the update of state should be observed before reading
-  // pending_readers/pending_writers by all threads. Use read locking as an example:
-  //     read locking thread                        unlocking thread
-  //      pending_readers++;                         state = 0;
-  //      seq_cst fence                              seq_cst fence
-  //      read state for futex_wait                  read pending_readers for futex_wake
-  //
-  // So when locking and unlocking threads are running in parallel, we will not get
-  // in a situation that the locking thread reads state as negative and needs to wait,
-  // while the unlocking thread reads pending_readers as zero and doesn't need to wake up waiters.
-  atomic_thread_fence(memory_order_seq_cst);
-  if (__predict_false(atomic_load_explicit(&rwlock->pending_readers, memory_order_relaxed) > 0 ||
-                      atomic_load_explicit(&rwlock->pending_writers, memory_order_relaxed) > 0)) {
-    __futex_wake_ex(&rwlock->state, rwlock->process_shared(), INT_MAX);
+  // Wake up pending readers or writers.
+  rwlock->pending_lock.lock();
+  if (rwlock->pending_writer_count != 0) {
+    rwlock->pending_writer_wakeup_serial++;
+    rwlock->pending_lock.unlock();
+
+    __futex_wake_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared, 1);
+
+  } else if (rwlock->pending_reader_count != 0) {
+    rwlock->pending_reader_wakeup_serial++;
+    rwlock->pending_lock.unlock();
+
+    __futex_wake_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared, INT_MAX);
+
+  } else {
+    // This happens when the waiters were woken up by a timeout.
+    rwlock->pending_lock.unlock();
   }
   return 0;
 }
diff --git a/libc/bionic/pthread_setname_np.cpp b/libc/bionic/pthread_setname_np.cpp
index c4e9fb8..bb1114e 100644
--- a/libc/bionic/pthread_setname_np.cpp
+++ b/libc/bionic/pthread_setname_np.cpp
@@ -36,9 +36,8 @@
 #include <sys/types.h>
 #include <unistd.h>
 
-#include "pthread_accessor.h"
-#include "pthread_internal.h"
 #include "private/ErrnoRestorer.h"
+#include "pthread_internal.h"
 
 // This value is not exported by kernel headers.
 #define MAX_TASK_COMM_LEN 16
@@ -58,14 +57,12 @@
   }
 
   // We have to change another thread's name.
-  pid_t tid = 0;
-  {
-    pthread_accessor thread(t);
-    if (thread.get() == NULL) {
-      return ENOENT;
-    }
-    tid = thread->tid;
+  pthread_internal_t* thread = __pthread_internal_find(t);
+  if (thread == NULL) {
+    return ENOENT;
   }
+  pid_t tid = thread->tid;
+
   char comm_name[sizeof(TASK_COMM_FMT) + 8];
   snprintf(comm_name, sizeof(comm_name), TASK_COMM_FMT, tid);
   int fd = open(comm_name, O_CLOEXEC | O_WRONLY);
diff --git a/libc/bionic/pthread_setschedparam.cpp b/libc/bionic/pthread_setschedparam.cpp
index 419cc6f..0ad68bb 100644
--- a/libc/bionic/pthread_setschedparam.cpp
+++ b/libc/bionic/pthread_setschedparam.cpp
@@ -29,13 +29,13 @@
 #include <errno.h>
 
 #include "private/ErrnoRestorer.h"
-#include "pthread_accessor.h"
+#include "pthread_internal.h"
 
 int pthread_setschedparam(pthread_t t, int policy, const sched_param* param) {
   ErrnoRestorer errno_restorer;
 
-  pthread_accessor thread(t);
-  if (thread.get() == NULL) {
+  pthread_internal_t* thread = __pthread_internal_find(t);
+  if (thread == NULL) {
     return ESRCH;
   }
 
diff --git a/libc/bionic/pty.cpp b/libc/bionic/pty.cpp
index 8847147..1a37847 100644
--- a/libc/bionic/pty.cpp
+++ b/libc/bionic/pty.cpp
@@ -38,8 +38,8 @@
 
 #include "private/ThreadLocalBuffer.h"
 
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(ptsname);
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(ttyname);
+static ThreadLocalBuffer<char, 32> g_ptsname_tls_buffer;
+static ThreadLocalBuffer<char, 64> g_ttyname_tls_buffer;
 
 int getpt() {
   return posix_openpt(O_RDWR|O_NOCTTY);
@@ -54,9 +54,9 @@
 }
 
 char* ptsname(int fd) {
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, ptsname, 32);
-  int error = ptsname_r(fd, ptsname_tls_buffer, ptsname_tls_buffer_size);
-  return (error == 0) ? ptsname_tls_buffer : NULL;
+  char* buf = g_ptsname_tls_buffer.get();
+  int error = ptsname_r(fd, buf, g_ptsname_tls_buffer.size());
+  return (error == 0) ? buf : NULL;
 }
 
 int ptsname_r(int fd, char* buf, size_t len) {
@@ -80,9 +80,9 @@
 }
 
 char* ttyname(int fd) {
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, ttyname, 64);
-  int error = ttyname_r(fd, ttyname_tls_buffer, ttyname_tls_buffer_size);
-  return (error == 0) ? ttyname_tls_buffer : NULL;
+  char* buf = g_ttyname_tls_buffer.get();
+  int error = ttyname_r(fd, buf, g_ttyname_tls_buffer.size());
+  return (error == 0) ? buf : NULL;
 }
 
 int ttyname_r(int fd, char* buf, size_t len) {
diff --git a/libc/bionic/readlink.cpp b/libc/bionic/readlink.cpp
index 3bb7bc1..a53f933 100644
--- a/libc/bionic/readlink.cpp
+++ b/libc/bionic/readlink.cpp
@@ -26,6 +26,8 @@
  * SUCH DAMAGE.
  */
 
+#undef _FORTIFY_SOURCE
+
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
diff --git a/libc/bionic/statvfs.cpp b/libc/bionic/statvfs.cpp
index f1e2833..39ffb64 100644
--- a/libc/bionic/statvfs.cpp
+++ b/libc/bionic/statvfs.cpp
@@ -21,13 +21,17 @@
 // Paper over the fact that 32-bit kernels use fstatfs64/statfs64 with an extra argument,
 // but 64-bit kernels don't have the "64" bit suffix or the extra size_t argument.
 #if __LP64__
-#  define __fstatfs64(fd,size,buf) fstatfs(fd,buf)
-#  define __statfs64(path,size,buf) statfs(path,buf)
+extern "C" int __fstatfs(int, struct statfs*);
+extern "C" int __statfs(const char*, struct statfs*);
+#  define __fstatfs64(fd,size,buf) __fstatfs(fd,buf)
+#  define __statfs64(path,size,buf) __statfs(path,buf)
 #else
 extern "C" int __fstatfs64(int, size_t, struct statfs*);
 extern "C" int __statfs64(const char*, size_t, struct statfs*);
 #endif
 
+// The kernel sets a private ST_VALID flag to signal to the C library whether the
+// f_flags field is valid. This flag should not be exposed to users of the C library.
 #define ST_VALID 0x0020
 
 static void __statfs_to_statvfs(const struct statfs& in, struct statvfs* out) {
@@ -40,13 +44,33 @@
   out->f_ffree = in.f_ffree;
   out->f_favail = in.f_ffree;
   out->f_fsid = in.f_fsid.__val[0] | (static_cast<uint64_t>(in.f_fsid.__val[1]) << 32);
-  out->f_flag = in.f_flags & ~ST_VALID;
+  out->f_flag = in.f_flags;
   out->f_namemax = in.f_namelen;
 }
 
+int fstatfs(int fd, struct statfs* result) {
+  int rc = __fstatfs64(fd, sizeof(*result), result);
+  if (rc != 0) {
+    return rc;
+  }
+  result->f_flags &= ~ST_VALID;
+  return 0;
+}
+__strong_alias(fstatfs64, fstatfs);
+
+int statfs(const char* path, struct statfs* result) {
+  int rc = __statfs64(path, sizeof(*result), result);
+  if (rc != 0) {
+    return rc;
+  }
+  result->f_flags &= ~ST_VALID;
+  return 0;
+}
+__strong_alias(statfs64, statfs);
+
 int statvfs(const char* path, struct statvfs* result) {
   struct statfs tmp;
-  int rc = __statfs64(path, sizeof(tmp), &tmp);
+  int rc = statfs(path, &tmp);
   if (rc != 0) {
     return rc;
   }
@@ -57,7 +81,7 @@
 
 int fstatvfs(int fd, struct statvfs* result) {
   struct statfs tmp;
-  int rc = __fstatfs64(fd, sizeof(tmp), &tmp);
+  int rc = fstatfs(fd, &tmp);
   if (rc != 0) {
     return rc;
   }
diff --git a/libc/bionic/strerror.cpp b/libc/bionic/strerror.cpp
index d1518ff..f74194f 100644
--- a/libc/bionic/strerror.cpp
+++ b/libc/bionic/strerror.cpp
@@ -31,16 +31,16 @@
 
 extern "C" const char* __strerror_lookup(int);
 
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(strerror);
+static ThreadLocalBuffer<char, NL_TEXTMAX> g_strerror_tls_buffer;
 
 char* strerror(int error_number) {
   // Just return the original constant in the easy cases.
   char* result = const_cast<char*>(__strerror_lookup(error_number));
-  if (result != NULL) {
+  if (result != nullptr) {
     return result;
   }
 
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, strerror, NL_TEXTMAX);
-  strerror_r(error_number, strerror_tls_buffer, strerror_tls_buffer_size);
-  return strerror_tls_buffer;
+  result = g_strerror_tls_buffer.get();
+  strerror_r(error_number, result, g_strerror_tls_buffer.size());
+  return result;
 }
diff --git a/libc/bionic/strsignal.cpp b/libc/bionic/strsignal.cpp
index 9f0193a..c389ddd 100644
--- a/libc/bionic/strsignal.cpp
+++ b/libc/bionic/strsignal.cpp
@@ -32,7 +32,7 @@
 extern "C" const char* __strsignal_lookup(int);
 extern "C" const char* __strsignal(int, char*, size_t);
 
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(strsignal);
+static ThreadLocalBuffer<char, NL_TEXTMAX> g_strsignal_tls_buffer;
 
 char* strsignal(int signal_number) {
   // Just return the original constant in the easy cases.
@@ -41,6 +41,6 @@
     return result;
   }
 
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, strsignal, NL_TEXTMAX);
-  return const_cast<char*>(__strsignal(signal_number, strsignal_tls_buffer, strsignal_tls_buffer_size));
+  return const_cast<char*>(__strsignal(signal_number, g_strsignal_tls_buffer.get(),
+                                       g_strsignal_tls_buffer.size()));
 }
diff --git a/libc/bionic/stubs.cpp b/libc/bionic/stubs.cpp
index f9a31b9..b57aeda 100644
--- a/libc/bionic/stubs.cpp
+++ b/libc/bionic/stubs.cpp
@@ -35,6 +35,7 @@
 #include <pwd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 
 #include "private/android_filesystem_config.h"
@@ -49,25 +50,12 @@
 // functions to share state, but <grp.h> functions can't clobber <passwd.h>
 // functions' state and vice versa.
 
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(group);
-
 struct group_state_t {
   group group_;
   char* group_members_[2];
   char group_name_buffer_[32];
 };
 
-static group_state_t* __group_state() {
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(group_state_t*, group, sizeof(group_state_t));
-  if (group_tls_buffer != NULL) {
-    memset(group_tls_buffer, 0, sizeof(group_state_t));
-    group_tls_buffer->group_.gr_mem = group_tls_buffer->group_members_;
-  }
-  return group_tls_buffer;
-}
-
-GLOBAL_INIT_THREAD_LOCAL_BUFFER(passwd);
-
 struct passwd_state_t {
   passwd passwd_;
   char name_buffer_[32];
@@ -75,9 +63,16 @@
   char sh_buffer_[32];
 };
 
-static passwd_state_t* __passwd_state() {
-  LOCAL_INIT_THREAD_LOCAL_BUFFER(passwd_state_t*, passwd, sizeof(passwd_state_t));
-  return passwd_tls_buffer;
+static ThreadLocalBuffer<group_state_t> g_group_tls_buffer;
+static ThreadLocalBuffer<passwd_state_t> g_passwd_tls_buffer;
+
+static group_state_t* __group_state() {
+  group_state_t* result = g_group_tls_buffer.get();
+  if (result != nullptr) {
+    memset(result, 0, sizeof(group_state_t));
+    result->group_.gr_mem = result->group_members_;
+  }
+  return result;
 }
 
 static int do_getpw_r(int by_name, const char* name, uid_t uid,
@@ -250,6 +245,7 @@
         appid = android_ids[n].aid;
         // Move the end pointer to the null terminator.
         end += strlen(android_ids[n].name) + 1;
+        break;
       }
     }
   }
@@ -361,7 +357,7 @@
 }
 
 passwd* getpwuid(uid_t uid) { // NOLINT: implementing bad function.
-  passwd_state_t* state = __passwd_state();
+  passwd_state_t* state = g_passwd_tls_buffer.get();
   if (state == NULL) {
     return NULL;
   }
@@ -374,7 +370,7 @@
 }
 
 passwd* getpwnam(const char* login) { // NOLINT: implementing bad function.
-  passwd_state_t* state = __passwd_state();
+  passwd_state_t* state = g_passwd_tls_buffer.get();
   if (state == NULL) {
     return NULL;
   }
diff --git a/libc/dns/resolv/res_state.c b/libc/dns/resolv/res_state.c
index 459f073..afccd99 100644
--- a/libc/dns/resolv/res_state.c
+++ b/libc/dns/resolv/res_state.c
@@ -39,8 +39,6 @@
 #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
 #include <sys/_system_properties.h>
 
-#include "private/ThreadLocalBuffer.h"
-
 /* Set to 1 to enable debug traces */
 #define DEBUG 0
 
@@ -105,7 +103,11 @@
     free(rt);
 }
 
-BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR(_res_key, _res_thread_free);
+static pthread_key_t _res_key;
+
+__attribute__((constructor)) static void __res_key_init() {
+    pthread_key_create(&_res_key, _res_thread_free);
+}
 
 static _res_thread*
 _res_thread_get(void)
diff --git a/libc/include/android/dlext.h b/libc/include/android/dlext.h
index 90daf30..f10a8a2 100644
--- a/libc/include/android/dlext.h
+++ b/libc/include/android/dlext.h
@@ -59,16 +59,28 @@
   /* If opening a library using library_fd read it starting at library_fd_offset.
    * This flag is only valid when ANDROID_DLEXT_USE_LIBRARY_FD is set.
    */
-
   ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET    = 0x20,
 
+  /* When set, do not use file stat(2) data to check whether the library is already loaded.
+   *
+   * This flag allows forced loading of the library when, for some reason,
+   * multiple ELF files share the same filename (for example, because the
+   * already-loaded library has been removed and overwritten).
+   *
+   * Note that if the library has the same DT_SONAME as an old one and some other
+   * library lists that soname in its DT_NEEDED entries, the first one will be used to
+   * resolve any dependencies.
+   */
+  ANDROID_DLEXT_FORCE_LOAD = 0x40,
+
   /* Mask of valid bits */
   ANDROID_DLEXT_VALID_FLAG_BITS       = ANDROID_DLEXT_RESERVED_ADDRESS |
                                         ANDROID_DLEXT_RESERVED_ADDRESS_HINT |
                                         ANDROID_DLEXT_WRITE_RELRO |
                                         ANDROID_DLEXT_USE_RELRO |
                                         ANDROID_DLEXT_USE_LIBRARY_FD |
-                                        ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET,
+                                        ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET |
+                                        ANDROID_DLEXT_FORCE_LOAD,
 };
 
 typedef struct {
diff --git a/libc/include/elf.h b/libc/include/elf.h
index ee53ad1..df768ba 100644
--- a/libc/include/elf.h
+++ b/libc/include/elf.h
@@ -48,6 +48,71 @@
   } a_un;
 } Elf64_auxv_t;
 
+typedef Elf32_Half Elf32_Versym;
+typedef Elf64_Half Elf64_Versym;
+
+typedef struct {
+  Elf32_Half vd_version;
+  Elf32_Half vd_flags;
+  Elf32_Half vd_ndx;
+  Elf32_Half vd_cnt;
+  Elf32_Word vd_hash;
+  Elf32_Word vd_aux;
+  Elf32_Word vd_next;
+} Elf32_Verdef;
+
+typedef struct {
+  Elf32_Word vda_name;
+  Elf32_Word vda_next;
+} Elf32_Verdaux;
+
+typedef struct {
+  Elf64_Half vd_version;
+  Elf64_Half vd_flags;
+  Elf64_Half vd_ndx;
+  Elf64_Half vd_cnt;
+  Elf64_Word vd_hash;
+  Elf64_Word vd_aux;
+  Elf64_Word vd_next;
+} Elf64_Verdef;
+
+typedef struct {
+  Elf64_Word vda_name;
+  Elf64_Word vda_next;
+} Elf64_Verdaux;
+
+typedef struct {
+  Elf32_Half vn_version;
+  Elf32_Half vn_cnt;
+  Elf32_Word vn_file;
+  Elf32_Word vn_aux;
+  Elf32_Word vn_next;
+} Elf32_Verneed;
+
+typedef struct {
+  Elf32_Word vna_hash;
+  Elf32_Half vna_flags;
+  Elf32_Half vna_other;
+  Elf32_Word vna_name;
+  Elf32_Word vna_next;
+} Elf32_Vernaux;
+
+typedef struct {
+  Elf64_Half vn_version;
+  Elf64_Half vn_cnt;
+  Elf64_Word vn_file;
+  Elf64_Word vn_aux;
+  Elf64_Word vn_next;
+} Elf64_Verneed;
+
+typedef struct {
+  Elf64_Word vna_hash;
+  Elf64_Half vna_flags;
+  Elf64_Half vna_other;
+  Elf64_Word vna_name;
+  Elf64_Word vna_next;
+} Elf64_Vernaux;
+
 #define DF_ORIGIN     0x00000001
 #define DF_SYMBOLIC   0x00000002
 #define DF_TEXTREL    0x00000004
@@ -129,4 +194,10 @@
 
 #define NT_GNU_BUILD_ID 3
 
+#define VER_FLG_BASE 0x1
+#define VER_FLG_WEAK 0x2
+
+#define VER_NDX_LOCAL  0
+#define VER_NDX_GLOBAL 1
+
 #endif /* _ELF_H */
diff --git a/libc/include/paths.h b/libc/include/paths.h
index 33c2eee..82c2804 100644
--- a/libc/include/paths.h
+++ b/libc/include/paths.h
@@ -32,18 +32,13 @@
 #ifndef _PATHS_H_
 #define	_PATHS_H_
 
-/* Default search path. */
-#define	_PATH_DEFPATH	"/system/bin:/system/xbin"
-
 #define	_PATH_BSHELL	"/system/bin/sh"
 #define	_PATH_CONSOLE	"/dev/console"
+#define	_PATH_DEFPATH	"/sbin:/vendor/bin:/system/sbin:/system/bin:/system/xbin"
+#define	_PATH_DEV	"/dev/"
 #define	_PATH_DEVNULL	"/dev/null"
 #define	_PATH_KLOG	"/proc/kmsg"
-
 #define	_PATH_MOUNTED	"/proc/mounts"
 #define	_PATH_TTY	"/dev/tty"
 
-/* Provide trailing slash, since mostly used for building pathnames. */
-#define	_PATH_DEV	"/dev/"
-
 #endif /* !_PATHS_H_ */
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index c701e30..26d68e4 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -36,30 +36,15 @@
 #include <sys/types.h>
 #include <time.h>
 
-#if defined(__LP64__)
-  #define __RESERVED_INITIALIZER , {0}
-#else
-  #define __RESERVED_INITIALIZER
-#endif
-
 typedef struct {
-  int value;
-#ifdef __LP64__
-  char __reserved[36];
+#if defined(__LP64__)
+  int32_t __private[10];
+#else
+  int32_t __private[1];
 #endif
 } pthread_mutex_t;
 
-#define  __PTHREAD_MUTEX_INIT_VALUE            0
-#define  __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE  0x4000
-#define  __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE 0x8000
-
-#define PTHREAD_MUTEX_INITIALIZER {__PTHREAD_MUTEX_INIT_VALUE __RESERVED_INITIALIZER}
-#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP {__PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE __RESERVED_INITIALIZER}
-#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP {__PTHREAD_RECURSIVE_MUTEX_INIT_VALUE __RESERVED_INITIALIZER}
-
-/* TODO: remove this namespace pollution. */
-#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
-#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP
+typedef long pthread_mutexattr_t;
 
 enum {
     PTHREAD_MUTEX_NORMAL = 0,
@@ -72,30 +57,39 @@
     PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL
 };
 
-typedef struct {
-  unsigned int value;
-#ifdef __LP64__
-  char __reserved[44];
-#endif
-} pthread_cond_t;
-
-#define PTHREAD_COND_INITIALIZER  {0 __RESERVED_INITIALIZER}
-
-typedef long pthread_mutexattr_t;
-typedef long pthread_condattr_t;
-
-typedef long pthread_rwlockattr_t;
+#define PTHREAD_MUTEX_INITIALIZER { { ((PTHREAD_MUTEX_NORMAL & 3) << 14) } }
+#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP { { ((PTHREAD_MUTEX_RECURSIVE & 3) << 14) } }
+#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP { { ((PTHREAD_MUTEX_ERRORCHECK & 3) << 14) } }
 
 typedef struct {
 #if defined(__LP64__)
-  char __private[56];
+  int32_t __private[12];
 #else
-  char __private[40];
+  int32_t __private[1];
+#endif
+} pthread_cond_t;
+
+typedef long pthread_condattr_t;
+
+#define PTHREAD_COND_INITIALIZER  { { 0 } }
+
+typedef struct {
+#if defined(__LP64__)
+  int32_t __private[14];
+#else
+  int32_t __private[10];
 #endif
 } pthread_rwlock_t;
 
+typedef long pthread_rwlockattr_t;
+
 #define PTHREAD_RWLOCK_INITIALIZER  { { 0 } }
 
+enum {
+  PTHREAD_RWLOCK_PREFER_READER_NP = 0,
+  PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP = 1,
+};
+
 typedef int pthread_key_t;
 
 typedef int pthread_once_t;
@@ -189,10 +183,12 @@
 
 int pthread_once(pthread_once_t*, void (*)(void)) __nonnull((1, 2));
 
+int pthread_rwlockattr_init(pthread_rwlockattr_t*) __nonnull((1));
 int pthread_rwlockattr_destroy(pthread_rwlockattr_t*) __nonnull((1));
 int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t*, int*) __nonnull((1, 2));
-int pthread_rwlockattr_init(pthread_rwlockattr_t*) __nonnull((1));
 int pthread_rwlockattr_setpshared(pthread_rwlockattr_t*, int) __nonnull((1));
+int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t*, int*) __nonnull((1, 2));
+int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t*, int) __nonnull((1));
 
 int pthread_rwlock_destroy(pthread_rwlock_t*) __nonnull((1));
 int pthread_rwlock_init(pthread_rwlock_t*, const pthread_rwlockattr_t*) __nonnull((1));
diff --git a/libc/include/sys/_system_properties.h b/libc/include/sys/_system_properties.h
index 44fe991..7ff3ded 100644
--- a/libc/include/sys/_system_properties.h
+++ b/libc/include/sys/_system_properties.h
@@ -80,7 +80,6 @@
 
 #define PROP_PATH_RAMDISK_DEFAULT  "/default.prop"
 #define PROP_PATH_SYSTEM_BUILD     "/system/build.prop"
-#define PROP_PATH_SYSTEM_DEFAULT   "/system/default.prop"
 #define PROP_PATH_VENDOR_BUILD     "/vendor/build.prop"
 #define PROP_PATH_BOOTIMAGE_BUILD  "/build.prop"
 #define PROP_PATH_LOCAL_OVERRIDE   "/data/local.prop"
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index 92d3abe..f0de29e 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -224,13 +224,89 @@
     } while (_rc == -1 && errno == EINTR); \
     _rc; })
 
+extern ssize_t __pread_chk(int, void*, size_t, off_t, size_t);
+__errordecl(__pread_dest_size_error, "pread called with size bigger than destination");
+__errordecl(__pread_count_toobig_error, "pread called with count > SSIZE_MAX");
+extern ssize_t __pread_real(int, void*, size_t, off_t) __RENAME(pread);
+
+extern ssize_t __pread64_chk(int, void*, size_t, off64_t, size_t);
+__errordecl(__pread64_dest_size_error, "pread64 called with size bigger than destination");
+__errordecl(__pread64_count_toobig_error, "pread64 called with count > SSIZE_MAX");
+extern ssize_t __pread64_real(int, void*, size_t, off64_t) __RENAME(pread64);
+
 extern ssize_t __read_chk(int, void*, size_t, size_t);
 __errordecl(__read_dest_size_error, "read called with size bigger than destination");
 __errordecl(__read_count_toobig_error, "read called with count > SSIZE_MAX");
 extern ssize_t __read_real(int, void*, size_t) __RENAME(read);
 
+extern ssize_t __readlink_chk(const char*, char*, size_t, size_t);
+__errordecl(__readlink_dest_size_error, "readlink called with size bigger than destination");
+__errordecl(__readlink_size_toobig_error, "readlink called with size > SSIZE_MAX");
+extern ssize_t __readlink_real(const char*, char*, size_t) __RENAME(readlink);
+
+extern ssize_t __readlinkat_chk(int dirfd, const char*, char*, size_t, size_t);
+__errordecl(__readlinkat_dest_size_error, "readlinkat called with size bigger than destination");
+__errordecl(__readlinkat_size_toobig_error, "readlinkat called with size > SSIZE_MAX");
+extern ssize_t __readlinkat_real(int dirfd, const char*, char*, size_t) __RENAME(readlinkat);
+
 #if defined(__BIONIC_FORTIFY)
 
+#if defined(__USE_FILE_OFFSET64)
+#define __PREAD_PREFIX(x) __pread64_ ## x
+#else
+#define __PREAD_PREFIX(x) __pread_ ## x
+#endif
+
+__BIONIC_FORTIFY_INLINE
+ssize_t pread(int fd, void* buf, size_t count, off_t offset) {
+    size_t bos = __bos0(buf);
+
+#if !defined(__clang__)
+    if (__builtin_constant_p(count) && (count > SSIZE_MAX)) {
+        __PREAD_PREFIX(count_toobig_error)();
+    }
+
+    if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
+        return __PREAD_PREFIX(real)(fd, buf, count, offset);
+    }
+
+    if (__builtin_constant_p(count) && (count > bos)) {
+        __PREAD_PREFIX(dest_size_error)();
+    }
+
+    if (__builtin_constant_p(count) && (count <= bos)) {
+        return __PREAD_PREFIX(real)(fd, buf, count, offset);
+    }
+#endif
+
+    return __PREAD_PREFIX(chk)(fd, buf, count, offset, bos);
+}
+
+__BIONIC_FORTIFY_INLINE
+ssize_t pread64(int fd, void* buf, size_t count, off64_t offset) {
+    size_t bos = __bos0(buf);
+
+#if !defined(__clang__)
+    if (__builtin_constant_p(count) && (count > SSIZE_MAX)) {
+        __pread64_count_toobig_error();
+    }
+
+    if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
+        return __pread64_real(fd, buf, count, offset);
+    }
+
+    if (__builtin_constant_p(count) && (count > bos)) {
+        __pread64_dest_size_error();
+    }
+
+    if (__builtin_constant_p(count) && (count <= bos)) {
+        return __pread64_real(fd, buf, count, offset);
+    }
+#endif
+
+    return __pread64_chk(fd, buf, count, offset, bos);
+}
+
 __BIONIC_FORTIFY_INLINE
 ssize_t read(int fd, void* buf, size_t count) {
     size_t bos = __bos0(buf);
@@ -255,6 +331,57 @@
 
     return __read_chk(fd, buf, count, bos);
 }
+
+__BIONIC_FORTIFY_INLINE
+ssize_t readlink(const char* path, char* buf, size_t size) {
+    size_t bos = __bos(buf);
+
+#if !defined(__clang__)
+    if (__builtin_constant_p(size) && (size > SSIZE_MAX)) {
+        __readlink_size_toobig_error();
+    }
+
+    if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
+        return __readlink_real(path, buf, size);
+    }
+
+    if (__builtin_constant_p(size) && (size > bos)) {
+        __readlink_dest_size_error();
+    }
+
+    if (__builtin_constant_p(size) && (size <= bos)) {
+        return __readlink_real(path, buf, size);
+    }
+#endif
+
+    return __readlink_chk(path, buf, size, bos);
+}
+
+__BIONIC_FORTIFY_INLINE
+ssize_t readlinkat(int dirfd, const char* path, char* buf, size_t size) {
+    size_t bos = __bos(buf);
+
+#if !defined(__clang__)
+    if (__builtin_constant_p(size) && (size > SSIZE_MAX)) {
+        __readlinkat_size_toobig_error();
+    }
+
+    if (bos == __BIONIC_FORTIFY_UNKNOWN_SIZE) {
+        return __readlinkat_real(dirfd, path, buf, size);
+    }
+
+    if (__builtin_constant_p(size) && (size > bos)) {
+        __readlinkat_dest_size_error();
+    }
+
+    if (__builtin_constant_p(size) && (size <= bos)) {
+        return __readlinkat_real(dirfd, path, buf, size);
+    }
+#endif
+
+    return __readlinkat_chk(dirfd, path, buf, size, bos);
+}
+
 #endif /* defined(__BIONIC_FORTIFY) */
 
 __END_DECLS
diff --git a/libc/kernel/uapi/linux/can/netlink.h b/libc/kernel/uapi/linux/can/netlink.h
index 96a90ff..a1c3159 100644
--- a/libc/kernel/uapi/linux/can/netlink.h
+++ b/libc/kernel/uapi/linux/can/netlink.h
@@ -79,33 +79,35 @@
 #define CAN_CTRLMODE_BERR_REPORTING 0x10
 #define CAN_CTRLMODE_FD 0x20
 #define CAN_CTRLMODE_PRESUME_ACK 0x40
-struct can_device_stats {
+#define CAN_CTRLMODE_FD_NON_ISO 0x80
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct can_device_stats {
   __u32 bus_error;
   __u32 error_warning;
   __u32 error_passive;
-  __u32 bus_off;
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  __u32 bus_off;
   __u32 arbitration_lost;
   __u32 restarts;
 };
-enum {
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+enum {
   IFLA_CAN_UNSPEC,
   IFLA_CAN_BITTIMING,
   IFLA_CAN_BITTIMING_CONST,
-  IFLA_CAN_CLOCK,
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  IFLA_CAN_CLOCK,
   IFLA_CAN_STATE,
   IFLA_CAN_CTRLMODE,
   IFLA_CAN_RESTART_MS,
-  IFLA_CAN_RESTART,
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  IFLA_CAN_RESTART,
   IFLA_CAN_BERR_COUNTER,
   IFLA_CAN_DATA_BITTIMING,
   IFLA_CAN_DATA_BITTIMING_CONST,
-  __IFLA_CAN_MAX
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  __IFLA_CAN_MAX
 };
 #define IFLA_CAN_MAX (__IFLA_CAN_MAX - 1)
 #endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
diff --git a/libc/kernel/uapi/linux/in6.h b/libc/kernel/uapi/linux/in6.h
index 15bde3d..e54bc33 100644
--- a/libc/kernel/uapi/linux/in6.h
+++ b/libc/kernel/uapi/linux/in6.h
@@ -128,84 +128,87 @@
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_TLV_JUMBO 194
 #define IPV6_TLV_HAO 201
+#if __UAPI_DEF_IPV6_OPTIONS
 #define IPV6_ADDRFORM 1
-#define IPV6_2292PKTINFO 2
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_2292PKTINFO 2
 #define IPV6_2292HOPOPTS 3
 #define IPV6_2292DSTOPTS 4
 #define IPV6_2292RTHDR 5
-#define IPV6_2292PKTOPTIONS 6
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_2292PKTOPTIONS 6
 #define IPV6_CHECKSUM 7
 #define IPV6_2292HOPLIMIT 8
 #define IPV6_NEXTHOP 9
-#define IPV6_AUTHHDR 10
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_AUTHHDR 10
 #define IPV6_FLOWINFO 11
 #define IPV6_UNICAST_HOPS 16
 #define IPV6_MULTICAST_IF 17
-#define IPV6_MULTICAST_HOPS 18
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_MULTICAST_HOPS 18
 #define IPV6_MULTICAST_LOOP 19
 #define IPV6_ADD_MEMBERSHIP 20
 #define IPV6_DROP_MEMBERSHIP 21
-#define IPV6_ROUTER_ALERT 22
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_ROUTER_ALERT 22
 #define IPV6_MTU_DISCOVER 23
 #define IPV6_MTU 24
 #define IPV6_RECVERR 25
-#define IPV6_V6ONLY 26
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_V6ONLY 26
 #define IPV6_JOIN_ANYCAST 27
 #define IPV6_LEAVE_ANYCAST 28
 #define IPV6_PMTUDISC_DONT 0
-#define IPV6_PMTUDISC_WANT 1
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_PMTUDISC_WANT 1
 #define IPV6_PMTUDISC_DO 2
 #define IPV6_PMTUDISC_PROBE 3
 #define IPV6_PMTUDISC_INTERFACE 4
-#define IPV6_PMTUDISC_OMIT 5
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_PMTUDISC_OMIT 5
 #define IPV6_FLOWLABEL_MGR 32
 #define IPV6_FLOWINFO_SEND 33
 #define IPV6_IPSEC_POLICY 34
-#define IPV6_XFRM_POLICY 35
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define IPV6_XFRM_POLICY 35
+#endif
 #define IPV6_RECVPKTINFO 49
 #define IPV6_PKTINFO 50
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_RECVHOPLIMIT 51
 #define IPV6_HOPLIMIT 52
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_RECVHOPOPTS 53
 #define IPV6_HOPOPTS 54
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_RTHDRDSTOPTS 55
 #define IPV6_RECVRTHDR 56
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_RTHDR 57
 #define IPV6_RECVDSTOPTS 58
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_DSTOPTS 59
 #define IPV6_RECVPATHMTU 60
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_PATHMTU 61
 #define IPV6_DONTFRAG 62
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_RECVTCLASS 66
 #define IPV6_TCLASS 67
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_AUTOFLOWLABEL 70
 #define IPV6_ADDR_PREFERENCES 72
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_PREFER_SRC_TMP 0x0001
 #define IPV6_PREFER_SRC_PUBLIC 0x0002
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100
 #define IPV6_PREFER_SRC_COA 0x0004
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_PREFER_SRC_HOME 0x0400
 #define IPV6_PREFER_SRC_CGA 0x0008
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_PREFER_SRC_NONCGA 0x0800
 #define IPV6_MINHOPCOUNT 73
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_ORIGDSTADDR 74
 #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define IPV6_TRANSPARENT 75
 #define IPV6_UNICAST_IF 76
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #endif
diff --git a/libc/kernel/uapi/linux/libc-compat.h b/libc/kernel/uapi/linux/libc-compat.h
index 7854520..b66ebe2 100644
--- a/libc/kernel/uapi/linux/libc-compat.h
+++ b/libc/kernel/uapi/linux/libc-compat.h
@@ -32,29 +32,33 @@
 #define __UAPI_DEF_IPV6_MREQ 0
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define __UAPI_DEF_IPPROTO_V6 0
+#define __UAPI_DEF_IPV6_OPTIONS 0
 #else
 #define __UAPI_DEF_IN6_ADDR 1
-#define __UAPI_DEF_IN6_ADDR_ALT 1
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define __UAPI_DEF_IN6_ADDR_ALT 1
 #define __UAPI_DEF_SOCKADDR_IN6 1
 #define __UAPI_DEF_IPV6_MREQ 1
 #define __UAPI_DEF_IPPROTO_V6 1
-#endif
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define __UAPI_DEF_IPV6_OPTIONS 1
+#endif
 #ifdef _SYS_XATTR_H
 #define __UAPI_DEF_XATTR 0
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #else
 #define __UAPI_DEF_XATTR 1
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #endif
 #else
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define __UAPI_DEF_IN6_ADDR 1
 #define __UAPI_DEF_IN6_ADDR_ALT 1
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define __UAPI_DEF_SOCKADDR_IN6 1
 #define __UAPI_DEF_IPV6_MREQ 1
-#define __UAPI_DEF_IPPROTO_V6 1
-#define __UAPI_DEF_XATTR 1
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define __UAPI_DEF_IPPROTO_V6 1
+#define __UAPI_DEF_IPV6_OPTIONS 1
+#define __UAPI_DEF_XATTR 1
 #endif
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #endif
diff --git a/libc/kernel/uapi/linux/target_core_user.h b/libc/kernel/uapi/linux/target_core_user.h
index ce6d26d..7e0cf43 100644
--- a/libc/kernel/uapi/linux/target_core_user.h
+++ b/libc/kernel/uapi/linux/target_core_user.h
@@ -21,75 +21,71 @@
 #include <linux/types.h>
 #include <linux/uio.h>
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
-#ifndef __packed
-#define __packed __attribute__((packed))
-#endif
 #define TCMU_VERSION "1.0"
-/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define TCMU_MAILBOX_VERSION 1
 #define ALIGN_SIZE 64
 struct tcmu_mailbox {
-  __u16 version;
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  __u16 version;
   __u16 flags;
   __u32 cmdr_off;
   __u32 cmdr_size;
-  __u32 cmd_head;
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  __u32 cmd_head;
   __u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE)));
 } __packed;
 enum tcmu_opcode {
-  TCMU_OP_PAD = 0,
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  TCMU_OP_PAD = 0,
   TCMU_OP_CMD,
 };
 struct tcmu_cmd_entry_hdr {
-  __u32 len_op;
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  __u32 len_op;
 } __packed;
 #define TCMU_OP_MASK 0x7
 #define TCMU_SENSE_BUFFERSIZE 96
-struct tcmu_cmd_entry {
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct tcmu_cmd_entry {
   struct tcmu_cmd_entry_hdr hdr;
   uint16_t cmd_id;
   uint16_t __pad1;
-  union {
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  union {
     struct {
       uint64_t cdb_off;
       uint64_t iov_cnt;
-      struct iovec iov[0];
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+      struct iovec iov[0];
     } req;
     struct {
       uint8_t scsi_status;
-      uint8_t __pad1;
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+      uint8_t __pad1;
       uint16_t __pad2;
       uint32_t __pad3;
       char sense_buffer[TCMU_SENSE_BUFFERSIZE];
-    } rsp;
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+    } rsp;
   };
 } __packed;
 #define TCMU_OP_ALIGN_SIZE sizeof(uint64_t)
-enum tcmu_genl_cmd {
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+enum tcmu_genl_cmd {
   TCMU_CMD_UNSPEC,
   TCMU_CMD_ADDED_DEVICE,
   TCMU_CMD_REMOVED_DEVICE,
-  __TCMU_CMD_MAX,
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  __TCMU_CMD_MAX,
 };
 #define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
 enum tcmu_genl_attr {
-  TCMU_ATTR_UNSPEC,
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+  TCMU_ATTR_UNSPEC,
   TCMU_ATTR_DEVICE,
   TCMU_ATTR_MINOR,
   __TCMU_ATTR_MAX,
-};
 /* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+};
 #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
 #endif
diff --git a/libc/kernel/uapi/linux/version.h b/libc/kernel/uapi/linux/version.h
index cb6842a..a6c6a2f 100644
--- a/libc/kernel/uapi/linux/version.h
+++ b/libc/kernel/uapi/linux/version.h
@@ -16,5 +16,5 @@
  ***
  ****************************************************************************
  ****************************************************************************/
-#define LINUX_VERSION_CODE 201219
+#define LINUX_VERSION_CODE 201226
 #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
diff --git a/libc/private/ThreadLocalBuffer.h b/libc/private/ThreadLocalBuffer.h
index cc47317..5e43665 100644
--- a/libc/private/ThreadLocalBuffer.h
+++ b/libc/private/ThreadLocalBuffer.h
@@ -32,32 +32,30 @@
 #include <malloc.h>
 #include <pthread.h>
 
-// libstdc++ currently contains __cxa_guard_acquire and __cxa_guard_release,
-// so we make do with macros instead of a C++ class.
-// TODO: move __cxa_guard_acquire and __cxa_guard_release into libc.
+// TODO: use __thread instead?
 
-// We used to use pthread_once to initialize the keys, but life is more predictable
-// if we allocate them all up front when the C library starts up, via __constructor__.
-#define BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR(key_name, key_destructor) \
-  static pthread_key_t key_name; \
-  __attribute__((constructor)) static void __bionic_tls_ ## key_name ## _key_init() { \
-    pthread_key_create(&key_name, key_destructor); \
+template <typename T, size_t Size = sizeof(T)>
+class ThreadLocalBuffer {  // Per-thread buffer of Size bytes, allocated lazily and tracked through a pthread key.
+ public:
+  ThreadLocalBuffer() {
+    // We used to use pthread_once to initialize the keys, but life is more predictable
+    // if we allocate them all up front when the C library starts up, via __constructor__.
+    pthread_key_create(&key_, free);  // free() is registered as the key's destructor.
   }
 
-#define GLOBAL_INIT_THREAD_LOCAL_BUFFER(name) \
-  static void __bionic_tls_ ## name ## _key_destroy(void* buffer) { \
-    free(buffer); \
-  } \
-  BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR(__bionic_tls_ ## name ## _key, __bionic_tls_ ## name ## _key_destroy)
+  T* get() {  // Returns the calling thread's buffer, creating it on first use.
+    T* result = reinterpret_cast<T*>(pthread_getspecific(key_));
+    if (result == nullptr) {
+      result = reinterpret_cast<T*>(calloc(1, Size));  // Zero-filled, Size bytes.
+      pthread_setspecific(key_, result);  // NOTE(review): calloc failure is not checked; get() may return nullptr.
+    }
+    return result;
+  }
 
-// Leaves "name_tls_buffer" and "name_tls_buffer_size" defined and initialized.
-#define LOCAL_INIT_THREAD_LOCAL_BUFFER(type, name, byte_count) \
-  type name ## _tls_buffer = \
-      reinterpret_cast<type>(pthread_getspecific(__bionic_tls_ ## name ## _key)); \
-  if (name ## _tls_buffer == NULL) { \
-    name ## _tls_buffer = reinterpret_cast<type>(calloc(1, byte_count)); \
-    pthread_setspecific(__bionic_tls_ ## name ## _key, name ## _tls_buffer); \
-  } \
-  const size_t name ## _tls_buffer_size __attribute__((unused)) = byte_count
+  size_t size() { return Size; }  // Buffer size in bytes (the Size template argument).
+
+ private:
+  pthread_key_t key_;  // Key created in the constructor; holds each thread's buffer pointer.
+};
 
 #endif // _BIONIC_THREAD_LOCAL_BUFFER_H_included
diff --git a/libc/private/bionic_lock.h b/libc/private/bionic_lock.h
new file mode 100644
index 0000000..6a0fd06
--- /dev/null
+++ b/libc/private/bionic_lock.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _BIONIC_LOCK_H
+#define _BIONIC_LOCK_H
+
+#include <stdatomic.h>
+#include "private/bionic_futex.h"
+
+class Lock {  // Three-state lock: an atomic state word plus __futex_wait_ex/__futex_wake_ex.
+ private:
+  enum LockState {
+    Unlocked = 0,
+    LockedWithoutWaiter,  // Stored by the uncontended compare-exchange in lock().
+    LockedWithWaiter,  // Stored by the contended path in lock(); unlock() wakes a waiter when it sees this.
+  };
+  _Atomic(LockState) state;  // Current LockState; also the address handed to the futex helpers.
+  bool process_shared;  // Forwarded unchanged to the futex helpers.
+
+ public:
+  Lock(bool process_shared = false) {
+    init(process_shared);
+  }
+
+  void init(bool process_shared) {  // Resets the lock to Unlocked and records process_shared.
+    atomic_init(&state, Unlocked);
+    this->process_shared = process_shared;
+  }
+
+  void lock() {
+    LockState old_state = Unlocked;  // Expected value for the compare-exchange below.
+    if (__predict_true(atomic_compare_exchange_strong_explicit(&state, &old_state,
+                         LockedWithoutWaiter, memory_order_acquire, memory_order_relaxed))) {
+      return;  // Fast path: Unlocked -> LockedWithoutWaiter succeeded.
+    }  // Slow path: store LockedWithWaiter and wait until the exchange returns Unlocked.
+    while (atomic_exchange_explicit(&state, LockedWithWaiter, memory_order_acquire) != Unlocked) {
+      // TODO: As the critical section is brief, it is a better choice to spin a few times before sleeping.
+      __futex_wait_ex(&state, process_shared, LockedWithWaiter, NULL);  // Presumably blocks only while state == LockedWithWaiter.
+    }
+    return;
+  }
+
+  void unlock() {
+    if (atomic_exchange_explicit(&state, Unlocked, memory_order_release) == LockedWithWaiter) {
+      __futex_wake_ex(&state, process_shared, 1);  // Previous state had a waiter; 1 is presumably the wake count.
+    }
+  }
+};
+
+#endif  // _BIONIC_LOCK_H
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 1ab8d4a..30dc0eb 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -72,35 +72,33 @@
 
 /*
  * Bionic uses some pthread keys internally. All pthread keys used internally
- * should be created in constructors, except for keys that may be used in or before constructors.
+ * should be created in constructors, except for keys that may be used in or
+ * before constructors.
+ *
  * We need to manually maintain the count of pthread keys used internally, but
  * pthread_test should fail if we forget.
- * Following are current pthread keys used internally by libc:
- *  basename               libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  dirname                libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
+ *
+ * These are the pthread keys currently used internally by libc:
+ *
+ *  basename               libc (ThreadLocalBuffer)
+ *  dirname                libc (ThreadLocalBuffer)
  *  uselocale              libc (can be used in constructors)
- *  getmntent_mntent       libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  getmntent_strings      libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  ptsname                libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  ttyname                libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  strerror               libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  strsignal              libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  passwd                 libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  group                  libc (GLOBAL_INIT_THREAD_LOCAL_BUFFER)
- *  _res_key               libc (BIONIC_PTHREAD_KEY_WITH_CONSTRUCTOR)
+ *  getmntent_mntent       libc (ThreadLocalBuffer)
+ *  getmntent_strings      libc (ThreadLocalBuffer)
+ *  ptsname                libc (ThreadLocalBuffer)
+ *  ttyname                libc (ThreadLocalBuffer)
+ *  strerror               libc (ThreadLocalBuffer)
+ *  strsignal              libc (ThreadLocalBuffer)
+ *  passwd                 libc (ThreadLocalBuffer)
+ *  group                  libc (ThreadLocalBuffer)
+ *  _res_key               libc (constructor in BSD code)
  */
 
 #define LIBC_PTHREAD_KEY_RESERVED_COUNT 12
 
 #if defined(USE_JEMALLOC)
-/* Following are current pthread keys used internally by jemalloc:
- * je_thread_allocated_tsd jemalloc
- * je_arenas_tsd           jemalloc
- * je_tcache_tsd           jemalloc
- * je_tcache_enabled_tsd   jemalloc
- * je_quarantine_tsd       jemalloc
- */
-#define JEMALLOC_PTHREAD_KEY_RESERVED_COUNT 5
+/* Internally, jemalloc uses a single key for per thread data. */
+#define JEMALLOC_PTHREAD_KEY_RESERVED_COUNT 1
 #define BIONIC_PTHREAD_KEY_RESERVED_COUNT (LIBC_PTHREAD_KEY_RESERVED_COUNT + JEMALLOC_PTHREAD_KEY_RESERVED_COUNT)
 #else
 #define BIONIC_PTHREAD_KEY_RESERVED_COUNT LIBC_PTHREAD_KEY_RESERVED_COUNT
diff --git a/libc/stdio/fileext.h b/libc/stdio/fileext.h
index 209815a..6cacc0f 100644
--- a/libc/stdio/fileext.h
+++ b/libc/stdio/fileext.h
@@ -61,7 +61,11 @@
 	_UB(fp)._base = NULL; \
 	_UB(fp)._size = 0; \
 	WCIO_INIT(fp); \
-	_FLOCK(fp).value = __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE; \
+	pthread_mutexattr_t attr; \
+	pthread_mutexattr_init(&attr); \
+	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); \
+	pthread_mutex_init(&_FLOCK(fp), &attr); \
+	pthread_mutexattr_destroy(&attr); \
 	_EXT(fp)->_stdio_handles_locking = true; \
 } while (0)
 
diff --git a/libc/upstream-openbsd/lib/libc/stdlib/exit.c b/libc/stdlib/exit.c
similarity index 88%
rename from libc/upstream-openbsd/lib/libc/stdlib/exit.c
rename to libc/stdlib/exit.c
index 83fe3d2..10ce674 100644
--- a/libc/upstream-openbsd/lib/libc/stdlib/exit.c
+++ b/libc/stdlib/exit.c
@@ -32,8 +32,6 @@
 #include <sys/mman.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include "atexit.h"
-#include "thread_private.h"
 
 /*
  * This variable is zero until a process has created a thread.
@@ -44,12 +42,21 @@
  */
 int     __isthreaded    = 0;
 
+/* BEGIN android-added: using __cxa_finalize and __cxa_thread_finalize */
+extern void __cxa_finalize(void* dso_handle);
+extern void __cxa_thread_finalize();
+/* END android-added */
+
 /*
  * Exit, flushing stdio buffers if necessary.
  */
 void
 exit(int status)
 {
+  /* BEGIN android-added: call thread_local d-tors */
+  __cxa_thread_finalize();
+  /* END android-added */
+
 	/*
 	 * Call functions registered by atexit() or _cxa_atexit()
 	 * (including the stdio cleanup routine) and then _exit().
diff --git a/libc/tools/gensyscalls.py b/libc/tools/gensyscalls.py
index 7e11418..4d0afe2 100755
--- a/libc/tools/gensyscalls.py
+++ b/libc/tools/gensyscalls.py
@@ -56,12 +56,6 @@
 """
 
 
-function_alias = """
-    .globl %(alias)s
-    .equ %(alias)s, %(func)s
-"""
-
-
 #
 # ARM assembler templates for each syscall stub
 #
@@ -284,7 +278,7 @@
     # Add any aliases for this syscall.
     aliases = syscall["aliases"]
     for alias in aliases:
-        stub += function_alias % { "func" : syscall["func"], "alias" : alias }
+        stub += "\nALIAS_SYMBOL(%s, %s)\n" % (alias, syscall["func"])
 
     # Use hidden visibility on LP64 for any functions beginning with underscores.
     # Force hidden visibility for any functions which begin with 3 underscores
diff --git a/libc/tools/zoneinfo/update-tzdata.py b/libc/tools/zoneinfo/update-tzdata.py
index d5788af..68a5ff5 100755
--- a/libc/tools/zoneinfo/update-tzdata.py
+++ b/libc/tools/zoneinfo/update-tzdata.py
@@ -33,9 +33,12 @@
 CheckDirExists(bionic_libc_tools_zoneinfo_dir, 'bionic/libc/tools/zoneinfo')
 print 'Found bionic in %s ...' % bionic_dir
 
-# Find the icu4c directory.
-icu_dir = os.path.realpath('%s/../external/icu/icu4c/source' % bionic_dir)
-CheckDirExists(icu_dir, 'external/icu/icu4c/source')
+# Find the icu directory.
+icu_dir = os.path.realpath('%s/../external/icu' % bionic_dir)
+icu4c_dir = os.path.realpath('%s/icu4c/source' % icu_dir)
+icu4j_dir = os.path.realpath('%s/icu4j' % icu_dir)
+CheckDirExists(icu4c_dir, 'external/icu/icu4c/source')
+CheckDirExists(icu4j_dir, 'external/icu/icu4j')
 print 'Found icu in %s ...' % icu_dir
 
 
@@ -116,14 +119,14 @@
 
   # Build the ICU tools.
   print 'Configuring ICU tools...'
-  subprocess.check_call(['%s/runConfigureICU' % icu_dir, 'Linux'])
+  subprocess.check_call(['%s/runConfigureICU' % icu4c_dir, 'Linux'])
 
   # Run the ICU tools.
   os.chdir('tools/tzcode')
 
   # The tz2icu tool only picks up icuregions and icuzones in they are in the CWD
   for icu_data_file in [ 'icuregions', 'icuzones']:
-    icu_data_file_source = '%s/tools/tzcode/%s' % (icu_dir, icu_data_file)
+    icu_data_file_source = '%s/tools/tzcode/%s' % (icu4c_dir, icu_data_file)
     icu_data_file_symlink = './%s' % icu_data_file
     os.symlink(icu_data_file_source, icu_data_file_symlink)
 
@@ -134,7 +137,7 @@
   subprocess.check_call(['make'])
 
   # Copy the source file to its ultimate destination.
-  icu_txt_data_dir = '%s/data/misc' % icu_dir
+  icu_txt_data_dir = '%s/data/misc' % icu4c_dir
   print 'Copying zoneinfo64.txt to %s ...' % icu_txt_data_dir
   shutil.copy('zoneinfo64.txt', icu_txt_data_dir)
 
@@ -143,7 +146,7 @@
   subprocess.check_call(['make', 'INCLUDE_UNI_CORE_DATA=1', '-j32'])
 
   # Copy the .dat file to its ultimate destination.
-  icu_dat_data_dir = '%s/stubdata' % icu_dir
+  icu_dat_data_dir = '%s/stubdata' % icu4c_dir
   datfiles = glob.glob('data/out/tmp/icudt??l.dat')
   if len(datfiles) != 1:
     print 'ERROR: Unexpectedly found %d .dat files (%s). Halting.' % (len(datfiles), datfiles)
@@ -152,6 +155,20 @@
   print 'Copying %s to %s ...' % (datfile, icu_dat_data_dir)
   shutil.copy(datfile, icu_dat_data_dir)
 
+  # Generate the ICU4J .jar files
+  os.chdir('%s/data' % icu_working_dir)
+  subprocess.check_call(['make', 'icu4j-data'])
+
+  # Copy the ICU4J .jar files to their ultimate destination.
+  icu_jar_data_dir = '%s/main/shared/data' % icu4j_dir
+  jarfiles = glob.glob('out/icu4j/*.jar')
+  if len(jarfiles) != 2:
+    print 'ERROR: Unexpectedly found %d .jar files (%s). Halting.' % (len(jarfiles), jarfiles)
+    sys.exit(1)
+  for jarfile in jarfiles:
+    print 'Copying %s to %s ...' % (jarfile, icu_jar_data_dir)
+    shutil.copy(jarfile, icu_jar_data_dir)
+
   # Switch back to the original working cwd.
   os.chdir(original_working_dir)
 
diff --git a/libc/tzcode/localtime.c b/libc/tzcode/localtime.c
index 29f605c..bf09c5e 100644
--- a/libc/tzcode/localtime.c
+++ b/libc/tzcode/localtime.c
@@ -2253,11 +2253,14 @@
 }
 
 static int __bionic_open_tzdata(const char* olson_id, int* data_size) {
-  int fd = __bionic_open_tzdata_path("ANDROID_ROOT", "/usr/share/zoneinfo/tzdata", olson_id, data_size);
-  if (fd == -2) {
-    // The first thing that 'recovery' does is try to format the current time. It doesn't have
-    // any tzdata available, so we must not abort here --- doing so breaks the recovery image!
-    fprintf(stderr, "%s: couldn't find any tzdata when looking for %s!\n", __FUNCTION__, olson_id);
+  int fd = __bionic_open_tzdata_path("ANDROID_DATA", "/misc/zoneinfo/current/tzdata", olson_id, data_size);
+  if (fd < 0) {  // Any failure on the ANDROID_DATA copy falls back to the ANDROID_ROOT copy.
+    fd = __bionic_open_tzdata_path("ANDROID_ROOT", "/usr/share/zoneinfo/tzdata", olson_id, data_size);
+    if (fd == -2) {
+      // The first thing that 'recovery' does is try to format the current time. It doesn't have
+      // any tzdata available, so we must not abort here --- doing so breaks the recovery image!
+      fprintf(stderr, "%s: couldn't find any tzdata when looking for %s!\n", __FUNCTION__, olson_id);
+    }
   }
   return fd;
 }
diff --git a/libc/version_script.txt b/libc/version_script.txt
new file mode 100644
index 0000000..349a2fc
--- /dev/null
+++ b/libc/version_script.txt
@@ -0,0 +1,17 @@
+LIBC {
+  global:
+    /* Work-around for http://b/20065774. */
+    __clear_cache;
+    _Unwind_Backtrace;
+    _Unwind_GetIP;
+  local:  /* Mangled C++ names: std::nothrow and the operator new/delete variants. */
+    _ZSt7nothrow;
+    _ZdaPv;
+    _ZdaPvRKSt9nothrow_t;
+    _ZdlPv;
+    _ZdlPvRKSt9nothrow_t;
+    _Znaj;
+    _ZnajRKSt9nothrow_t;
+    _Znwj;
+    _ZnwjRKSt9nothrow_t;
+};
diff --git a/libc/zoneinfo/tzdata b/libc/zoneinfo/tzdata
index b9a6621..3661b68 100644
--- a/libc/zoneinfo/tzdata
+++ b/libc/zoneinfo/tzdata
Binary files differ
diff --git a/linker/Android.mk b/linker/Android.mk
index 0ab0fda..5bdc2f9 100644
--- a/linker/Android.mk
+++ b/linker/Android.mk
@@ -6,9 +6,11 @@
     debugger.cpp \
     dlfcn.cpp \
     linker.cpp \
+    linker_allocator.cpp \
     linker_block_allocator.cpp \
     linker_environ.cpp \
     linker_libc_support.c \
+    linker_memory.cpp \
     linker_phdr.cpp \
     rt.cpp \
 
@@ -55,7 +57,7 @@
 
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 
-LOCAL_STATIC_LIBRARIES := libc_nomalloc
+LOCAL_STATIC_LIBRARIES := libc_nomalloc libziparchive libutils libz liblog
 
 LOCAL_FORCE_STATIC_EXECUTABLE := true
 
@@ -68,7 +70,9 @@
 # meaningful name resolution.
 LOCAL_STRIP_MODULE := keep_symbols
 
-# Insert an extra objcopy step to add prefix to symbols.
+# Insert an extra objcopy step to add prefix to symbols. This is needed to prevent gdb
+# looking up symbols in the linker by mistake.
+#
 # Note we are using "=" instead of ":=" to defer the evaluation,
 # because LOCAL_2ND_ARCH_VAR_PREFIX or linked_module isn't set properly yet at this point.
 LOCAL_POST_LINK_CMD = $(hide) $($(LOCAL_2ND_ARCH_VAR_PREFIX)TARGET_OBJCOPY) \
diff --git a/linker/debugger.cpp b/linker/debugger.cpp
index 6fe9524..357fbdc 100644
--- a/linker/debugger.cpp
+++ b/linker/debugger.cpp
@@ -221,7 +221,7 @@
   if (ret != 0) {
     if (ret == EBUSY) {
       __libc_format_log(ANDROID_LOG_INFO, "libc",
-                        "Another thread has contacted debuggerd first, stop and wait for process to die.");
+          "Another thread contacted debuggerd first; not contacting debuggerd.");
       // This will never complete since the lock is never released.
       pthread_mutex_lock(&crash_mutex);
     } else {
diff --git a/linker/dlfcn.cpp b/linker/dlfcn.cpp
index 9a8dbc9..057c217 100644
--- a/linker/dlfcn.cpp
+++ b/linker/dlfcn.cpp
@@ -100,17 +100,12 @@
   }
 
   soinfo* found = nullptr;
-  ElfW(Sym)* sym = nullptr;
-  if (handle == RTLD_DEFAULT) {
-    sym = dlsym_linear_lookup(symbol, &found, nullptr);
-  } else if (handle == RTLD_NEXT) {
-    void* caller_addr = __builtin_return_address(0);
-    soinfo* si = find_containing_library(caller_addr);
+  const ElfW(Sym)* sym = nullptr;
+  void* caller_addr = __builtin_return_address(0);
+  soinfo* caller = find_containing_library(caller_addr);
 
-    sym = nullptr;
-    if (si && si->next) {
-      sym = dlsym_linear_lookup(symbol, &found, si->next);
-    }
+  if (handle == RTLD_DEFAULT || handle == RTLD_NEXT) {
+    sym = dlsym_linear_lookup(symbol, &found, caller, handle);
   } else {
     sym = dlsym_handle_lookup(reinterpret_cast<soinfo*>(handle), &found, symbol);
   }
@@ -141,7 +136,7 @@
 
   memset(info, 0, sizeof(Dl_info));
 
-  info->dli_fname = si->name;
+  info->dli_fname = si->get_realpath();
   // Address at which the shared object is loaded.
   info->dli_fbase = reinterpret_cast<void*>(si->base);
 
@@ -233,22 +228,28 @@
 static unsigned g_libdl_chains[] = { 0, 2, 3, 4, 5, 6, 7, 8, 9, 0 };
 #endif
 
-static soinfo __libdl_info("libdl.so", nullptr, 0, RTLD_GLOBAL);
+static uint8_t __libdl_info_buf[sizeof(soinfo)] __attribute__((aligned(8)));
+static soinfo* __libdl_info = nullptr;
 
 // This is used by the dynamic linker. Every process gets these symbols for free.
 soinfo* get_libdl_info() {
-  if ((__libdl_info.flags_ & FLAG_LINKED) == 0) {
-    __libdl_info.flags_ |= FLAG_LINKED;
-    __libdl_info.strtab_ = ANDROID_LIBDL_STRTAB;
-    __libdl_info.symtab_ = g_libdl_symtab;
-    __libdl_info.nbucket_ = sizeof(g_libdl_buckets)/sizeof(unsigned);
-    __libdl_info.nchain_ = sizeof(g_libdl_chains)/sizeof(unsigned);
-    __libdl_info.bucket_ = g_libdl_buckets;
-    __libdl_info.chain_ = g_libdl_chains;
-    __libdl_info.ref_count_ = 1;
-    __libdl_info.strtab_size_ = sizeof(ANDROID_LIBDL_STRTAB);
-    __libdl_info.local_group_root_ = &__libdl_info;
+  if (__libdl_info == nullptr) {  // First call: construct the soinfo in place inside __libdl_info_buf.
+    __libdl_info = new (__libdl_info_buf) soinfo("libdl.so", nullptr, 0, RTLD_GLOBAL);
+    __libdl_info->flags_ |= FLAG_LINKED;
+    __libdl_info->strtab_ = ANDROID_LIBDL_STRTAB;
+    __libdl_info->symtab_ = g_libdl_symtab;
+    __libdl_info->nbucket_ = sizeof(g_libdl_buckets)/sizeof(unsigned);
+    __libdl_info->nchain_ = sizeof(g_libdl_chains)/sizeof(unsigned);
+    __libdl_info->bucket_ = g_libdl_buckets;
+    __libdl_info->chain_ = g_libdl_chains;
+    __libdl_info->ref_count_ = 1;
+    __libdl_info->strtab_size_ = sizeof(ANDROID_LIBDL_STRTAB);
+    __libdl_info->local_group_root_ = __libdl_info;
+    __libdl_info->soname_ = "libdl.so";
+#if defined(__arm__)
+    strlcpy(__libdl_info->old_name_, __libdl_info->soname_, sizeof(__libdl_info->old_name_));
+#endif
   }
 
-  return &__libdl_info;
+  return __libdl_info;
 }
diff --git a/linker/linked_list.h b/linker/linked_list.h
index a72b73c..8003dbf 100644
--- a/linker/linked_list.h
+++ b/linker/linked_list.h
@@ -136,6 +136,17 @@
     }
   }
 
+  template<typename F>
+  T* find_if(F predicate) const {  // Returns the first element for which predicate(element) is true.
+    for (LinkedListEntry<T>* e = head_; e != nullptr; e = e->next) {  // Walks from head_ in list order.
+      if (predicate(e->element)) {
+        return e->element;
+      }
+    }
+
+    return nullptr;  // No element matched, or the list is empty.
+  }
+
   size_t copy_to_array(T* array[], size_t array_length) const {
     size_t sz = 0;
     for (LinkedListEntry<T>* e = head_; sz < array_length && e != nullptr; e = e->next) {
diff --git a/linker/linker.cpp b/linker/linker.cpp
index ea7d637..ebc0947 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008, 2009 The Android Open Source Project
+ * Copyright (C) 2008 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,8 @@
 #include <unistd.h>
 
 #include <new>
+#include <string>
+#include <vector>
 
 // Private C library headers.
 #include "private/bionic_tls.h"
@@ -57,6 +59,7 @@
 #include "linker_phdr.h"
 #include "linker_relocs.h"
 #include "linker_reloc_iterators.h"
+#include "ziparchive/zip_archive.h"
 
 /* >>> IMPORTANT NOTE - READ ME BEFORE MODIFYING <<<
  *
@@ -77,19 +80,6 @@
 #undef ELF_ST_TYPE
 #define ELF_ST_TYPE(x) (static_cast<uint32_t>(x) & 0xf)
 
-#if defined(__LP64__)
-#define SEARCH_NAME(x) x
-#else
-// Nvidia drivers are relying on the bug:
-// http://code.google.com/p/android/issues/detail?id=6670
-// so we continue to use base-name lookup for lp32
-static const char* get_base_name(const char* name) {
-  const char* bname = strrchr(name, '/');
-  return bname ? bname + 1 : name;
-}
-#define SEARCH_NAME(x) get_base_name(x)
-#endif
-
 static ElfW(Addr) get_elf_exec_load_bias(const ElfW(Ehdr)* elf);
 
 static LinkerTypeAllocator<soinfo> g_soinfo_allocator;
@@ -110,19 +100,13 @@
   nullptr
 };
 
-#define LDPATH_BUFSIZE (LDPATH_MAX*64)
-#define LDPATH_MAX 8
+static const ElfW(Versym) kVersymNotNeeded = 0;
+static const ElfW(Versym) kVersymGlobal = 1;
 
-#define LDPRELOAD_BUFSIZE (LDPRELOAD_MAX*64)
-#define LDPRELOAD_MAX 8
+static std::vector<std::string> g_ld_library_paths;
+static std::vector<std::string> g_ld_preload_names;
 
-static char g_ld_library_paths_buffer[LDPATH_BUFSIZE];
-static const char* g_ld_library_paths[LDPATH_MAX + 1];
-
-static char g_ld_preloads_buffer[LDPRELOAD_BUFSIZE];
-static const char* g_ld_preload_names[LDPRELOAD_MAX + 1];
-
-static soinfo* g_ld_preloads[LDPRELOAD_MAX + 1];
+static std::vector<soinfo*> g_ld_preloads;
 
 __LIBC_HIDDEN__ int g_ld_debug_verbosity;
 
@@ -147,18 +131,6 @@
 uint32_t bitmask[4096];
 #endif
 
-// You shouldn't try to call memory-allocating functions in the dynamic linker.
-// Guard against the most obvious ones.
-#define DISALLOW_ALLOCATION(return_type, name, ...) \
-    return_type name __VA_ARGS__ \
-    { \
-      __libc_fatal("ERROR: " #name " called from the dynamic linker!\n"); \
-    }
-DISALLOW_ALLOCATION(void*, malloc, (size_t u __unused));
-DISALLOW_ALLOCATION(void, free, (void* u __unused));
-DISALLOW_ALLOCATION(void*, realloc, (void* u1 __unused, size_t u2 __unused));
-DISALLOW_ALLOCATION(void*, calloc, (size_t u1 __unused, size_t u2 __unused));
-
 static char __linker_dl_err_buf[768];
 
 char* linker_get_error_buffer() {
@@ -171,17 +143,21 @@
 
 // This function is an empty stub where GDB locates a breakpoint to get notified
 // about linker activity.
-extern "C" void __attribute__((noinline)) __attribute__((visibility("default"))) rtld_db_dlactivity();
+extern "C"
+void __attribute__((noinline)) __attribute__((visibility("default"))) rtld_db_dlactivity();
 
 static pthread_mutex_t g__r_debug_mutex = PTHREAD_MUTEX_INITIALIZER;
-static r_debug _r_debug = {1, nullptr, reinterpret_cast<uintptr_t>(&rtld_db_dlactivity), r_debug::RT_CONSISTENT, 0};
+static r_debug _r_debug =
+    {1, nullptr, reinterpret_cast<uintptr_t>(&rtld_db_dlactivity), r_debug::RT_CONSISTENT, 0};
+
 static link_map* r_debug_tail = 0;
 
 static void insert_soinfo_into_debug_map(soinfo* info) {
   // Copy the necessary fields into the debug structure.
   link_map* map = &(info->link_map_head);
   map->l_addr = info->load_bias;
-  map->l_name = info->name;
+  // link_map l_name field is not const.
+  map->l_name = const_cast<char*>(info->get_realpath());
   map->l_ld = info->dynamic;
 
   // Stick the new library at the end of the list.
@@ -264,8 +240,9 @@
   g_soinfo_links_allocator.free(entry);
 }
 
-static soinfo* soinfo_alloc(const char* name, struct stat* file_stat, off64_t file_offset, uint32_t rtld_flags) {
-  if (strlen(name) >= SOINFO_NAME_LEN) {
+static soinfo* soinfo_alloc(const char* name, struct stat* file_stat,
+                            off64_t file_offset, uint32_t rtld_flags) {
+  if (strlen(name) >= PATH_MAX) {
     DL_ERR("library name \"%s\" too long", name);
     return nullptr;
   }
@@ -290,7 +267,7 @@
 
   soinfo *prev = nullptr, *trav;
 
-  TRACE("name %s: freeing soinfo @ %p", si->name, si);
+  TRACE("name %s: freeing soinfo @ %p", si->get_soname(), si);
 
   for (trav = solist; trav != nullptr; trav = trav->next) {
     if (trav == si) {
@@ -301,7 +278,7 @@
 
   if (trav == nullptr) {
     // si was not in solist
-    DL_ERR("name \"%s\"@%p is not in solist!", si->name, si);
+    DL_ERR("name \"%s\"@%p is not in solist!", si->get_soname(), si);
     return;
   }
 
@@ -319,39 +296,47 @@
 }
 
 static void parse_path(const char* path, const char* delimiters,
-                       const char** array, char* buf, size_t buf_size, size_t max_count) {
+                       std::vector<std::string>* paths) {
   if (path == nullptr) {
     return;
   }
 
-  size_t len = strlcpy(buf, path, buf_size);
+  paths->clear();
 
-  size_t i = 0;
-  char* buf_p = buf;
-  while (i < max_count && (array[i] = strsep(&buf_p, delimiters))) {
-    if (*array[i] != '\0') {
-      ++i;
+  for (const char *p = path; *p != '\0'; ++p) {
+    size_t len = strcspn(p, delimiters);
+    // Skip empty tokens; the loop condition keeps us from stepping past the NUL.
+    if (len == 0) {
+      continue;
     }
-  }
 
-  // Forget the last path if we had to truncate; this occurs if the 2nd to
-  // last char isn't '\0' (i.e. wasn't originally a delimiter).
-  if (i > 0 && len >= buf_size && buf[buf_size - 2] != '\0') {
-    array[i - 1] = nullptr;
-  } else {
-    array[i] = nullptr;
+    paths->push_back(std::string(p, len));
+    p += len;
+
+    if (*p == '\0') {
+      break;
+    }
   }
 }
 
 static void parse_LD_LIBRARY_PATH(const char* path) {
-  parse_path(path, ":", g_ld_library_paths,
-             g_ld_library_paths_buffer, sizeof(g_ld_library_paths_buffer), LDPATH_MAX);
+  parse_path(path, ":", &g_ld_library_paths);
 }
 
 static void parse_LD_PRELOAD(const char* path) {
   // We have historically supported ':' as well as ' ' in LD_PRELOAD.
-  parse_path(path, " :", g_ld_preload_names,
-             g_ld_preloads_buffer, sizeof(g_ld_preloads_buffer), LDPRELOAD_MAX);
+  parse_path(path, " :", &g_ld_preload_names);
+}
+
+static bool realpath_fd(int fd, std::string* realpath) {  // Resolves |fd| via /proc/self/fd.
+  std::vector<char> buf(PATH_MAX), proc_self_fd(PATH_MAX);
+  snprintf(&proc_self_fd[0], proc_self_fd.size(), "/proc/self/fd/%d", fd);
+  ssize_t len = readlink(&proc_self_fd[0], &buf[0], buf.size());  // Not NUL-terminated.
+  if (len == -1) {
+    return false;
+  }
+  realpath->assign(&buf[0], static_cast<size_t>(len));  // Copy exactly the bytes written.
+  return true;
 }
 
 #if defined(__arm__)
@@ -397,8 +382,129 @@
   return rv;
 }
 
-ElfW(Sym)* soinfo::find_symbol_by_name(SymbolName& symbol_name) {
-  return is_gnu_hash() ? gnu_lookup(symbol_name) : elf_lookup(symbol_name);
+const ElfW(Versym)* soinfo::get_versym(size_t n) const {
+  if (has_min_version(2) && versym_ != nullptr) {
+    return versym_ + n;
+  }
+
+  return nullptr;
+}
+
+ElfW(Addr) soinfo::get_verneed_ptr() const {
+  if (has_min_version(2)) {
+    return verneed_ptr_;
+  }
+
+  return 0;
+}
+
+size_t soinfo::get_verneed_cnt() const {
+  if (has_min_version(2)) {
+    return verneed_cnt_;
+  }
+
+  return 0;
+}
+
+ElfW(Addr) soinfo::get_verdef_ptr() const {
+  if (has_min_version(2)) {
+    return verdef_ptr_;
+  }
+
+  return 0;
+}
+
+size_t soinfo::get_verdef_cnt() const {
+  if (has_min_version(2)) {
+    return verdef_cnt_;
+  }
+
+  return 0;
+}
+
+template<typename F>
+static bool for_each_verdef(const soinfo* si, F functor) {
+  if (!si->has_min_version(2)) {
+    return true;
+  }
+
+  uintptr_t verdef_ptr = si->get_verdef_ptr();
+  if (verdef_ptr == 0) {
+    return true;
+  }
+
+  size_t offset = 0;
+
+  size_t verdef_cnt = si->get_verdef_cnt();
+  for (size_t i = 0; i<verdef_cnt; ++i) {
+    const ElfW(Verdef)* verdef = reinterpret_cast<ElfW(Verdef)*>(verdef_ptr + offset);
+    size_t verdaux_offset = offset + verdef->vd_aux;
+    offset += verdef->vd_next;
+
+    if (verdef->vd_version != 1) {
+      DL_ERR("unsupported verdef[%zd] vd_version: %d (expected 1) library: %s",
+          i, verdef->vd_version, si->get_soname());
+      return false;
+    }
+
+    if ((verdef->vd_flags & VER_FLG_BASE) != 0) {
+      // "this is the version of the file itself.  It must not be used for
+      //  matching a symbol. It can be used to match references."
+      //
+      // http://www.akkadia.org/drepper/symbol-versioning
+      continue;
+    }
+
+    if (verdef->vd_cnt == 0) {
+      DL_ERR("invalid verdef[%zd] vd_cnt == 0 (version without a name)", i);
+      return false;
+    }
+
+    const ElfW(Verdaux)* verdaux = reinterpret_cast<ElfW(Verdaux)*>(verdef_ptr + verdaux_offset);
+
+    if (functor(i, verdef, verdaux) == true) {
+      break;
+    }
+  }
+
+  return true;
+}
+
+bool soinfo::find_verdef_version_index(const version_info* vi, ElfW(Versym)* versym) const {
+  if (vi == nullptr) {
+    *versym = kVersymNotNeeded;
+    return true;
+  }
+
+  *versym = kVersymGlobal;
+
+  return for_each_verdef(this,
+    [&](size_t, const ElfW(Verdef)* verdef, const ElfW(Verdaux)* verdaux) {
+      if (verdef->vd_hash == vi->elf_hash &&
+          strcmp(vi->name, get_string(verdaux->vda_name)) == 0) {
+        *versym = verdef->vd_ndx;
+        return true;
+      }
+
+      return false;
+    }
+  );
+}
+
+bool soinfo::find_symbol_by_name(SymbolName& symbol_name,
+                                 const version_info* vi,
+                                 const ElfW(Sym)** symbol) const {
+  uint32_t symbol_index;
+  bool success =
+      is_gnu_hash() ?
+      gnu_lookup(symbol_name, vi, &symbol_index) :
+      elf_lookup(symbol_name, vi, &symbol_index);
+
+  if (success) {
+    *symbol = symbol_index == 0 ? nullptr : symtab_ + symbol_index;
+  }
+
+  return success;
 }
 
 static bool is_symbol_global_and_defined(const soinfo* si, const ElfW(Sym)* s) {
@@ -407,13 +513,29 @@
     return s->st_shndx != SHN_UNDEF;
   } else if (ELF_ST_BIND(s->st_info) != STB_LOCAL) {
     DL_WARN("unexpected ST_BIND value: %d for '%s' in '%s'",
-        ELF_ST_BIND(s->st_info), si->get_string(s->st_name), si->name);
+        ELF_ST_BIND(s->st_info), si->get_string(s->st_name), si->get_soname());
   }
 
   return false;
 }
 
-ElfW(Sym)* soinfo::gnu_lookup(SymbolName& symbol_name) {
+static const ElfW(Versym) kVersymHiddenBit = 0x8000;
+
+static inline bool is_versym_hidden(const ElfW(Versym)* versym) {
+  // the symbol is hidden if bit 15 of versym is set.
+  return versym != nullptr && (*versym & kVersymHiddenBit) != 0;
+}
+
+static inline bool check_symbol_version(const ElfW(Versym) verneed,
+                                        const ElfW(Versym)* verdef) {
+  return verneed == kVersymNotNeeded ||  // No particular version was requested.
+      verdef == nullptr ||  // No versym entry is available for this symbol.
+      verneed == (*verdef & ~kVersymHiddenBit);  // Indexes match, ignoring the hidden bit.
+}
+
+bool soinfo::gnu_lookup(SymbolName& symbol_name,
+                        const version_info* vi,
+                        uint32_t* symbol_index) const {
   uint32_t hash = symbol_name.gnu_hash();
   uint32_t h2 = hash >> gnu_shift2_;
 
@@ -421,15 +543,17 @@
   uint32_t word_num = (hash / bloom_mask_bits) & gnu_maskwords_;
   ElfW(Addr) bloom_word = gnu_bloom_filter_[word_num];
 
+  *symbol_index = 0;
+
   TRACE_TYPE(LOOKUP, "SEARCH %s in %s@%p (gnu)",
-      symbol_name.get_name(), name, reinterpret_cast<void*>(base));
+      symbol_name.get_name(), get_soname(), reinterpret_cast<void*>(base));
 
   // test against bloom filter
   if ((1 & (bloom_word >> (hash % bloom_mask_bits)) & (bloom_word >> (h2 % bloom_mask_bits))) == 0) {
     TRACE_TYPE(LOOKUP, "NOT FOUND %s in %s@%p",
-        symbol_name.get_name(), name, reinterpret_cast<void*>(base));
+        symbol_name.get_name(), get_soname(), reinterpret_cast<void*>(base));
 
-    return nullptr;
+    return true;
   }
 
   // bloom test says "probably yes"...
@@ -437,55 +561,98 @@
 
   if (n == 0) {
     TRACE_TYPE(LOOKUP, "NOT FOUND %s in %s@%p",
-        symbol_name.get_name(), name, reinterpret_cast<void*>(base));
+        symbol_name.get_name(), get_soname(), reinterpret_cast<void*>(base));
 
-    return nullptr;
+    return true;
+  }
+
+  // lookup versym for the version definition in this library
+  // note the difference between "version is not requested" (vi == nullptr)
+  // and "version not found". In the first case verneed is kVersymNotNeeded
+  // which implies that the default version can be accepted; the second case results in
+  // verneed = 1 (kVersymGlobal) and implies that we should ignore versioned symbols
+  // for this library and consider only *global* ones.
+  ElfW(Versym) verneed = 0;
+  if (!find_verdef_version_index(vi, &verneed)) {
+    return false;
   }
 
   do {
     ElfW(Sym)* s = symtab_ + n;
+    const ElfW(Versym)* verdef = get_versym(n);
+    // skip hidden versions when verneed == kVersymNotNeeded (0)
+    if (verneed == kVersymNotNeeded && is_versym_hidden(verdef)) {
+        continue;
+    }
     if (((gnu_chain_[n] ^ hash) >> 1) == 0 &&
+        check_symbol_version(verneed, verdef) &&
         strcmp(get_string(s->st_name), symbol_name.get_name()) == 0 &&
         is_symbol_global_and_defined(this, s)) {
       TRACE_TYPE(LOOKUP, "FOUND %s in %s (%p) %zd",
-          symbol_name.get_name(), name, reinterpret_cast<void*>(s->st_value),
+          symbol_name.get_name(), get_soname(), reinterpret_cast<void*>(s->st_value),
           static_cast<size_t>(s->st_size));
-      return s;
+      *symbol_index = n;
+      return true;
     }
   } while ((gnu_chain_[n++] & 1) == 0);
 
   TRACE_TYPE(LOOKUP, "NOT FOUND %s in %s@%p",
-             symbol_name.get_name(), name, reinterpret_cast<void*>(base));
+             symbol_name.get_name(), get_soname(), reinterpret_cast<void*>(base));
 
-  return nullptr;
+  return true;
 }
 
-ElfW(Sym)* soinfo::elf_lookup(SymbolName& symbol_name) {
+bool soinfo::elf_lookup(SymbolName& symbol_name,
+                        const version_info* vi,
+                        uint32_t* symbol_index) const {
   uint32_t hash = symbol_name.elf_hash();
 
   TRACE_TYPE(LOOKUP, "SEARCH %s in %s@%p h=%x(elf) %zd",
-             symbol_name.get_name(), name, reinterpret_cast<void*>(base), hash, hash % nbucket_);
+             symbol_name.get_name(), get_soname(),
+             reinterpret_cast<void*>(base), hash, hash % nbucket_);
+
+  ElfW(Versym) verneed = 0;
+  if (!find_verdef_version_index(vi, &verneed)) {
+    return false;
+  }
 
   for (uint32_t n = bucket_[hash % nbucket_]; n != 0; n = chain_[n]) {
     ElfW(Sym)* s = symtab_ + n;
-    if (strcmp(get_string(s->st_name), symbol_name.get_name()) == 0 && is_symbol_global_and_defined(this, s)) {
+    const ElfW(Versym)* verdef = get_versym(n);
+
+    // skip hidden versions when verneed == 0
+    if (verneed == kVersymNotNeeded && is_versym_hidden(verdef)) {
+        continue;
+    }
+
+    if (check_symbol_version(verneed, verdef) &&
+        strcmp(get_string(s->st_name), symbol_name.get_name()) == 0 &&
+        is_symbol_global_and_defined(this, s)) {
       TRACE_TYPE(LOOKUP, "FOUND %s in %s (%p) %zd",
-               symbol_name.get_name(), name, reinterpret_cast<void*>(s->st_value),
-               static_cast<size_t>(s->st_size));
-      return s;
+                 symbol_name.get_name(), get_soname(),
+                 reinterpret_cast<void*>(s->st_value),
+                 static_cast<size_t>(s->st_size));
+      *symbol_index = n;
+      return true;
     }
   }
 
   TRACE_TYPE(LOOKUP, "NOT FOUND %s in %s@%p %x %zd",
-             symbol_name.get_name(), name, reinterpret_cast<void*>(base), hash, hash % nbucket_);
+             symbol_name.get_name(), get_soname(),
+             reinterpret_cast<void*>(base), hash, hash % nbucket_);
 
-  return nullptr;
+  *symbol_index = 0;
+  return true;
 }
 
-soinfo::soinfo(const char* name, const struct stat* file_stat, off64_t file_offset, int rtld_flags) {
+soinfo::soinfo(const char* realpath, const struct stat* file_stat,
+               off64_t file_offset, int rtld_flags) {
   memset(this, 0, sizeof(*this));
 
-  strlcpy(this->name, name, sizeof(this->name));
+  if (realpath != nullptr) {
+    realpath_ = realpath;
+  }
+
   flags_ = FLAG_NEW_SOINFO;
   version_ = SOINFO_VERSION;
 
@@ -501,7 +668,7 @@
 
 uint32_t SymbolName::elf_hash() {
   if (!has_elf_hash_) {
-    const unsigned char* name = reinterpret_cast<const unsigned char*>(name_);
+    const uint8_t* name = reinterpret_cast<const uint8_t*>(name_);
     uint32_t h = 0, g;
 
     while (*name) {
@@ -521,7 +688,7 @@
 uint32_t SymbolName::gnu_hash() {
   if (!has_gnu_hash_) {
     uint32_t h = 5381;
-    const unsigned char* name = reinterpret_cast<const unsigned char*>(name_);
+    const uint8_t* name = reinterpret_cast<const uint8_t*>(name_);
     while (*name != 0) {
       h += (h << 5) + *name++; // h*33 + c = h + h * 32 + c = h + h << 5 + c
     }
@@ -533,10 +700,11 @@
   return gnu_hash_;
 }
 
-ElfW(Sym)* soinfo_do_lookup(soinfo* si_from, const char* name, soinfo** si_found_in,
-    const soinfo::soinfo_list_t& global_group, const soinfo::soinfo_list_t& local_group) {
+bool soinfo_do_lookup(soinfo* si_from, const char* name, const version_info* vi,
+                      soinfo** si_found_in, const soinfo::soinfo_list_t& global_group,
+                      const soinfo::soinfo_list_t& local_group, const ElfW(Sym)** symbol) {
   SymbolName symbol_name(name);
-  ElfW(Sym)* s = nullptr;
+  const ElfW(Sym)* s = nullptr;
 
   /* "This element's presence in a shared object library alters the dynamic linker's
    * symbol resolution algorithm for references within the library. Instead of starting
@@ -550,8 +718,11 @@
    * relocations for -Bsymbolic linked dynamic executables.
    */
   if (si_from->has_DT_SYMBOLIC) {
-    DEBUG("%s: looking up %s in local scope (DT_SYMBOLIC)", si_from->name, name);
-    s = si_from->find_symbol_by_name(symbol_name);
+    DEBUG("%s: looking up %s in local scope (DT_SYMBOLIC)", si_from->get_soname(), name);
+    if (!si_from->find_symbol_by_name(symbol_name, vi, &s)) {
+      return false;
+    }
+
     if (s != nullptr) {
       *si_found_in = si_from;
     }
@@ -559,9 +730,15 @@
 
   // 1. Look for it in global_group
   if (s == nullptr) {
+    bool error = false;
     global_group.visit([&](soinfo* global_si) {
-      DEBUG("%s: looking up %s in %s (from global group)", si_from->name, name, global_si->name);
-      s = global_si->find_symbol_by_name(symbol_name);
+      DEBUG("%s: looking up %s in %s (from global group)",
+          si_from->get_soname(), name, global_si->get_soname());
+      if (!global_si->find_symbol_by_name(symbol_name, vi, &s)) {
+        error = true;
+        return false;
+      }
+
       if (s != nullptr) {
         *si_found_in = global_si;
         return false;
@@ -569,18 +746,28 @@
 
       return true;
     });
+
+    if (error) {
+      return false;
+    }
   }
 
   // 2. Look for it in the local group
   if (s == nullptr) {
+    bool error = false;
     local_group.visit([&](soinfo* local_si) {
       if (local_si == si_from && si_from->has_DT_SYMBOLIC) {
         // we already did this - skip
         return true;
       }
 
-      DEBUG("%s: looking up %s in %s (from local group)", si_from->name, name, local_si->name);
-      s = local_si->find_symbol_by_name(symbol_name);
+      DEBUG("%s: looking up %s in %s (from local group)",
+          si_from->get_soname(), name, local_si->get_soname());
+      if (!local_si->find_symbol_by_name(symbol_name, vi, &s)) {
+        error = true;
+        return false;
+      }
+
       if (s != nullptr) {
         *si_found_in = local_si;
         return false;
@@ -588,17 +775,22 @@
 
       return true;
     });
+
+    if (error) {
+      return false;
+    }
   }
 
   if (s != nullptr) {
     TRACE_TYPE(LOOKUP, "si %s sym %s s->st_value = %p, "
                "found in %s, base = %p, load bias = %p",
-               si_from->name, name, reinterpret_cast<void*>(s->st_value),
-               (*si_found_in)->name, reinterpret_cast<void*>((*si_found_in)->base),
+               si_from->get_soname(), name, reinterpret_cast<void*>(s->st_value),
+               (*si_found_in)->get_soname(), reinterpret_cast<void*>((*si_found_in)->base),
                reinterpret_cast<void*>((*si_found_in)->load_bias));
   }
 
-  return s;
+  *symbol = s;
+  return true;
 }
 
 class ProtectedDataGuard {
@@ -743,13 +935,16 @@
 
 // This is used by dlsym(3).  It performs symbol lookup only within the
 // specified soinfo object and its dependencies in breadth first order.
-ElfW(Sym)* dlsym_handle_lookup(soinfo* si, soinfo** found, const char* name) {
-  ElfW(Sym)* result = nullptr;
+const ElfW(Sym)* dlsym_handle_lookup(soinfo* si, soinfo** found, const char* name) {
+  const ElfW(Sym)* result = nullptr;
   SymbolName symbol_name(name);
 
-
   walk_dependencies_tree(&si, 1, [&](soinfo* current_soinfo) {
-    result = current_soinfo->find_symbol_by_name(symbol_name);
+    if (!current_soinfo->find_symbol_by_name(symbol_name, nullptr, &result)) {
+      result = nullptr;
+      return false;
+    }
+
     if (result != nullptr) {
       *found = current_soinfo;
       return false;
@@ -766,26 +961,65 @@
    beginning of the global solist. Otherwise the search starts at the
    specified soinfo (for RTLD_NEXT).
  */
-ElfW(Sym)* dlsym_linear_lookup(const char* name, soinfo** found, soinfo* start) {
+const ElfW(Sym)* dlsym_linear_lookup(const char* name,
+                                     soinfo** found,
+                                     soinfo* caller,
+                                     void* handle) {
   SymbolName symbol_name(name);
 
-  if (start == nullptr) {
-    start = solist;
+  soinfo* start = solist;
+
+  if (handle == RTLD_NEXT) {
+    if (caller == nullptr || caller->next == nullptr) {
+      return nullptr;
+    } else {
+      start = caller->next;
+    }
   }
 
-  ElfW(Sym)* s = nullptr;
-  for (soinfo* si = start; (s == nullptr) && (si != nullptr); si = si->next) {
+  const ElfW(Sym)* s = nullptr;
+  for (soinfo* si = start; si != nullptr; si = si->next) {
     if ((si->get_rtld_flags() & RTLD_GLOBAL) == 0) {
       continue;
     }
 
-    s = si->find_symbol_by_name(symbol_name);
+    if (!si->find_symbol_by_name(symbol_name, nullptr, &s)) {
+      return nullptr;
+    }
+
     if (s != nullptr) {
       *found = si;
       break;
     }
   }
 
+  // If not found - look into local_group unless
+  // caller is part of the global group in which
+  // case we already did it.
+  if (s == nullptr && caller != nullptr &&
+      (caller->get_rtld_flags() & RTLD_GLOBAL) == 0) {
+    soinfo* local_group_root = caller->get_local_group_root();
+
+    if (handle == RTLD_DEFAULT) {
+      start = local_group_root;
+    }
+
+    for (soinfo* si = start; si != nullptr; si = si->next) {
+      if (si->get_local_group_root() != local_group_root) {
+        break;
+      }
+
+      if (!si->find_symbol_by_name(symbol_name, nullptr, &s)) {
+        return nullptr;
+      }
+
+      if (s != nullptr) {
+        *found = si;
+        break;
+      }
+    }
+  }
+
   if (s != nullptr) {
     TRACE_TYPE(LOOKUP, "%s s->st_value = %p, found->base = %p",
                name, reinterpret_cast<void*>(s->st_value), reinterpret_cast<void*>((*found)->base));
@@ -850,41 +1084,141 @@
   return nullptr;
 }
 
-static int open_library_on_path(const char* name, const char* const paths[]) {
+static int open_library_in_zipfile(const char* const path,
+                                   off64_t* file_offset) {
+  TRACE("Trying zip file open from path '%s'", path);
+
+  // Treat an '!' character inside a path as the separator between the name
+  // of the zip file on disk and the subdirectory to search within it.
+  // For example, if path is "foo.zip!bar/bas/x.so", then we search for
+  // "bar/bas/x.so" within "foo.zip".
+  const char* separator = strchr(path, '!');
+  if (separator == nullptr) {
+    return -1;
+  }
+
   char buf[512];
-  for (size_t i = 0; paths[i] != nullptr; ++i) {
-    int n = __libc_format_buffer(buf, sizeof(buf), "%s/%s", paths[i], name);
-    if (n < 0 || n >= static_cast<int>(sizeof(buf))) {
-      PRINT("Warning: ignoring very long library path: %s/%s", paths[i], name);
+  if (strlcpy(buf, path, sizeof(buf)) >= sizeof(buf)) {
+    PRINT("Warning: ignoring very long library path: %s", path);
+    return -1;
+  }
+
+  buf[separator - path] = '\0';  // Split the copy in place at the '!'.
+
+  const char* zip_path = buf;
+  const char* file_path = &buf[separator - path + 1];
+  int fd = TEMP_FAILURE_RETRY(open(zip_path, O_RDONLY | O_CLOEXEC));
+  if (fd == -1) {
+    return -1;
+  }
+
+  ZipArchiveHandle handle;
+  if (OpenArchiveFd(fd, "", &handle, false) != 0) {
+    CloseArchive(handle);  // invalid zip-file (?); the handle must still be released.
+    close(fd);
+    return -1;
+  }
+
+  auto archive_guard = make_scope_guard([&]() {
+    CloseArchive(handle);
+  });
+
+  ZipEntry entry;
+
+  if (FindEntry(handle, ZipEntryName(file_path), &entry) != 0) {
+    // Entry was not found.
+    close(fd);
+    return -1;
+  }
+
+  // Only entries stored with kCompressStored at a page-aligned offset are accepted.
+  if (entry.method != kCompressStored || (entry.offset % PAGE_SIZE) != 0) {
+    close(fd);
+    return -1;
+  }
+
+  *file_offset = entry.offset;
+  return fd;  // On success the caller owns fd; the archive handle is closed by the guard.
+}
+
+static bool format_path(char* buf, size_t buf_size, const char* path, const char* name) {
+  int n = __libc_format_buffer(buf, buf_size, "%s/%s", path, name);  // Writes "<path>/<name>".
+  if (n < 0 || n >= static_cast<int>(buf_size)) {  // Negative or >= buf_size: reject.
+    PRINT("Warning: ignoring very long library path: %s/%s", path, name);
+    return false;
+  }
+
+  return true;
+}
+
+static int open_library_on_default_path(const char* name, off64_t* file_offset) {
+  for (size_t i = 0; kDefaultLdPaths[i] != nullptr; ++i) {
+    char buf[512];
+    if (!format_path(buf, sizeof(buf), kDefaultLdPaths[i], name)) {
       continue;
     }
+
     int fd = TEMP_FAILURE_RETRY(open(buf, O_RDONLY | O_CLOEXEC));
     if (fd != -1) {
+      *file_offset = 0;
+      return fd;
+    }
+  }
+
+  return -1;
+}
+
+static int open_library_on_ld_library_path(const char* name, off64_t* file_offset) {
+  for (const auto& path_str : g_ld_library_paths) {
+    char buf[512];
+    const char* const path = path_str.c_str();
+    if (!format_path(buf, sizeof(buf), path, name)) {
+      continue;
+    }
+
+    int fd = -1;
+    if (strchr(buf, '!') != nullptr) {
+      fd = open_library_in_zipfile(buf, file_offset);
+    }
+
+    if (fd == -1) {
+      fd = TEMP_FAILURE_RETRY(open(buf, O_RDONLY | O_CLOEXEC));
+      if (fd != -1) {
+        *file_offset = 0;
+      }
+    }
+
+    if (fd != -1) {
       return fd;
     }
   }
+
   return -1;
 }
 
-static int open_library(const char* name) {
+static int open_library(const char* name, off64_t* file_offset) {
   TRACE("[ opening %s ]", name);
 
   // If the name contains a slash, we should attempt to open it directly and not search the paths.
   if (strchr(name, '/') != nullptr) {
+    if (strchr(name, '!') != nullptr) {
+      int fd = open_library_in_zipfile(name, file_offset);
+      if (fd != -1) {
+        return fd;
+      }
+    }
+
     int fd = TEMP_FAILURE_RETRY(open(name, O_RDONLY | O_CLOEXEC));
     if (fd != -1) {
-      return fd;
+      *file_offset = 0;
     }
-    // ...but nvidia binary blobs (at least) rely on this behavior, so fall through for now.
-#if defined(__LP64__)
-    return -1;
-#endif
+    return fd;
   }
 
   // Otherwise we try LD_LIBRARY_PATH first, and fall back to the built-in well known paths.
-  int fd = open_library_on_path(name, g_ld_library_paths);
+  int fd = open_library_on_ld_library_path(name, file_offset);
   if (fd == -1) {
-    fd = open_library_on_path(name, kDefaultLdPaths);
+    fd = open_library_on_default_path(name, file_offset);
   }
   return fd;
 }
@@ -898,7 +1232,9 @@
   }
 }
 
-static soinfo* load_library(LoadTaskList& load_tasks, const char* name, int rtld_flags, const android_dlextinfo* extinfo) {
+static soinfo* load_library(LoadTaskList& load_tasks,
+                            const char* name, int rtld_flags,
+                            const android_dlextinfo* extinfo) {
   int fd = -1;
   off64_t file_offset = 0;
   ScopedFd file_guard(-1);
@@ -910,7 +1246,7 @@
     }
   } else {
     // Open the file.
-    fd = open_library(name);
+    fd = open_library(name, &file_offset);
     if (fd == -1) {
       DL_ERR("library \"%s\" not found", name);
       return nullptr;
@@ -934,20 +1270,24 @@
     return nullptr;
   }
   if (file_offset >= file_stat.st_size) {
-    DL_ERR("file offset for the library \"%s\" >= file size: %" PRId64 " >= %" PRId64, name, file_offset, file_stat.st_size);
+    DL_ERR("file offset for the library \"%s\" >= file size: %" PRId64 " >= %" PRId64,
+        name, file_offset, file_stat.st_size);
     return nullptr;
   }
 
   // Check for symlink and other situations where
-  // file can have different names.
-  for (soinfo* si = solist; si != nullptr; si = si->next) {
-    if (si->get_st_dev() != 0 &&
-        si->get_st_ino() != 0 &&
-        si->get_st_dev() == file_stat.st_dev &&
-        si->get_st_ino() == file_stat.st_ino &&
-        si->get_file_offset() == file_offset) {
-      TRACE("library \"%s\" is already loaded under different name/path \"%s\" - will return existing soinfo", name, si->name);
-      return si;
+  // file can have different names, unless ANDROID_DLEXT_FORCE_LOAD is set
+  if (extinfo == nullptr || (extinfo->flags & ANDROID_DLEXT_FORCE_LOAD) == 0) {
+    for (soinfo* si = solist; si != nullptr; si = si->next) {
+      if (si->get_st_dev() != 0 &&
+          si->get_st_ino() != 0 &&
+          si->get_st_dev() == file_stat.st_dev &&
+          si->get_st_ino() == file_stat.st_ino &&
+          si->get_file_offset() == file_offset) {
+        TRACE("library \"%s\" is already loaded under different name/path \"%s\" - "
+            "will return existing soinfo", name, si->get_realpath());
+        return si;
+      }
     }
   }
 
@@ -956,13 +1296,19 @@
     return nullptr;
   }
 
+  std::string realpath = name;
+  if (!realpath_fd(fd, &realpath)) {
+    PRINT("cannot resolve realpath for the library \"%s\": %s", name, strerror(errno));
+    realpath = name;
+  }
+
   // Read the ELF header and load the segments.
-  ElfReader elf_reader(name, fd, file_offset);
+  ElfReader elf_reader(realpath.c_str(), fd, file_offset);
   if (!elf_reader.Load(extinfo)) {
     return nullptr;
   }
 
-  soinfo* si = soinfo_alloc(SEARCH_NAME(name), &file_stat, file_offset, rtld_flags);
+  soinfo* si = soinfo_alloc(realpath.c_str(), &file_stat, file_offset, rtld_flags);
   if (si == nullptr) {
     return nullptr;
   }
@@ -984,24 +1330,29 @@
   return si;
 }
 
-static soinfo *find_loaded_library_by_name(const char* name) {
-  const char* search_name = SEARCH_NAME(name);
+static soinfo *find_loaded_library_by_soname(const char* name) {
+  // Ignore filename with path.
+  if (strchr(name, '/') != nullptr) {
+    return nullptr;
+  }
+
   for (soinfo* si = solist; si != nullptr; si = si->next) {
-    if (!strcmp(search_name, si->name)) {
+    const char* soname = si->get_soname();
+    if (soname != nullptr && (strcmp(name, soname) == 0)) {
       return si;
     }
   }
   return nullptr;
 }
 
-static soinfo* find_library_internal(LoadTaskList& load_tasks, const char* name, int rtld_flags, const android_dlextinfo* extinfo) {
-
-  soinfo* si = find_loaded_library_by_name(name);
+static soinfo* find_library_internal(LoadTaskList& load_tasks, const char* name,
+                                     int rtld_flags, const android_dlextinfo* extinfo) {
+  soinfo* si = find_loaded_library_by_soname(name);
 
   // Library might still be loaded, the accurate detection
   // of this fact is done by load_library.
   if (si == nullptr) {
-    TRACE("[ '%s' has not been found by name.  Trying harder...]", name);
+    TRACE("[ '%s' has not been found by soname.  Trying harder...]", name);
     si = load_library(load_tasks, name, rtld_flags, extinfo);
   }
 
@@ -1028,8 +1379,9 @@
   return global_group;
 }
 
-static bool find_libraries(soinfo* start_with, const char* const library_names[], size_t library_names_count, soinfo* soinfos[],
-    soinfo* ld_preloads[], size_t ld_preloads_count, int rtld_flags, const android_dlextinfo* extinfo) {
+static bool find_libraries(soinfo* start_with, const char* const library_names[],
+      size_t library_names_count, soinfo* soinfos[], std::vector<soinfo*>* ld_preloads,
+      size_t ld_preloads_count, int rtld_flags, const android_dlextinfo* extinfo) {
   // Step 0: prepare.
   LoadTaskList load_tasks;
   for (size_t i = 0; i < library_names_count; ++i) {
@@ -1068,7 +1420,8 @@
   });
 
   // Step 1: load and pre-link all DT_NEEDED libraries in breadth first order.
-  for (LoadTask::unique_ptr task(load_tasks.pop_front()); task.get() != nullptr; task.reset(load_tasks.pop_front())) {
+  for (LoadTask::unique_ptr task(load_tasks.pop_front());
+      task.get() != nullptr; task.reset(load_tasks.pop_front())) {
     soinfo* si = find_library_internal(load_tasks, task->get_name(), rtld_flags, extinfo);
     if (si == nullptr) {
       return false;
@@ -1092,7 +1445,7 @@
       // for this run because they are going to appear in the local
       // group in the correct order.
       si->set_dt_flags_1(si->get_dt_flags_1() | DF_1_GLOBAL);
-      ld_preloads[soinfos_count] = si;
+      ld_preloads->push_back(si);
     }
 
     if (soinfos_count < library_names_count) {
@@ -1160,7 +1513,7 @@
   }
 
   if (!root->can_unload()) {
-    TRACE("not unloading '%s' - the binary is flagged with NODELETE", root->name);
+    TRACE("not unloading '%s' - the binary is flagged with NODELETE", root->get_soname());
     return;
   }
 
@@ -1183,7 +1536,7 @@
       if (si->has_min_version(0)) {
         soinfo* child = nullptr;
         while ((child = si->get_children().pop_front()) != nullptr) {
-          TRACE("%s@%p needs to unload %s@%p", si->name, si, child->name, child);
+          TRACE("%s@%p needs to unload %s@%p", si->get_soname(), si, child->get_soname(), child);
           if (local_unload_list.contains(child)) {
             continue;
           } else if (child->is_linked() && child->get_local_group_root() != root) {
@@ -1193,17 +1546,20 @@
           }
         }
       } else {
-#ifdef __LP64__
-        __libc_fatal("soinfo for \"%s\"@%p has no version", si->name, si);
+#if !defined(__arm__)
+        __libc_fatal("soinfo for \"%s\"@%p has no version", si->get_soname(), si);
 #else
-        PRINT("warning: soinfo for \"%s\"@%p has no version", si->name, si);
+        PRINT("warning: soinfo for \"%s\"@%p has no version", si->get_soname(), si);
         for_each_dt_needed(si, [&] (const char* library_name) {
-          TRACE("deprecated (old format of soinfo): %s needs to unload %s", si->name, library_name);
+          TRACE("deprecated (old format of soinfo): %s needs to unload %s",
+              si->get_soname(), library_name);
+
           soinfo* needed = find_library(library_name, RTLD_NOLOAD, nullptr);
           if (needed != nullptr) {
             // Not found: for example if symlink was deleted between dlopen and dlclose
             // Since we cannot really handle errors at this point - print and continue.
-            PRINT("warning: couldn't find %s needed by %s on unload.", library_name, si->name);
+            PRINT("warning: couldn't find %s needed by %s on unload.",
+                library_name, si->get_soname());
             return;
           } else if (local_unload_list.contains(needed)) {
             // already visited
@@ -1233,7 +1589,7 @@
       soinfo_unload(si);
     }
   } else {
-    TRACE("not unloading '%s' group, decrementing ref_count to %zd", root->name, ref_count);
+    TRACE("not unloading '%s' group, decrementing ref_count to %zd", root->get_soname(), ref_count);
   }
 }
 
@@ -1247,8 +1603,8 @@
   // snprintf again.
   size_t required_len = strlen(kDefaultLdPaths[0]) + strlen(kDefaultLdPaths[1]) + 2;
   if (buffer_size < required_len) {
-    __libc_fatal("android_get_LD_LIBRARY_PATH failed, buffer too small: buffer len %zu, required len %zu",
-                 buffer_size, required_len);
+    __libc_fatal("android_get_LD_LIBRARY_PATH failed, buffer too small: "
+                 "buffer len %zu, required len %zu", buffer_size, required_len);
   }
   char* end = stpcpy(buffer, kDefaultLdPaths[0]);
   *end = ':';
@@ -1271,7 +1627,8 @@
     }
     if ((extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD) == 0 &&
         (extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET) != 0) {
-      DL_ERR("invalid extended flag combination (ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET without ANDROID_DLEXT_USE_LIBRARY_FD): 0x%" PRIx64, extinfo->flags);
+      DL_ERR("invalid extended flag combination (ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET without "
+          "ANDROID_DLEXT_USE_LIBRARY_FD): 0x%" PRIx64, extinfo->flags);
       return nullptr;
     }
   }
@@ -1293,11 +1650,99 @@
   typedef ElfW(Addr) (*ifunc_resolver_t)(void);
   ifunc_resolver_t ifunc_resolver = reinterpret_cast<ifunc_resolver_t>(resolver_addr);
   ElfW(Addr) ifunc_addr = ifunc_resolver();
-  TRACE_TYPE(RELO, "Called ifunc_resolver@%p. The result is %p", ifunc_resolver, reinterpret_cast<void*>(ifunc_addr));
+  TRACE_TYPE(RELO, "Called ifunc_resolver@%p. The result is %p",
+      ifunc_resolver, reinterpret_cast<void*>(ifunc_addr));
 
   return ifunc_addr;
 }
 
+// Looks up the version_info recorded for versym index source_symver.
+// Yields nullptr when the index is below 2, is past the end of
+// version_infos, or refers to a slot whose name was never filled in.
+const version_info* VersionTracker::get_version_info(ElfW(Versym) source_symver) const {
+  const bool has_entry = source_symver >= 2 &&
+      source_symver < version_infos.size() &&
+      version_infos[source_symver].name != nullptr;
+  return has_entry ? &version_infos[source_symver] : nullptr;
+}
+
+// Stores (elf_hash, ver_name, target_si) at version_infos[source_index], growing
+// version_infos first when source_index is past its current end.
+void VersionTracker::add_version_info(size_t source_index, ElfW(Word) elf_hash,
+                                      const char* ver_name, const soinfo* target_si) {
+  if (version_infos.size() <= source_index) {
+    version_infos.resize(source_index + 1);
+  }
+  auto& entry = version_infos[source_index];
+  entry.elf_hash = elf_hash;
+  entry.name = ver_name;
+  entry.target_si = target_si;
+}
+
+// Records every version required by si_from (its verneed table) in version_infos.
+// Returns false, after DL_ERR, on an unsupported vn_version or when the library
+// an entry names is not among si_from's children; otherwise returns true.
+bool VersionTracker::init_verneed(const soinfo* si_from) {
+  uintptr_t verneed_ptr = si_from->get_verneed_ptr();
+  if (verneed_ptr == 0) {
+    return true;
+  }
+
+  size_t verneed_cnt = si_from->get_verneed_cnt();
+  for (size_t i = 0, offset = 0; i<verneed_cnt; ++i) {
+    const ElfW(Verneed)* verneed = reinterpret_cast<ElfW(Verneed)*>(verneed_ptr + offset);
+    // vn_aux and vn_next are both relative to the start of this verneed entry.
+    size_t vernaux_offset = offset + verneed->vn_aux;
+    offset += verneed->vn_next;
+
+    if (verneed->vn_version != 1) {
+      DL_ERR("unsupported verneed[%zd] vn_version: %d (expected 1)", i, verneed->vn_version);
+      return false;
+    }
+
+    const char* target_soname = si_from->get_string(verneed->vn_file);
+    // Find it in dependencies. get_soname() may be nullptr (e.g. a child without
+    // DT_SONAME); such a child can never match and must not be passed to strcmp.
+    soinfo* target_si = si_from->get_children().find_if([&](const soinfo* si) {
+      const char* soname = si->get_soname();
+      return soname != nullptr && strcmp(soname, target_soname) == 0;
+    });
+    if (target_si == nullptr) {
+      DL_ERR("cannot find \"%s\" from verneed[%zd] in DT_NEEDED list for \"%s\"",
+          target_soname, i, si_from->get_soname());
+      return false;
+    }
+
+    for (size_t j = 0; j<verneed->vn_cnt; ++j) {
+      const ElfW(Vernaux)* vernaux = reinterpret_cast<ElfW(Vernaux)*>(verneed_ptr + vernaux_offset);
+      vernaux_offset += vernaux->vna_next;
+      // vna_other is the index later passed to get_version_info() for lookup.
+      add_version_info(vernaux->vna_other, vernaux->vna_hash,
+          si_from->get_string(vernaux->vna_name), target_si);
+    }
+  }
+
+  return true;
+}
+
+bool VersionTracker::init_verdef(const soinfo* si_from) {  // records versions from si_from's verdef
+  return for_each_verdef(si_from,
+    [&](size_t, const ElfW(Verdef)* verdef, const ElfW(Verdaux)* verdaux) {
+      add_version_info(verdef->vd_ndx, verdef->vd_hash,
+          si_from->get_string(verdaux->vda_name), si_from);  // si_from is its own target
+      return false;  // NOTE(review): presumably "keep iterating" - confirm in for_each_verdef
+    }
+  );
+}
+
+// Fills the tracker from si_from's verneed and verdef data. When si_from does
+// not satisfy has_min_version(2) nothing is recorded and true is returned.
+// Returns false if either init_verneed() or init_verdef() fails.
+bool VersionTracker::init(const soinfo* si_from) {
+  const bool is_v2_or_newer = si_from->has_min_version(2);
+  return !is_v2_or_newer || (init_verneed(si_from) && init_verdef(si_from));
+}
+
 #if !defined(__mips__)
 #if defined(USE_RELA)
 static ElfW(Addr) get_addend(ElfW(Rela)* rela, ElfW(Addr) reloc_addr __unused) {
@@ -1305,7 +1750,8 @@
 }
 #else
 static ElfW(Addr) get_addend(ElfW(Rel)* rel, ElfW(Addr) reloc_addr) {
-  if (ELFW(R_TYPE)(rel->r_info) == R_GENERIC_RELATIVE || ELFW(R_TYPE)(rel->r_info) == R_GENERIC_IRELATIVE) {
+  if (ELFW(R_TYPE)(rel->r_info) == R_GENERIC_RELATIVE ||
+      ELFW(R_TYPE)(rel->r_info) == R_GENERIC_IRELATIVE) {
     return *reinterpret_cast<ElfW(Addr)*>(reloc_addr);
   }
   return 0;
@@ -1313,7 +1759,14 @@
 #endif
 
 template<typename ElfRelIteratorT>
-bool soinfo::relocate(ElfRelIteratorT&& rel_iterator, const soinfo_list_t& global_group, const soinfo_list_t& local_group) {
+bool soinfo::relocate(ElfRelIteratorT&& rel_iterator, const soinfo_list_t& global_group,
+                      const soinfo_list_t& local_group) {
+  VersionTracker version_tracker;
+
+  if (!version_tracker.init(this)) {
+    return false;
+  }
+
   for (size_t idx = 0; rel_iterator.has_next(); ++idx) {
     const auto rel = rel_iterator.next();
     if (rel == nullptr) {
@@ -1328,22 +1781,42 @@
     const char* sym_name = nullptr;
     ElfW(Addr) addend = get_addend(rel, reloc);
 
-    DEBUG("Processing '%s' relocation at index %zd", this->name, idx);
+    DEBUG("Processing '%s' relocation at index %zd", get_soname(), idx);
     if (type == R_GENERIC_NONE) {
       continue;
     }
 
-    ElfW(Sym)* s = nullptr;
+    const ElfW(Sym)* s = nullptr;
     soinfo* lsi = nullptr;
 
     if (sym != 0) {
       sym_name = get_string(symtab_[sym].st_name);
-      s = soinfo_do_lookup(this, sym_name, &lsi, global_group,local_group);
+      const ElfW(Versym)* sym_ver_ptr = get_versym(sym);
+      ElfW(Versym) sym_ver = sym_ver_ptr == nullptr ? 0 : *sym_ver_ptr;
+
+      if (sym_ver == VER_NDX_LOCAL || sym_ver == VER_NDX_GLOBAL) {
+        // there is no version info for this one
+        if (!soinfo_do_lookup(this, sym_name, nullptr, &lsi, global_group, local_group, &s)) {
+          return false;
+        }
+      } else {
+        const version_info* vi = version_tracker.get_version_info(sym_ver);
+
+        if (vi == nullptr) {
+          DL_ERR("cannot find verneed/verdef for version index=%d "
+              "referenced by symbol \"%s\" at \"%s\"", sym_ver, sym_name, get_soname());
+          return false;
+        }
+
+        if (!soinfo_do_lookup(this, sym_name, vi, &lsi, global_group, local_group, &s)) {
+          return false;
+        }
+      }
       if (s == nullptr) {
         // We only allow an undefined symbol if this is a weak reference...
         s = &symtab_[sym];
         if (ELF_ST_BIND(s->st_info) != STB_WEAK) {
-          DL_ERR("cannot locate symbol \"%s\" referenced by \"%s\"...", sym_name, name);
+          DL_ERR("cannot locate symbol \"%s\" referenced by \"%s\"...", sym_name, get_soname());
           return false;
         }
 
@@ -1450,15 +1923,18 @@
         MARK(rel->r_offset);
         TRACE_TYPE(RELO, "RELO ABS32 %16llx <- %16llx %s\n",
                    reloc, (sym_addr + addend), sym_name);
-        if ((static_cast<ElfW(Addr)>(INT32_MIN) <= (*reinterpret_cast<ElfW(Addr)*>(reloc) + (sym_addr + addend))) &&
-            ((*reinterpret_cast<ElfW(Addr)*>(reloc) + (sym_addr + addend)) <= static_cast<ElfW(Addr)>(UINT32_MAX))) {
-          *reinterpret_cast<ElfW(Addr)*>(reloc) += (sym_addr + addend);
-        } else {
-          DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
-                 (*reinterpret_cast<ElfW(Addr)*>(reloc) + (sym_addr + addend)),
-                 static_cast<ElfW(Addr)>(INT32_MIN),
-                 static_cast<ElfW(Addr)>(UINT32_MAX));
-          return false;
+        {
+          const ElfW(Addr) reloc_value = *reinterpret_cast<ElfW(Addr)*>(reloc);
+          const ElfW(Addr) min_value = static_cast<ElfW(Addr)>(INT32_MIN);
+          const ElfW(Addr) max_value = static_cast<ElfW(Addr)>(UINT32_MAX);
+          if ((min_value <= (reloc_value + (sym_addr + addend))) &&
+              ((reloc_value + (sym_addr + addend)) <= max_value)) {
+            *reinterpret_cast<ElfW(Addr)*>(reloc) += (sym_addr + addend);
+          } else {
+            DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
+                   (reloc_value + (sym_addr + addend)), min_value, max_value);
+            return false;
+          }
         }
         break;
       case R_AARCH64_ABS16:
@@ -1466,15 +1942,18 @@
         MARK(rel->r_offset);
         TRACE_TYPE(RELO, "RELO ABS16 %16llx <- %16llx %s\n",
                    reloc, (sym_addr + addend), sym_name);
-        if ((static_cast<ElfW(Addr)>(INT16_MIN) <= (*reinterpret_cast<ElfW(Addr)*>(reloc) + (sym_addr + addend))) &&
-            ((*reinterpret_cast<ElfW(Addr)*>(reloc) + (sym_addr + addend)) <= static_cast<ElfW(Addr)>(UINT16_MAX))) {
-          *reinterpret_cast<ElfW(Addr)*>(reloc) += (sym_addr + addend);
-        } else {
-          DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
-                 (*reinterpret_cast<ElfW(Addr)*>(reloc) + (sym_addr + addend)),
-                 static_cast<ElfW(Addr)>(INT16_MIN),
-                 static_cast<ElfW(Addr)>(UINT16_MAX));
-          return false;
+        {
+          const ElfW(Addr) reloc_value = *reinterpret_cast<ElfW(Addr)*>(reloc);
+          const ElfW(Addr) min_value = static_cast<ElfW(Addr)>(INT16_MIN);
+          const ElfW(Addr) max_value = static_cast<ElfW(Addr)>(UINT16_MAX);
+          if ((min_value <= (reloc_value + (sym_addr + addend))) &&
+              ((reloc_value + (sym_addr + addend)) <= max_value)) {
+            *reinterpret_cast<ElfW(Addr)*>(reloc) += (sym_addr + addend);
+          } else {
+            DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
+                   reloc_value + (sym_addr + addend), min_value, max_value);
+            return false;
+          }
         }
         break;
       case R_AARCH64_PREL64:
@@ -1489,15 +1968,18 @@
         MARK(rel->r_offset);
         TRACE_TYPE(RELO, "RELO REL32 %16llx <- %16llx - %16llx %s\n",
                    reloc, (sym_addr + addend), rel->r_offset, sym_name);
-        if ((static_cast<ElfW(Addr)>(INT32_MIN) <= (*reinterpret_cast<ElfW(Addr)*>(reloc) + ((sym_addr + addend) - rel->r_offset))) &&
-            ((*reinterpret_cast<ElfW(Addr)*>(reloc) + ((sym_addr + addend) - rel->r_offset)) <= static_cast<ElfW(Addr)>(UINT32_MAX))) {
-          *reinterpret_cast<ElfW(Addr)*>(reloc) += ((sym_addr + addend) - rel->r_offset);
-        } else {
-          DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
-                 (*reinterpret_cast<ElfW(Addr)*>(reloc) + ((sym_addr + addend) - rel->r_offset)),
-                 static_cast<ElfW(Addr)>(INT32_MIN),
-                 static_cast<ElfW(Addr)>(UINT32_MAX));
-          return false;
+        {
+          const ElfW(Addr) reloc_value = *reinterpret_cast<ElfW(Addr)*>(reloc);
+          const ElfW(Addr) min_value = static_cast<ElfW(Addr)>(INT32_MIN);
+          const ElfW(Addr) max_value = static_cast<ElfW(Addr)>(UINT32_MAX);
+          if ((min_value <= (reloc_value + ((sym_addr + addend) - rel->r_offset))) &&
+              ((reloc_value + ((sym_addr + addend) - rel->r_offset)) <= max_value)) {
+            *reinterpret_cast<ElfW(Addr)*>(reloc) += ((sym_addr + addend) - rel->r_offset);
+          } else {
+            DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
+                   reloc_value + ((sym_addr + addend) - rel->r_offset), min_value, max_value);
+            return false;
+          }
         }
         break;
       case R_AARCH64_PREL16:
@@ -1505,15 +1987,18 @@
         MARK(rel->r_offset);
         TRACE_TYPE(RELO, "RELO REL16 %16llx <- %16llx - %16llx %s\n",
                    reloc, (sym_addr + addend), rel->r_offset, sym_name);
-        if ((static_cast<ElfW(Addr)>(INT16_MIN) <= (*reinterpret_cast<ElfW(Addr)*>(reloc) + ((sym_addr + addend) - rel->r_offset))) &&
-            ((*reinterpret_cast<ElfW(Addr)*>(reloc) + ((sym_addr + addend) - rel->r_offset)) <= static_cast<ElfW(Addr)>(UINT16_MAX))) {
-          *reinterpret_cast<ElfW(Addr)*>(reloc) += ((sym_addr + addend) - rel->r_offset);
-        } else {
-          DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
-                 (*reinterpret_cast<ElfW(Addr)*>(reloc) + ((sym_addr + addend) - rel->r_offset)),
-                 static_cast<ElfW(Addr)>(INT16_MIN),
-                 static_cast<ElfW(Addr)>(UINT16_MAX));
-          return false;
+        {
+          const ElfW(Addr) reloc_value = *reinterpret_cast<ElfW(Addr)*>(reloc);
+          const ElfW(Addr) min_value = static_cast<ElfW(Addr)>(INT16_MIN);
+          const ElfW(Addr) max_value = static_cast<ElfW(Addr)>(UINT16_MAX);
+          if ((min_value <= (reloc_value + ((sym_addr + addend) - rel->r_offset))) &&
+              ((reloc_value + ((sym_addr + addend) - rel->r_offset)) <= max_value)) {
+            *reinterpret_cast<ElfW(Addr)*>(reloc) += ((sym_addr + addend) - rel->r_offset);
+          } else {
+            DL_ERR("0x%016llx out of range 0x%016llx to 0x%016llx",
+                   reloc_value + ((sym_addr + addend) - rel->r_offset), min_value, max_value);
+            return false;
+          }
         }
         break;
 
@@ -1521,13 +2006,13 @@
         /*
          * ET_EXEC is not supported so this should not happen.
          *
-         * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044d/IHI0044D_aaelf.pdf
+         * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0056b/IHI0056B_aaelf64.pdf
          *
-         * Section 4.7.1.10 "Dynamic relocations"
+         * Section 4.6.11 "Dynamic relocations"
          * R_AARCH64_COPY may only appear in executable objects where e_type is
          * set to ET_EXEC.
          */
-        DL_ERR("%s R_AARCH64_COPY relocations are not supported", name);
+        DL_ERR("%s R_AARCH64_COPY relocations are not supported", get_soname());
         return false;
       case R_AARCH64_TLS_TPREL64:
         TRACE_TYPE(RELO, "RELO TLS_TPREL64 *** %16llx <- %16llx - %16llx\n",
@@ -1580,11 +2065,11 @@
          *
          * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044d/IHI0044D_aaelf.pdf
          *
-         * Section 4.7.1.10 "Dynamic relocations"
+         * Section 4.6.1.10 "Dynamic relocations"
          * R_ARM_COPY may only appear in executable objects where e_type is
          * set to ET_EXEC.
          */
-        DL_ERR("%s R_ARM_COPY relocations are not supported", name);
+        DL_ERR("%s R_ARM_COPY relocations are not supported", get_soname());
         return false;
 #elif defined(__i386__)
       case R_386_32:
@@ -1610,12 +2095,13 @@
 }
 #endif  // !defined(__mips__)
 
-void soinfo::call_array(const char* array_name __unused, linker_function_t* functions, size_t count, bool reverse) {
+void soinfo::call_array(const char* array_name __unused, linker_function_t* functions,
+                        size_t count, bool reverse) {
   if (functions == nullptr) {
     return;
   }
 
-  TRACE("[ Calling %s (size %zd) @ %p for '%s' ]", array_name, count, functions, name);
+  TRACE("[ Calling %s (size %zd) @ %p for '%s' ]", array_name, count, functions, get_soname());
 
   int begin = reverse ? (count - 1) : 0;
   int end = reverse ? -1 : count;
@@ -1626,7 +2112,7 @@
     call_function("function", functions[i]);
   }
 
-  TRACE("[ Done calling %s for '%s' ]", array_name, name);
+  TRACE("[ Done calling %s for '%s' ]", array_name, get_soname());
 }
 
 void soinfo::call_function(const char* function_name __unused, linker_function_t function) {
@@ -1634,9 +2120,9 @@
     return;
   }
 
-  TRACE("[ Calling %s @ %p for '%s' ]", function_name, function, name);
+  TRACE("[ Calling %s @ %p for '%s' ]", function_name, function, get_soname());
   function();
-  TRACE("[ Done calling %s @ %p for '%s' ]", function_name, function, name);
+  TRACE("[ Done calling %s @ %p for '%s' ]", function_name, function, get_soname());
 }
 
 void soinfo::call_pre_init_constructors() {
@@ -1665,14 +2151,14 @@
   if (!is_main_executable() && preinit_array_ != nullptr) {
     // The GNU dynamic linker silently ignores these, but we warn the developer.
     PRINT("\"%s\": ignoring %zd-entry DT_PREINIT_ARRAY in shared library!",
-          name, preinit_array_count_);
+          get_soname(), preinit_array_count_);
   }
 
   get_children().for_each([] (soinfo* si) {
     si->call_constructors();
   });
 
-  TRACE("\"%s\": calling constructors", name);
+  TRACE("\"%s\": calling constructors", get_soname());
 
   // DT_INIT should be called before DT_INIT_ARRAY if both are present.
   call_function("DT_INIT", init_func_);
@@ -1683,7 +2169,7 @@
   if (!constructors_called) {
     return;
   }
-  TRACE("\"%s\": calling destructors", name);
+  TRACE("\"%s\": calling destructors", get_soname());
 
   // DT_FINI_ARRAY must be parsed in reverse order.
   call_array("DT_FINI_ARRAY", fini_array_, fini_array_count_, true);
@@ -1765,6 +2251,7 @@
 
   return 0;
 }
+
 void soinfo::set_dt_flags_1(uint32_t dt_flags_1) {
   if (has_min_version(1)) {
     if ((dt_flags_1 & DF_1_GLOBAL) != 0) {
@@ -1779,6 +2266,30 @@
   }
 }
 
+// Returns realpath_ as a C string. On __arm__ builds an soinfo that does not
+// satisfy has_min_version(2) returns old_name_ instead.
+const char* soinfo::get_realpath() const {
+#if defined(__arm__)
+  // Older soinfo layout: only old_name_ is consulted.
+  if (!has_min_version(2)) {
+    return old_name_;
+  }
+#endif
+  return realpath_.c_str();
+}
+
+// Returns soname_, or old_name_ on __arm__ builds when has_min_version(2) is
+// false. The result may be nullptr: find_loaded_library_by_soname() checks for it.
+const char* soinfo::get_soname() const {
+#if defined(__arm__)
+  // Older soinfo layout: only old_name_ is consulted.
+  if (!has_min_version(2)) {
+    return old_name_;
+  }
+#endif
+  return soname_;
+}
+
 // This is a return on get_children()/get_parents() if
 // 'this->flags' does not have FLAG_NEW_SOINFO set.
 static soinfo::soinfo_list_t g_empty_list;
@@ -1791,6 +2302,14 @@
   return g_empty_list;
 }
 
+// Const overload: children_ when has_min_version(0) holds, else the shared g_empty_list.
+const soinfo::soinfo_list_t& soinfo::get_children() const {
+  if (!has_min_version(0)) {
+    return g_empty_list;
+  }
+  return children_;
+}
+
 soinfo::soinfo_list_t& soinfo::get_parents() {
   if (has_min_version(0)) {
     return parents_;
@@ -1799,7 +2318,7 @@
   return g_empty_list;
 }
 
-ElfW(Addr) soinfo::resolve_symbol_address(ElfW(Sym)* s) {
+ElfW(Addr) soinfo::resolve_symbol_address(const ElfW(Sym)* s) const {
   if (ELF_ST_TYPE(s->st_info) == STT_GNU_IFUNC) {
     return call_ifunc_resolver(s->st_value + load_bias);
   }
@@ -1809,7 +2328,8 @@
 
 const char* soinfo::get_string(ElfW(Word) index) const {
   if (has_min_version(1) && (index >= strtab_size_)) {
-    __libc_fatal("%s: strtab out of bounds error; STRSZ=%zd, name=%d", name, strtab_size_, index);
+    __libc_fatal("%s: strtab out of bounds error; STRSZ=%zd, name=%d",
+        get_soname(), strtab_size_, index);
   }
 
   return strtab_ + index;
@@ -1924,13 +2444,13 @@
   /* We can't log anything until the linker is relocated */
   bool relocating_linker = (flags_ & FLAG_LINKER) != 0;
   if (!relocating_linker) {
-    INFO("[ linking %s ]", name);
+    INFO("[ linking %s ]", get_soname());
     DEBUG("si->base = %p si->flags = 0x%08x", reinterpret_cast<void*>(base), flags_);
   }
 
   if (dynamic == nullptr) {
     if (!relocating_linker) {
-      DL_ERR("missing PT_DYNAMIC in \"%s\"", name);
+      DL_ERR("missing PT_DYNAMIC in \"%s\"", get_soname());
     }
     return false;
   } else {
@@ -1945,14 +2465,17 @@
 #endif
 
   // Extract useful information from dynamic section.
+  // Note that: "Except for the DT_NULL element at the end of the array,
+  // and the relative order of DT_NEEDED elements, entries may appear in any order."
+  //
+  // source: http://www.sco.com/developers/gabi/1998-04-29/ch5.dynamic.html
   uint32_t needed_count = 0;
   for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) {
     DEBUG("d = %p, d[0](tag) = %p d[1](val) = %p",
           d, reinterpret_cast<void*>(d->d_tag), reinterpret_cast<void*>(d->d_un.d_val));
     switch (d->d_tag) {
       case DT_SONAME:
-        // TODO: glibc dynamic linker uses this name for
-        // initial library lookup; consider doing the same here.
+        // this is parsed after we have strtab initialized (see below).
         break;
 
       case DT_HASH:
@@ -1971,10 +2494,12 @@
         gnu_bloom_filter_ = reinterpret_cast<ElfW(Addr)*>(load_bias + d->d_un.d_ptr + 16);
         gnu_bucket_ = reinterpret_cast<uint32_t*>(gnu_bloom_filter_ + gnu_maskwords_);
         // amend chain for symndx = header[1]
-        gnu_chain_ = gnu_bucket_ + gnu_nbucket_ - reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[1];
+        gnu_chain_ = gnu_bucket_ + gnu_nbucket_ -
+            reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[1];
 
         if (!powerof2(gnu_maskwords_)) {
-          DL_ERR("invalid maskwords for gnu_hash = 0x%x, in \"%s\" expecting power to two", gnu_maskwords_, name);
+          DL_ERR("invalid maskwords for gnu_hash = 0x%x, in \"%s\" expecting power to two",
+              gnu_maskwords_, get_realpath());
           return false;
         }
         --gnu_maskwords_;
@@ -1996,7 +2521,8 @@
 
       case DT_SYMENT:
         if (d->d_un.d_val != sizeof(ElfW(Sym))) {
-          DL_ERR("invalid DT_SYMENT: %zd in \"%s\"", static_cast<size_t>(d->d_un.d_val), name);
+          DL_ERR("invalid DT_SYMENT: %zd in \"%s\"",
+              static_cast<size_t>(d->d_un.d_val), get_realpath());
           return false;
         }
         break;
@@ -2004,12 +2530,12 @@
       case DT_PLTREL:
 #if defined(USE_RELA)
         if (d->d_un.d_val != DT_RELA) {
-          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_RELA", name);
+          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_RELA", get_realpath());
           return false;
         }
 #else
         if (d->d_un.d_val != DT_REL) {
-          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_REL", name);
+          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_REL", get_realpath());
           return false;
         }
 #endif
@@ -2070,11 +2596,11 @@
         break;
 
       case DT_ANDROID_REL:
-        DL_ERR("unsupported DT_ANDROID_REL in \"%s\"", name);
+        DL_ERR("unsupported DT_ANDROID_REL in \"%s\"", get_realpath());
         return false;
 
       case DT_ANDROID_RELSZ:
-        DL_ERR("unsupported DT_ANDROID_RELSZ in \"%s\"", name);
+        DL_ERR("unsupported DT_ANDROID_RELSZ in \"%s\"", get_realpath());
         return false;
 
       case DT_RELAENT:
@@ -2089,11 +2615,11 @@
         break;
 
       case DT_REL:
-        DL_ERR("unsupported DT_REL in \"%s\"", name);
+        DL_ERR("unsupported DT_REL in \"%s\"", get_realpath());
         return false;
 
       case DT_RELSZ:
-        DL_ERR("unsupported DT_RELSZ in \"%s\"", name);
+        DL_ERR("unsupported DT_RELSZ in \"%s\"", get_realpath());
         return false;
 
 #else
@@ -2121,11 +2647,11 @@
         break;
 
       case DT_ANDROID_RELA:
-        DL_ERR("unsupported DT_ANDROID_RELA in \"%s\"", name);
+        DL_ERR("unsupported DT_ANDROID_RELA in \"%s\"", get_realpath());
         return false;
 
       case DT_ANDROID_RELASZ:
-        DL_ERR("unsupported DT_ANDROID_RELASZ in \"%s\"", name);
+        DL_ERR("unsupported DT_ANDROID_RELASZ in \"%s\"", get_realpath());
         return false;
 
       // "Indicates that all RELATIVE relocations have been concatenated together,
@@ -2137,27 +2663,27 @@
         break;
 
       case DT_RELA:
-        DL_ERR("unsupported DT_RELA in \"%s\"", name);
+        DL_ERR("unsupported DT_RELA in \"%s\"", get_realpath());
         return false;
 
       case DT_RELASZ:
-        DL_ERR("unsupported DT_RELASZ in \"%s\"", name);
+        DL_ERR("unsupported DT_RELASZ in \"%s\"", get_realpath());
         return false;
 
 #endif
       case DT_INIT:
         init_func_ = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
-        DEBUG("%s constructors (DT_INIT) found at %p", name, init_func_);
+        DEBUG("%s constructors (DT_INIT) found at %p", get_realpath(), init_func_);
         break;
 
       case DT_FINI:
         fini_func_ = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
-        DEBUG("%s destructors (DT_FINI) found at %p", name, fini_func_);
+        DEBUG("%s destructors (DT_FINI) found at %p", get_realpath(), fini_func_);
         break;
 
       case DT_INIT_ARRAY:
         init_array_ = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
-        DEBUG("%s constructors (DT_INIT_ARRAY) found at %p", name, init_array_);
+        DEBUG("%s constructors (DT_INIT_ARRAY) found at %p", get_realpath(), init_array_);
         break;
 
       case DT_INIT_ARRAYSZ:
@@ -2166,7 +2692,7 @@
 
       case DT_FINI_ARRAY:
         fini_array_ = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
-        DEBUG("%s destructors (DT_FINI_ARRAY) found at %p", name, fini_array_);
+        DEBUG("%s destructors (DT_FINI_ARRAY) found at %p", get_realpath(), fini_array_);
         break;
 
       case DT_FINI_ARRAYSZ:
@@ -2175,7 +2701,7 @@
 
       case DT_PREINIT_ARRAY:
         preinit_array_ = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
-        DEBUG("%s constructors (DT_PREINIT_ARRAY) found at %p", name, preinit_array_);
+        DEBUG("%s constructors (DT_PREINIT_ARRAY) found at %p", get_realpath(), preinit_array_);
         break;
 
       case DT_PREINIT_ARRAYSZ:
@@ -2184,7 +2710,7 @@
 
       case DT_TEXTREL:
 #if defined(__LP64__)
-        DL_ERR("text relocations (DT_TEXTREL) found in 64-bit ELF file \"%s\"", name);
+        DL_ERR("text relocations (DT_TEXTREL) found in 64-bit ELF file \"%s\"", get_realpath());
         return false;
 #else
         has_text_relocations = true;
@@ -2202,7 +2728,7 @@
       case DT_FLAGS:
         if (d->d_un.d_val & DF_TEXTREL) {
 #if defined(__LP64__)
-          DL_ERR("text relocations (DF_TEXTREL) found in 64-bit ELF file \"%s\"", name);
+          DL_ERR("text relocations (DF_TEXTREL) found in 64-bit ELF file \"%s\"", get_realpath());
           return false;
 #else
           has_text_relocations = true;
@@ -2231,7 +2757,8 @@
       case DT_MIPS_RLD_MAP2:
         // Set the DT_MIPS_RLD_MAP2 entry to the address of _r_debug for GDB.
         {
-          r_debug** dp = reinterpret_cast<r_debug**>(reinterpret_cast<ElfW(Addr)>(d) + d->d_un.d_val);
+          r_debug** dp = reinterpret_cast<r_debug**>(
+              reinterpret_cast<ElfW(Addr)>(d) + d->d_un.d_val);
           *dp = &_r_debug;
         }
         break;
@@ -2258,23 +2785,45 @@
       case DT_BIND_NOW:
         break;
 
-      // Ignore: bionic does not support symbol versioning...
       case DT_VERSYM:
+        versym_ = reinterpret_cast<ElfW(Versym)*>(load_bias + d->d_un.d_ptr);
+        break;
+
       case DT_VERDEF:
+        verdef_ptr_ = load_bias + d->d_un.d_ptr;
+        break;
       case DT_VERDEFNUM:
+        verdef_cnt_ = d->d_un.d_val;
+        break;
+
       case DT_VERNEED:
+        verneed_ptr_ = load_bias + d->d_un.d_ptr;
+        break;
+
       case DT_VERNEEDNUM:
+        verneed_cnt_ = d->d_un.d_val;
         break;
 
       default:
         if (!relocating_linker) {
-          DL_WARN("%s: unused DT entry: type %p arg %p", name,
+          DL_WARN("%s: unused DT entry: type %p arg %p", get_realpath(),
               reinterpret_cast<void*>(d->d_tag), reinterpret_cast<void*>(d->d_un.d_val));
         }
         break;
     }
   }
 
+  // second pass - parse entries relying on strtab
+  for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) {
+    if (d->d_tag == DT_SONAME) {
+      soname_ = get_string(d->d_un.d_val);
+#if defined(__arm__)
+      strlcpy(old_name_, soname_, sizeof(old_name_));
+#endif
+      break;
+    }
+  }
+
   DEBUG("si->base = %p, si->strtab = %p, si->symtab = %p",
         reinterpret_cast<void*>(base), strtab_, symtab_);
 
@@ -2284,15 +2833,16 @@
     return false;
   }
   if (nbucket_ == 0 && gnu_nbucket_ == 0) {
-    DL_ERR("empty/missing DT_HASH/DT_GNU_HASH in \"%s\" (new hash type from the future?)", name);
+    DL_ERR("empty/missing DT_HASH/DT_GNU_HASH in \"%s\" "
+        "(new hash type from the future?)", get_soname());
     return false;
   }
   if (strtab_ == 0) {
-    DL_ERR("empty/missing DT_STRTAB in \"%s\"", name);
+    DL_ERR("empty/missing DT_STRTAB in \"%s\"", get_soname());
     return false;
   }
   if (symtab_ == 0) {
-    DL_ERR("empty/missing DT_SYMTAB in \"%s\"", name);
+    DL_ERR("empty/missing DT_SYMTAB in \"%s\"", get_soname());
     return false;
   }
   return true;
@@ -2311,10 +2861,10 @@
     // Make segments writable to allow text relocations to work properly. We will later call
     // phdr_table_protect_segments() after all of them are applied and all constructors are run.
     DL_WARN("%s has text relocations. This is wasting memory and prevents "
-            "security hardening. Please fix.", name);
+            "security hardening. Please fix.", get_soname());
     if (phdr_table_unprotect_segments(phdr, phnum, load_bias) < 0) {
       DL_ERR("can't unprotect loadable segments for \"%s\": %s",
-             name, strerror(errno));
+             get_soname(), strerror(errno));
       return false;
     }
   }
@@ -2327,7 +2877,7 @@
         android_relocs_[1] == 'P' &&
         (android_relocs_[2] == 'U' || android_relocs_[2] == 'S') &&
         android_relocs_[3] == '2') {
-      DEBUG("[ android relocating %s ]", name);
+      DEBUG("[ android relocating %s ]", get_soname());
 
       bool relocated = false;
       const uint8_t* packed_relocs = android_relocs_ + 4;
@@ -2356,26 +2906,26 @@
 
 #if defined(USE_RELA)
   if (rela_ != nullptr) {
-    DEBUG("[ relocating %s ]", name);
+    DEBUG("[ relocating %s ]", get_soname());
     if (!relocate(plain_reloc_iterator(rela_, rela_count_), global_group, local_group)) {
       return false;
     }
   }
   if (plt_rela_ != nullptr) {
-    DEBUG("[ relocating %s plt ]", name);
+    DEBUG("[ relocating %s plt ]", get_soname());
     if (!relocate(plain_reloc_iterator(plt_rela_, plt_rela_count_), global_group, local_group)) {
       return false;
     }
   }
 #else
   if (rel_ != nullptr) {
-    DEBUG("[ relocating %s ]", name);
+    DEBUG("[ relocating %s ]", get_soname());
     if (!relocate(plain_reloc_iterator(rel_, rel_count_), global_group, local_group)) {
       return false;
     }
   }
   if (plt_rel_ != nullptr) {
-    DEBUG("[ relocating %s plt ]", name);
+    DEBUG("[ relocating %s plt ]", get_soname());
     if (!relocate(plain_reloc_iterator(plt_rel_, plt_rel_count_), global_group, local_group)) {
       return false;
     }
@@ -2388,14 +2938,14 @@
   }
 #endif
 
-  DEBUG("[ finished linking %s ]", name);
+  DEBUG("[ finished linking %s ]", get_soname());
 
 #if !defined(__LP64__)
   if (has_text_relocations) {
     // All relocations are done, we can protect our segments back to read-only.
     if (phdr_table_protect_segments(phdr, phnum, load_bias) < 0) {
       DL_ERR("can't protect segments for \"%s\": %s",
-             name, strerror(errno));
+             get_soname(), strerror(errno));
       return false;
     }
   }
@@ -2404,7 +2954,7 @@
   /* We can also turn on GNU RELRO protection */
   if (phdr_table_protect_gnu_relro(phdr, phnum, load_bias) < 0) {
     DL_ERR("can't enable GNU RELRO protection for \"%s\": %s",
-           name, strerror(errno));
+           get_soname(), strerror(errno));
     return false;
   }
 
@@ -2413,14 +2963,14 @@
     if (phdr_table_serialize_gnu_relro(phdr, phnum, load_bias,
                                        extinfo->relro_fd) < 0) {
       DL_ERR("failed serializing GNU RELRO section for \"%s\": %s",
-             name, strerror(errno));
+             get_soname(), strerror(errno));
       return false;
     }
   } else if (extinfo && (extinfo->flags & ANDROID_DLEXT_USE_RELRO)) {
     if (phdr_table_map_gnu_relro(phdr, phnum, load_bias,
                                  extinfo->relro_fd) < 0) {
       DL_ERR("failed mapping GNU RELRO section for \"%s\": %s",
-             name, strerror(errno));
+             get_soname(), strerror(errno));
       return false;
     }
   }
@@ -2462,7 +3012,12 @@
 #else
 #define LINKER_PATH "/system/bin/linker"
 #endif
-static soinfo linker_soinfo_for_gdb(LINKER_PATH, nullptr, 0, 0);
+
+// This is done to avoid calling c-tor prematurely
+// because soinfo c-tor needs memory allocator
+// which might be initialized after global variables.
+static uint8_t linker_soinfo_for_gdb_buf[sizeof(soinfo)] __attribute__((aligned(8)));
+static soinfo* linker_soinfo_for_gdb = nullptr;
 
 /* gdb expects the linker to be in the debug shared object list.
  * Without this, gdb has trouble locating the linker's ".text"
@@ -2472,7 +3027,9 @@
  * be on the soinfo list.
  */
 static void init_linker_info_for_gdb(ElfW(Addr) linker_base) {
-  linker_soinfo_for_gdb.base = linker_base;
+  linker_soinfo_for_gdb = new (linker_soinfo_for_gdb_buf) soinfo(LINKER_PATH, nullptr, 0, 0);
+
+  linker_soinfo_for_gdb->base = linker_base;
 
   /*
    * Set the dynamic field in the link map otherwise gdb will complain with
@@ -2483,8 +3040,8 @@
   ElfW(Ehdr)* elf_hdr = reinterpret_cast<ElfW(Ehdr)*>(linker_base);
   ElfW(Phdr)* phdr = reinterpret_cast<ElfW(Phdr)*>(linker_base + elf_hdr->e_phoff);
   phdr_table_get_dynamic_section(phdr, elf_hdr->e_phnum, linker_base,
-                                 &linker_soinfo_for_gdb.dynamic, nullptr);
-  insert_soinfo_into_debug_map(&linker_soinfo_for_gdb);
+                                 &linker_soinfo_for_gdb->dynamic, nullptr);
+  insert_soinfo_into_debug_map(linker_soinfo_for_gdb);
 }
 
 /*
@@ -2596,8 +3153,9 @@
   StringLinkedList needed_library_name_list;
   size_t needed_libraries_count = 0;
   size_t ld_preloads_count = 0;
-  while (g_ld_preload_names[ld_preloads_count] != nullptr) {
-    needed_library_name_list.push_back(g_ld_preload_names[ld_preloads_count++]);
+
+  for (const auto& ld_preload_name : g_ld_preload_names) {
+    needed_library_name_list.push_back(ld_preload_name.c_str());
     ++needed_libraries_count;
   }
 
@@ -2611,7 +3169,9 @@
   memset(needed_library_names, 0, sizeof(needed_library_names));
   needed_library_name_list.copy_to_array(needed_library_names, needed_libraries_count);
 
-  if (needed_libraries_count > 0 && !find_libraries(si, needed_library_names, needed_libraries_count, nullptr, g_ld_preloads, ld_preloads_count, RTLD_GLOBAL, nullptr)) {
+  if (needed_libraries_count > 0 &&
+      !find_libraries(si, needed_library_names, needed_libraries_count, nullptr,
+          &g_ld_preloads, ld_preloads_count, RTLD_GLOBAL, nullptr)) {
     __libc_format_fd(2, "CANNOT LINK EXECUTABLE: %s\n", linker_get_error_buffer());
     exit(EXIT_FAILURE);
   } else if (needed_libraries_count == 0) {
@@ -2679,7 +3239,7 @@
   fflush(stdout);
 #endif
 
-  TRACE("[ Ready to execute '%s' @ %p ]", si->name, reinterpret_cast<void*>(si->entry));
+  TRACE("[ Ready to execute '%s' @ %p ]", si->get_soname(), reinterpret_cast<void*>(si->entry));
   return si->entry;
 }
 
@@ -2725,7 +3285,7 @@
   ElfW(Ehdr)* elf_hdr = reinterpret_cast<ElfW(Ehdr)*>(linker_addr);
   ElfW(Phdr)* phdr = reinterpret_cast<ElfW(Phdr)*>(linker_addr + elf_hdr->e_phoff);
 
-  soinfo linker_so("[dynamic linker]", nullptr, 0, 0);
+  soinfo linker_so(nullptr, nullptr, 0, 0);
 
   // If the linker is not acting as PT_INTERP entry_point is equal to
   // _start. Which means that the linker is running as an executable and
diff --git a/linker/linker.h b/linker/linker.h
index e4681eb..dae3972 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -39,6 +39,9 @@
 #include "private/libc_logging.h"
 #include "linked_list.h"
 
+#include <string>
+#include <vector>
+
 #define DL_ERR(fmt, x...) \
     do { \
       __libc_format_buffer(linker_get_error_buffer(), linker_get_error_buffer_size(), fmt, ##x); \
@@ -92,9 +95,11 @@
 
 #define SUPPORTED_DT_FLAGS_1 (DF_1_NOW | DF_1_GLOBAL | DF_1_NODELETE)
 
-#define SOINFO_VERSION 1
+#define SOINFO_VERSION 2
 
+#if defined(__arm__)
 #define SOINFO_NAME_LEN 128
+#endif
 
 typedef void (*linker_function_t)();
 
@@ -138,24 +143,53 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(SymbolName);
 };
 
+struct version_info {
+  version_info() : elf_hash(0), name(nullptr), target_si(nullptr) {}
+  // The constructor above leaves every field zeroed/null.
+  uint32_t elf_hash;        // presumably the ELF hash of `name` - TODO confirm
+  const char* name;         // version name string (raw pointer; no ownership logic here)
+  const soinfo* target_si;  // presumably the library that provides the version - TODO confirm
+};
+
+// Class used to construct the version dependency graph.
+class VersionTracker {
+ public:
+  VersionTracker() = default;
+  bool init(const soinfo* si_from);
+  // soinfo::relocate() calls init(this) first and gives up when it returns false.
+  const version_info* get_version_info(ElfW(Versym) source_symver) const;
+ private:
+  bool init_verneed(const soinfo* si_from);
+  bool init_verdef(const soinfo* si_from);
+  void add_version_info(size_t source_index, ElfW(Word) elf_hash,
+      const char* ver_name, const soinfo* target_si);
+  // NOTE(review): presumably indexed by the source library's version index - confirm.
+  std::vector<version_info> version_infos;
+
+  DISALLOW_COPY_AND_ASSIGN(VersionTracker);
+};
+
 struct soinfo {
  public:
   typedef LinkedList<soinfo, SoinfoListAllocator> soinfo_list_t;
+#if defined(__arm__)
+ private:
+  char old_name_[SOINFO_NAME_LEN];
+#endif
  public:
-  char name[SOINFO_NAME_LEN];
   const ElfW(Phdr)* phdr;
   size_t phnum;
   ElfW(Addr) entry;
   ElfW(Addr) base;
   size_t size;
 
-#ifndef __LP64__
+#if defined(__arm__)
   uint32_t unused1;  // DO NOT USE, maintained for compatibility.
 #endif
 
   ElfW(Dyn)* dynamic;
 
-#ifndef __LP64__
+#if defined(__arm__)
   uint32_t unused2; // DO NOT USE, maintained for compatibility
   uint32_t unused3; // DO NOT USE, maintained for compatibility
 #endif
@@ -238,7 +272,8 @@
   void call_destructors();
   void call_pre_init_constructors();
   bool prelink_image();
-  bool link_image(const soinfo_list_t& global_group, const soinfo_list_t& local_group, const android_dlextinfo* extinfo);
+  bool link_image(const soinfo_list_t& global_group, const soinfo_list_t& local_group,
+                  const android_dlextinfo* extinfo);
 
   void add_child(soinfo* child);
   void remove_all_links();
@@ -252,18 +287,27 @@
   void set_dt_flags_1(uint32_t dt_flags_1);
 
   soinfo_list_t& get_children();
+  const soinfo_list_t& get_children() const;
+
   soinfo_list_t& get_parents();
 
-  ElfW(Sym)* find_symbol_by_name(SymbolName& symbol_name);
+  bool find_symbol_by_name(SymbolName& symbol_name,
+                           const version_info* vi,
+                           const ElfW(Sym)** symbol) const;
+
   ElfW(Sym)* find_symbol_by_address(const void* addr);
-  ElfW(Addr) resolve_symbol_address(ElfW(Sym)* s);
+  ElfW(Addr) resolve_symbol_address(const ElfW(Sym)* s) const;
 
   const char* get_string(ElfW(Word) index) const;
   bool can_unload() const;
   bool is_gnu_hash() const;
 
-  bool inline has_min_version(uint32_t min_version) const {
+  bool inline has_min_version(uint32_t min_version __unused) const {
+#if defined(__arm__)
     return (flags_ & FLAG_NEW_SOINFO) != 0 && version_ >= min_version;
+#else
+    return true;
+#endif
   }
 
   bool is_linked() const;
@@ -278,16 +322,27 @@
 
   soinfo* get_local_group_root() const;
 
+  const char* get_soname() const;
+  const char* get_realpath() const;
+  const ElfW(Versym)* get_versym(size_t n) const;
+  ElfW(Addr) get_verneed_ptr() const;
+  size_t get_verneed_cnt() const;
+  ElfW(Addr) get_verdef_ptr() const;
+  size_t get_verdef_cnt() const;
+
+  bool find_verdef_version_index(const version_info* vi, ElfW(Versym)* versym) const;
+
  private:
-  ElfW(Sym)* elf_lookup(SymbolName& symbol_name);
+  bool elf_lookup(SymbolName& symbol_name, const version_info* vi, uint32_t* symbol_index) const;
   ElfW(Sym)* elf_addr_lookup(const void* addr);
-  ElfW(Sym)* gnu_lookup(SymbolName& symbol_name);
+  bool gnu_lookup(SymbolName& symbol_name, const version_info* vi, uint32_t* symbol_index) const;
   ElfW(Sym)* gnu_addr_lookup(const void* addr);
 
   void call_array(const char* array_name, linker_function_t* functions, size_t count, bool reverse);
   void call_function(const char* function_name, linker_function_t function);
   template<typename ElfRelIteratorT>
-  bool relocate(ElfRelIteratorT&& rel_iterator, const soinfo_list_t& global_group, const soinfo_list_t& local_group);
+  bool relocate(ElfRelIteratorT&& rel_iterator, const soinfo_list_t& global_group,
+                const soinfo_list_t& local_group);
 
  private:
   // This part of the structure is only available
@@ -322,11 +377,23 @@
   uint8_t* android_relocs_;
   size_t android_relocs_size_;
 
+  const char* soname_;
+  std::string realpath_;
+
+  const ElfW(Versym)* versym_;
+
+  ElfW(Addr) verdef_ptr_;
+  size_t verdef_cnt_;
+
+  ElfW(Addr) verneed_ptr_;
+  size_t verneed_cnt_;
+
   friend soinfo* get_libdl_info();
 };
 
-ElfW(Sym)* soinfo_do_lookup(soinfo* si_from, const char* name, soinfo** si_found_in,
-    const soinfo::soinfo_list_t& global_group, const soinfo::soinfo_list_t& local_group);
+bool soinfo_do_lookup(soinfo* si_from, const char* name, const version_info* vi,
+                      soinfo** si_found_in, const soinfo::soinfo_list_t& global_group,
+                      const soinfo::soinfo_list_t& local_group, const ElfW(Sym)** symbol);
 
 enum RelocationKind {
   kRelocAbsolute = 0,
@@ -345,10 +412,10 @@
 soinfo* do_dlopen(const char* name, int flags, const android_dlextinfo* extinfo);
 void do_dlclose(soinfo* si);
 
-ElfW(Sym)* dlsym_linear_lookup(const char* name, soinfo** found, soinfo* start);
+const ElfW(Sym)* dlsym_linear_lookup(const char* name, soinfo** found, soinfo* caller, void* handle);
 soinfo* find_containing_library(const void* addr);
 
-ElfW(Sym)* dlsym_handle_lookup(soinfo* si, soinfo** found, const char* name);
+const ElfW(Sym)* dlsym_handle_lookup(soinfo* si, soinfo** found, const char* name);
 
 void debuggerd_init();
 extern "C" abort_msg_t* g_abort_message;
diff --git a/linker/linker_allocator.cpp b/linker/linker_allocator.cpp
new file mode 100644
index 0000000..1b16cf1
--- /dev/null
+++ b/linker/linker_allocator.cpp
@@ -0,0 +1,346 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker_allocator.h"
+#include "linker.h"
+
+#include <algorithm>
+#include <vector>
+
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "private/bionic_prctl.h"
+
+//
+// LinkerMemoryAllocator is a general purpose allocator
+// designed to provide the same functionality as the malloc/free/realloc
+// libc functions.
+//
+// On alloc:
+// If size is > 1k allocator proxies malloc call directly to mmap
+// If size <= 1k allocator uses SmallObjectAllocator for the size
+// rounded up to the nearest power of two.
+//
+// On free:
+//
+// For a pointer allocated using proxy-to-mmap allocator unmaps
+// the memory.
+//
+// For a pointer allocated using SmallObjectAllocator it adds
+// the block to free_blocks_list_. If the number of free pages reaches 2,
+// SmallObjectAllocator munmaps one of the pages keeping the other one
+// in reserve.
+
+static const char kSignature[4] = {'L', 'M', 'A', 1};
+
+static const size_t kSmallObjectMaxSize = 1 << kSmallObjectMaxSizeLog2;
+
+// This type is used for large allocations (with size >1k)
+static const uint32_t kLargeObject = 111;
+
+bool operator<(const small_object_page_record& one, const small_object_page_record& two) {
+  return one.page_addr < two.page_addr;  // records are ordered by page address only
+}
+
+// Rounds the base-2 logarithm up: yields the smallest exponent e such that
+// (1 << e) >= number. LinkerMemoryAllocator::alloc() never passes 0 here.
+static inline uint16_t log2(size_t number) {
+  uint16_t exponent = 0;
+
+  // Count the significant bits of (number - 1).
+  for (size_t remaining = number - 1; remaining != 0; remaining >>= 1) {
+    ++exponent;
+  }
+  return exponent;
+}
+
+LinkerSmallObjectAllocator::LinkerSmallObjectAllocator()  // zero state; init() sets type/size/name
+    : type_(0), name_(nullptr), block_size_(0), free_pages_cnt_(0), free_blocks_list_(nullptr) {}
+
+void* LinkerSmallObjectAllocator::alloc() {  // returns one zero-filled block of block_size_ bytes
+  if (free_blocks_list_ == nullptr) {
+    alloc_page();
+  }
+  // The list head stands for a run of free_blocks_cnt contiguous free blocks; take its first.
+  small_object_block_record* block_record = free_blocks_list_;
+  if (block_record->free_blocks_cnt > 1) {  // more blocks in the run: the next one becomes the head
+    small_object_block_record* next_free = reinterpret_cast<small_object_block_record*>(
+        reinterpret_cast<uint8_t*>(block_record) + block_size_);
+    next_free->next = block_record->next;
+    next_free->free_blocks_cnt = block_record->free_blocks_cnt - 1;
+    free_blocks_list_ = next_free;
+  } else {
+    free_blocks_list_ = block_record->next;
+  }
+
+  // bookkeeping: update the counters of the page that owns the block
+  auto page_record = find_page_record(block_record);
+  // a page that had no allocated blocks stops counting as a free page
+  if (page_record->allocated_blocks_cnt == 0) {
+    free_pages_cnt_--;
+  }
+
+  page_record->free_blocks_cnt--;
+  page_record->allocated_blocks_cnt++;
+  // zeroing also wipes the free-list record that was stored inside the block
+  memset(block_record, 0, block_size_);
+
+  return block_record;
+}
+
+void LinkerSmallObjectAllocator::free_page(linker_vector_t::iterator page_record) {
+  void* page_start = reinterpret_cast<void*>(page_record->page_addr);
+  void* page_end = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(page_start) + PAGE_SIZE);
+  // Unlink every free-list entry that lives inside this page: first at the head...
+  while (free_blocks_list_ != nullptr &&
+      free_blocks_list_ > page_start &&
+      free_blocks_list_ < page_end) {
+    free_blocks_list_ = free_blocks_list_->next;
+  }
+  // ...then anywhere further down the list.
+  small_object_block_record* current = free_blocks_list_;
+
+  while (current != nullptr) {
+    while (current->next > page_start && current->next < page_end) {
+      current->next = current->next->next;
+    }
+
+    current = current->next;
+  }
+  // Release the page and forget its record; it no longer counts as a free page.
+  munmap(page_start, PAGE_SIZE);
+  page_records_.erase(page_record);
+  free_pages_cnt_--;
+}
+
+void LinkerSmallObjectAllocator::free(void* ptr) {
+  auto page_record = find_page_record(ptr);
+  // Returns a block to this allocator; find_page_record() aborts if ptr is not in our pages.
+  ssize_t offset = reinterpret_cast<uintptr_t>(ptr) - sizeof(page_info);
+  // Blocks are laid out back to back right after the page_info header.
+  if (offset % block_size_ != 0) {
+    __libc_fatal("invalid pointer: %p (block_size=%zd)", ptr, block_size_);
+  }
+  // Scrub the block and push it onto the free list as a run of one.
+  memset(ptr, 0, block_size_);
+  small_object_block_record* block_record = reinterpret_cast<small_object_block_record*>(ptr);
+
+  block_record->next = free_blocks_list_;
+  block_record->free_blocks_cnt = 1;
+
+  free_blocks_list_ = block_record;
+
+  page_record->free_blocks_cnt++;
+  page_record->allocated_blocks_cnt--;
+
+  if (page_record->allocated_blocks_cnt == 0) {
+    if (++free_pages_cnt_ > 1) {  // pre-increment: compare the count including this page
+      // if we already have a free page - unmap this one.
+      free_page(page_record);
+    }
+  }
+}
+
+void LinkerSmallObjectAllocator::init(uint32_t type, size_t block_size, const char* name) {
+  type_ = type;              // stamped into page_info::type of every page by alloc_page()
+  block_size_ = block_size;  // size in bytes of each block handed out
+  name_ = name;              // label given to prctl(PR_SET_VMA_ANON_NAME) for new pages
+}
+
+linker_vector_t::iterator LinkerSmallObjectAllocator::find_page_record(void* ptr) {
+  void* addr = reinterpret_cast<void*>(PAGE_START(reinterpret_cast<uintptr_t>(ptr)));
+  small_object_page_record boundary;  // search key: operator< compares page_addr only
+  boundary.page_addr = addr;
+  linker_vector_t::iterator it = std::lower_bound(
+      page_records_.begin(), page_records_.end(), boundary);
+  // lower_bound only yields an insertion point; require an exact match on ptr's page.
+  if (it == page_records_.end() || it->page_addr != addr) {
+    // not found: ptr does not lie in any page recorded by this allocator.
+    __libc_fatal("page record for %p was not found (block_size=%zd)", ptr, block_size_);
+  }
+
+  return it;
+}
+
+void LinkerSmallObjectAllocator::create_page_record(void* page_addr, size_t free_blocks_cnt) {
+  small_object_page_record record;
+  record.page_addr = page_addr;
+  record.free_blocks_cnt = free_blocks_cnt;
+  record.allocated_blocks_cnt = 0;
+  // Insert at the lower_bound position so page_records_ stays sorted by page_addr.
+  linker_vector_t::iterator it = std::lower_bound(
+      page_records_.begin(), page_records_.end(), record);
+  page_records_.insert(it, record);
+}
+
+void LinkerSmallObjectAllocator::alloc_page() {
+  // Maps one fresh page, stamps its page_info header and publishes the rest of
+  // the page as a single run of free blocks at the head of the free list.
+  void* map_ptr = mmap(nullptr, PAGE_SIZE,
+      PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  if (map_ptr == MAP_FAILED) {
+    __libc_fatal("mmap failed");
+  }
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, PAGE_SIZE, name_);
+  memset(map_ptr, 0, PAGE_SIZE);
+
+  page_info* info = reinterpret_cast<page_info*>(map_ptr);
+  memcpy(info->signature, kSignature, sizeof(kSignature));
+  info->type = type_;
+  info->allocator_addr = this;
+
+  size_t free_blocks_cnt = (PAGE_SIZE - sizeof(page_info))/block_size_;
+  create_page_record(map_ptr, free_blocks_cnt);
+  // The new page holds no allocated blocks yet, so it counts as a free page;
+  // alloc() decrements this counter when it hands out the page's first block.
+  free_pages_cnt_++;
+
+  small_object_block_record* first_block = reinterpret_cast<small_object_block_record*>(info + 1);
+  first_block->next = free_blocks_list_;
+  first_block->free_blocks_cnt = free_blocks_cnt;
+  free_blocks_list_ = first_block;
+}
+
+
+LinkerMemoryAllocator::LinkerMemoryAllocator() {
+  static const char* allocator_names[kSmallObjectAllocatorsCount] = {
+    "linker_alloc_16", // 2^4
+    "linker_alloc_32", // 2^5
+    "linker_alloc_64", // and so on...
+    "linker_alloc_128",
+    "linker_alloc_256",
+    "linker_alloc_512",
+    "linker_alloc_1024", // 2^10
+  };
+  // Allocator i serves blocks of 2^(i + kSmallObjectMinSizeLog2) bytes; `type` is that exponent.
+  for (size_t i = 0; i < kSmallObjectAllocatorsCount; ++i) {
+    uint32_t type = i + kSmallObjectMinSizeLog2;
+    allocators_[i].init(type, 1 << type, allocator_names[i]);
+  }
+}
+
+void* LinkerMemoryAllocator::alloc_mmap(size_t size) {
+  // Large request: own mapping = page_info header + payload; reject sizes that would wrap.
+  if (size > static_cast<size_t>(-1) - sizeof(page_info) - PAGE_SIZE) {
+    __libc_fatal("allocation size is too large");
+  }
+  size_t allocated_size = PAGE_END(size + sizeof(page_info));
+  void* map_ptr = mmap(nullptr, allocated_size,
+      PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  if (map_ptr == MAP_FAILED) {
+    __libc_fatal("mmap failed");
+  }
+
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, allocated_size, "linker_alloc_lob");
+  memset(map_ptr, 0, allocated_size);
+  page_info* info = reinterpret_cast<page_info*>(map_ptr);
+  memcpy(info->signature, kSignature, sizeof(kSignature));
+  info->type = kLargeObject;
+  info->allocated_size = allocated_size;  // whole mapping, header included; used by free()
+  return info + 1;  // the caller's memory starts right after the header
+}
+
+void* LinkerMemoryAllocator::alloc(size_t size) {
+  // A zero-byte request is served as a one-byte request.
+  const size_t wanted = (size == 0) ? 1 : size;
+
+  // Anything above the largest size class gets a mapping of its own.
+  if (wanted > kSmallObjectMaxSize) {
+    return alloc_mmap(wanted);
+  }
+
+  // Size class = log2 of the request rounded up, clamped to the smallest class.
+  uint16_t size_class = log2(wanted);
+  if (size_class < kSmallObjectMinSizeLog2) {
+    size_class = kSmallObjectMinSizeLog2;
+  }
+
+  LinkerSmallObjectAllocator* bucket = get_small_object_allocator(size_class);
+  return bucket->alloc();
+}
+
+page_info* LinkerMemoryAllocator::get_page_info(void* ptr) {
+  page_info* info = reinterpret_cast<page_info*>(PAGE_START(reinterpret_cast<size_t>(ptr)));
+  if (memcmp(info->signature, kSignature, sizeof(kSignature)) != 0) {  // page lacks our stamp
+    __libc_fatal("invalid pointer %p (page signature mismatch)", ptr);
+  }
+  // Signature matches kSignature: the start of ptr's page is treated as its header.
+  return info;
+}
+
+void* LinkerMemoryAllocator::realloc(void* ptr, size_t size) {
+  if (ptr == nullptr) {
+    return alloc(size);
+  }
+  // realloc(ptr, 0) releases the block and yields nullptr.
+  if (size == 0) {
+    free(ptr);
+    return nullptr;
+  }
+
+  page_info* info = get_page_info(ptr);
+  // old_size is the usable capacity of the current block, not the size first requested.
+  size_t old_size = 0;
+
+  if (info->type == kLargeObject) {
+    old_size = info->allocated_size - sizeof(page_info);
+  } else {
+    LinkerSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
+    if (allocator != info->allocator_addr) {
+      __libc_fatal("invalid pointer %p (invalid allocator address for the page)", ptr);
+    }
+
+    old_size = allocator->get_block_size();
+  }
+  // Grow by allocate-copy-free; a request that already fits returns ptr unchanged.
+  if (old_size < size) {
+    void *result = alloc(size);
+    memcpy(result, ptr, old_size);
+    free(ptr);
+    return result;
+  }
+
+  return ptr;
+}
+
+void LinkerMemoryAllocator::free(void* ptr) {
+  if (ptr == nullptr) {
+    return;
+  }
+  // The page header tells which allocator owns ptr (get_page_info aborts on a bad signature).
+  page_info* info = get_page_info(ptr);
+  // Large objects own their whole mapping; small ones go back to their size-class allocator.
+  if (info->type == kLargeObject) {
+    munmap(info, info->allocated_size);
+  } else {
+    LinkerSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
+    if (allocator != info->allocator_addr) {
+      __libc_fatal("invalid pointer %p (invalid allocator address for the page)", ptr);
+    }
+
+    allocator->free(ptr);
+  }
+}
+
+LinkerSmallObjectAllocator* LinkerMemoryAllocator::get_small_object_allocator(uint32_t type) {
+  // Maps a size-class exponent onto its slot in allocators_.
+  const bool known_class = kSmallObjectMinSizeLog2 <= type && type <= kSmallObjectMaxSizeLog2;
+  if (!known_class) {  // not one of our size-class exponents
+    __libc_fatal("invalid type: %u", type);
+  }
+  return allocators_ + (type - kSmallObjectMinSizeLog2);
+}
diff --git a/linker/linker_allocator.h b/linker/linker_allocator.h
new file mode 100644
index 0000000..2adad56
--- /dev/null
+++ b/linker/linker_allocator.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LINKER_ALLOCATOR_H
+#define __LINKER_ALLOCATOR_H
+
+#include <stdlib.h>
+#include <sys/cdefs.h>
+#include <sys/mman.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "private/bionic_prctl.h"
+#include "private/libc_logging.h"
+
+const uint32_t kSmallObjectMaxSizeLog2 = 10;
+const uint32_t kSmallObjectMinSizeLog2 = 4;
+const uint32_t kSmallObjectAllocatorsCount = kSmallObjectMaxSizeLog2 - kSmallObjectMinSizeLog2 + 1;
+
+class LinkerSmallObjectAllocator;
+
+// This structure is placed at the beginning of each addressable page
+// and has all information we need to find the corresponding memory allocator.
+struct page_info {
+  char signature[4];  // compared against the allocator's signature before a page is trusted
+  uint32_t type;      // size-class exponent for small-object pages, or the large-object tag
+  union {
+    // we use allocated_size for large objects allocator
+    size_t allocated_size;
+    // and allocator_addr for small ones.
+    LinkerSmallObjectAllocator* allocator_addr;
+  };
+};
+
+struct small_object_page_record {
+  void* page_addr;              // start address of the page; the sort/search key
+  size_t free_blocks_cnt;       // blocks of this page currently free
+  size_t allocated_blocks_cnt;  // blocks of this page currently handed out
+};
+
+// for lower_bound...
+bool operator<(const small_object_page_record& one, const small_object_page_record& two);
+
+struct small_object_block_record {
+  small_object_block_record* next;  // next entry of the allocator's free list
+  size_t free_blocks_cnt;           // contiguous free blocks in the run starting at this record
+};
+
+// This is an allocator implementation for std::vector
+template <typename T>
+class linker_vector_allocator {
+ public:
+  typedef T value_type;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  // Maps fresh anonymous memory for n objects of T; aborts instead of throwing on failure.
+  T* allocate(size_t n, const T* hint = nullptr) {
+    size_t size = n * sizeof(T);
+    void* ptr = mmap(const_cast<T*>(hint), size,
+        PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (ptr == MAP_FAILED) {
+      // Spec says we need to throw std::bad_alloc here but because our
+      // code does not support exception handling anyways - we are going to abort.
+      __libc_fatal("mmap failed");
+    }
+
+    prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "linker_alloc_vector");
+
+    return reinterpret_cast<T*>(ptr);
+  }
+  // Unmaps n * sizeof(T) bytes starting at ptr.
+  void deallocate(T* ptr, size_t n) {
+    munmap(ptr, n * sizeof(T));
+  }
+};
+
+typedef
+    std::vector<small_object_page_record, linker_vector_allocator<small_object_page_record>>
+    linker_vector_t;
+
+
+class LinkerSmallObjectAllocator {
+ public:
+  LinkerSmallObjectAllocator();
+  void init(uint32_t type, size_t block_size, const char* name);
+  void* alloc();         // one zero-filled block of block_size_ bytes
+  void free(void* ptr);  // ptr must come from alloc() of this same allocator
+
+  size_t get_block_size() const { return block_size_; }
+ private:
+  void alloc_page();
+  void free_page(linker_vector_t::iterator page_record);
+  linker_vector_t::iterator find_page_record(void* ptr);
+  void create_page_record(void* page_addr, size_t free_blocks_cnt);
+
+  uint32_t type_;      // written into page_info::type of each page
+  const char* name_;   // label attached to each page mapping via prctl
+  size_t block_size_;  // bytes per block
+
+  size_t free_pages_cnt_;                        // pages with no allocated blocks
+  small_object_block_record* free_blocks_list_;  // head of the singly linked free list
+
+  // sorted vector of page records
+  linker_vector_t page_records_;
+};
+
+class LinkerMemoryAllocator {
+ public:
+  LinkerMemoryAllocator();
+  void* alloc(size_t size);  // zero-filled memory; a size of 0 is served as 1
+
+  // Note that this implementation of realloc never shrinks allocation
+  void* realloc(void* ptr, size_t size);
+  void free(void* ptr);  // accepts nullptr; aborts on pointers it does not recognise
+ private:
+  void* alloc_mmap(size_t size);
+  page_info* get_page_info(void* ptr);
+  LinkerSmallObjectAllocator* get_small_object_allocator(uint32_t type);
+  // One allocator per power-of-two size class, smallest class first.
+  LinkerSmallObjectAllocator allocators_[kSmallObjectAllocatorsCount];
+};
+
+
+#endif  /* __LINKER_ALLOCATOR_H */
diff --git a/linker/linker_block_allocator.h b/linker/linker_block_allocator.h
index 1d41806..4b9b995 100644
--- a/linker/linker_block_allocator.h
+++ b/linker/linker_block_allocator.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __LINKER_ALLOCATOR_H
-#define __LINKER_ALLOCATOR_H
+#ifndef __LINKER_BLOCK_ALLOCATOR_H
+#define __LINKER_BLOCK_ALLOCATOR_H
 
 #include <stdlib.h>
 #include <limits.h>
@@ -24,11 +24,11 @@
 struct LinkerBlockAllocatorPage;
 
 /*
- * This class is a non-template version of the LinkerAllocator
+ * This class is a non-template version of the LinkerTypeAllocator
  * It keeps code inside .cpp file by keeping the interface
  * template-free.
  *
- * Please use LinkerAllocator<type> where possible (everywhere).
+ * Please use LinkerTypeAllocator<type> where possible (everywhere).
  */
 class LinkerBlockAllocator {
  public:
@@ -50,11 +50,21 @@
 };
 
 /*
- * We can't use malloc(3) in the dynamic linker.
- *
  * A simple allocator for the dynamic linker. An allocator allocates instances
  * of a single fixed-size type. Allocations are backed by page-sized private
  * anonymous mmaps.
+ *
+ * The differences between this allocator and LinkerMemoryAllocator are:
+ * 1. This allocator manages space more efficiently. LinkerMemoryAllocator
+ *    operates in power-of-two sized blocks up to 1k, when this implementation
+ *    splits the page to aligned size of structure; For example for structures
+ *    with size 513 this allocator will use 516 (520 for lp64) bytes of data
+ *    where generalized implementation is going to use 1024 sized blocks.
+ *
+ * 2. This allocator does not munmap allocated memory, where LinkerMemoryAllocator does.
+ *
+ * 3. This allocator provides mprotect services to the user, where LinkerMemoryAllocator
+ *    always treats its memory as READ|WRITE.
  */
 template<typename T>
 class LinkerTypeAllocator {
@@ -68,4 +78,4 @@
   DISALLOW_COPY_AND_ASSIGN(LinkerTypeAllocator);
 };
 
-#endif // __LINKER_ALLOCATOR_H
+#endif // __LINKER_BLOCK_ALLOCATOR_H
diff --git a/linker/linker_environ.cpp b/linker/linker_environ.cpp
index 7272f4e..9a0f009 100644
--- a/linker/linker_environ.cpp
+++ b/linker/linker_environ.cpp
@@ -115,6 +115,7 @@
       "GCONV_PATH",
       "GETCONF_DIR",
       "HOSTALIASES",
+      "JE_MALLOC_CONF",
       "LD_AOUT_LIBRARY_PATH",
       "LD_AOUT_PRELOAD",
       "LD_AUDIT",
@@ -130,6 +131,7 @@
       "LOCALDOMAIN",
       "LOCPATH",
       "MALLOC_CHECK_",
+      "MALLOC_CONF",
       "MALLOC_TRACE",
       "NIS_PATH",
       "NLSPATH",
diff --git a/linker/linker_memory.cpp b/linker/linker_memory.cpp
new file mode 100644
index 0000000..1892d02
--- /dev/null
+++ b/linker/linker_memory.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker_allocator.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+// malloc-family entry points defined by this file: every request is
+// forwarded to this single file-local LinkerMemoryAllocator instance.
+static LinkerMemoryAllocator g_linker_allocator;
+
+// Forwards a byte_count-sized request to the linker allocator.
+void* malloc(size_t byte_count) {
+  return g_linker_allocator.alloc(byte_count);
+}
+
+// Requests storage for item_count items of item_size bytes each; returns
+// nullptr when item_count * item_size does not fit in size_t.
+// NOTE(review): calloc must hand back zero-filled memory; this relies on
+// LinkerMemoryAllocator::alloc() doing the zeroing -- confirm.
+void* calloc(size_t item_count, size_t item_size) {
+  // Without this guard the product silently wraps and a too-small block is returned.
+  if (item_size != 0 && item_count > SIZE_MAX / item_size) {
+    return nullptr;
+  }
+  return g_linker_allocator.alloc(item_count * item_size);
+}
+
+// Forwards to LinkerMemoryAllocator::realloc for the block at p.
+void* realloc(void* p, size_t byte_count) {
+  return g_linker_allocator.realloc(p, byte_count);
+}
+
+// Hands ptr back to the linker allocator.
+void free(void* ptr) {
+  g_linker_allocator.free(ptr);
+}
+
diff --git a/linker/linker_mips.cpp b/linker/linker_mips.cpp
index f0bde55..c162111 100644
--- a/linker/linker_mips.cpp
+++ b/linker/linker_mips.cpp
@@ -47,7 +47,15 @@
     const soinfo_list_t& local_group);
 
 template <typename ElfRelIteratorT>
-bool soinfo::relocate(ElfRelIteratorT&& rel_iterator, const soinfo_list_t& global_group, const soinfo_list_t& local_group) {
+bool soinfo::relocate(ElfRelIteratorT&& rel_iterator,
+                      const soinfo_list_t& global_group,
+                      const soinfo_list_t& local_group) {
+  VersionTracker version_tracker;
+
+  if (!version_tracker.init(this)) {
+    return false;
+  }
+
   for (size_t idx = 0; rel_iterator.has_next(); ++idx) {
     const auto rel = rel_iterator.next();
 
@@ -62,20 +70,41 @@
     ElfW(Addr) sym_addr = 0;
     const char* sym_name = nullptr;
 
-    DEBUG("Processing '%s' relocation at index %zd", this->name, idx);
+    DEBUG("Processing '%s' relocation at index %zd", get_soname(), idx);
     if (type == R_GENERIC_NONE) {
       continue;
     }
 
-    ElfW(Sym)* s = nullptr;
+    const ElfW(Sym)* s = nullptr;
     soinfo* lsi = nullptr;
 
     if (sym != 0) {
       sym_name = get_string(symtab_[sym].st_name);
-      s = soinfo_do_lookup(this, sym_name, &lsi, global_group,local_group);
+      const ElfW(Versym)* sym_ver_ptr = get_versym(sym);
+      ElfW(Versym) sym_ver = sym_ver_ptr == nullptr ? 0 : *sym_ver_ptr;
+
+      if (sym_ver == VER_NDX_LOCAL || sym_ver == VER_NDX_GLOBAL) {
+        // there is no version info for this one
+        if (!soinfo_do_lookup(this, sym_name, nullptr, &lsi, global_group, local_group, &s)) {
+          return false;
+        }
+      } else {
+        const version_info* vi = version_tracker.get_version_info(sym_ver);
+
+        if (vi == nullptr) {
+          DL_ERR("cannot find verneed/verdef for version index=%d "
+              "referenced by symbol \"%s\" at \"%s\"", sym_ver, sym_name, get_soname());
+          return false;
+        }
+
+        if (!soinfo_do_lookup(this, sym_name, vi, &lsi, global_group, local_group, &s)) {
+          return false;
+        }
+      }
+
       if (s == nullptr) {
         // mips does not support relocation with weak-undefined symbols
-        DL_ERR("cannot locate symbol \"%s\" referenced by \"%s\"...", sym_name, name);
+        DL_ERR("cannot locate symbol \"%s\" referenced by \"%s\"...", sym_name, get_soname());
         return false;
       } else {
         // We got a definition.
@@ -115,7 +144,8 @@
   return true;
 }
 
-bool soinfo::mips_relocate_got(const soinfo_list_t& global_group, const soinfo_list_t& local_group) {
+bool soinfo::mips_relocate_got(const soinfo_list_t& global_group,
+                               const soinfo_list_t& local_group) {
   ElfW(Addr)** got = plt_got_;
   if (got == nullptr) {
     return true;
@@ -144,7 +174,11 @@
     // This is an undefined reference... try to locate it.
     const char* sym_name = get_string(sym->st_name);
     soinfo* lsi = nullptr;
-    ElfW(Sym)* s = soinfo_do_lookup(this, sym_name, &lsi, global_group, local_group);
+    const ElfW(Sym)* s = nullptr;
+    if (!soinfo_do_lookup(this, sym_name, nullptr, &lsi, global_group, local_group, &s)) {
+      return false;
+    }
+
     if (s == nullptr) {
       // We only allow an undefined symbol if this is a weak reference.
       s = &symtab_[g];
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 38e6262..638c9d6 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -240,7 +240,8 @@
 
   phdr_size_ = page_max - page_min;
 
-  void* mmap_result = mmap64(nullptr, phdr_size_, PROT_READ, MAP_PRIVATE, fd_, file_offset_ + page_min);
+  void* mmap_result =
+      mmap64(nullptr, phdr_size_, PROT_READ, MAP_PRIVATE, fd_, file_offset_ + page_min);
   if (mmap_result == MAP_FAILED) {
     DL_ERR("\"%s\" phdr mmap failed: %s", name_, strerror(errno));
     return false;
@@ -428,9 +429,15 @@
     ElfW(Addr) seg_page_start = PAGE_START(phdr->p_vaddr) + load_bias;
     ElfW(Addr) seg_page_end   = PAGE_END(phdr->p_vaddr + phdr->p_memsz) + load_bias;
 
+    int prot = PFLAGS_TO_PROT(phdr->p_flags);
+    if ((extra_prot_flags & PROT_WRITE) != 0) {
+      // make sure we're never simultaneously writable / executable
+      prot &= ~PROT_EXEC;
+    }
+
     int ret = mprotect(reinterpret_cast<void*>(seg_page_start),
                        seg_page_end - seg_page_start,
-                       PFLAGS_TO_PROT(phdr->p_flags) | extra_prot_flags);
+                       prot | extra_prot_flags);
     if (ret < 0) {
       return -1;
     }
@@ -449,7 +456,8 @@
  * Return:
  *   0 on error, -1 on failure (error code in errno).
  */
-int phdr_table_protect_segments(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias) {
+int phdr_table_protect_segments(const ElfW(Phdr)* phdr_table,
+                                size_t phdr_count, ElfW(Addr) load_bias) {
   return _phdr_table_set_load_prot(phdr_table, phdr_count, load_bias, 0);
 }
 
@@ -469,7 +477,8 @@
  * Return:
  *   0 on error, -1 on failure (error code in errno).
  */
-int phdr_table_unprotect_segments(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias) {
+int phdr_table_unprotect_segments(const ElfW(Phdr)* phdr_table,
+                                  size_t phdr_count, ElfW(Addr) load_bias) {
   return _phdr_table_set_load_prot(phdr_table, phdr_count, load_bias, PROT_WRITE);
 }
 
@@ -531,7 +540,8 @@
  * Return:
  *   0 on error, -1 on failure (error code in errno).
  */
-int phdr_table_protect_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias) {
+int phdr_table_protect_gnu_relro(const ElfW(Phdr)* phdr_table,
+                                 size_t phdr_count, ElfW(Addr) load_bias) {
   return _phdr_table_set_gnu_relro_prot(phdr_table, phdr_count, load_bias, PROT_READ);
 }
 
@@ -547,7 +557,9 @@
  * Return:
  *   0 on error, -1 on failure (error code in errno).
  */
-int phdr_table_serialize_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias,
+int phdr_table_serialize_gnu_relro(const ElfW(Phdr)* phdr_table,
+                                   size_t phdr_count,
+                                   ElfW(Addr) load_bias,
                                    int fd) {
   const ElfW(Phdr)* phdr = phdr_table;
   const ElfW(Phdr)* phdr_limit = phdr + phdr_count;
@@ -592,7 +604,9 @@
  * Return:
  *   0 on error, -1 on failure (error code in errno).
  */
-int phdr_table_map_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias,
+int phdr_table_map_gnu_relro(const ElfW(Phdr)* phdr_table,
+                             size_t phdr_count,
+                             ElfW(Addr) load_bias,
                              int fd) {
   // Map the file at a temporary location so we can compare its contents.
   struct stat file_stat;
@@ -725,11 +739,12 @@
                                     ElfW(Addr) load_bias, ElfW(Dyn)** dynamic,
                                     ElfW(Word)* dynamic_flags) {
   *dynamic = nullptr;
-  for (const ElfW(Phdr)* phdr = phdr_table, *phdr_limit = phdr + phdr_count; phdr < phdr_limit; phdr++) {
-    if (phdr->p_type == PT_DYNAMIC) {
-      *dynamic = reinterpret_cast<ElfW(Dyn)*>(load_bias + phdr->p_vaddr);
+  for (size_t i = 0; i<phdr_count; ++i) {
+    const ElfW(Phdr)& phdr = phdr_table[i];
+    if (phdr.p_type == PT_DYNAMIC) {
+      *dynamic = reinterpret_cast<ElfW(Dyn)*>(load_bias + phdr.p_vaddr);
       if (dynamic_flags) {
-        *dynamic_flags = phdr->p_flags;
+        *dynamic_flags = phdr.p_flags;
       }
       return;
     }
diff --git a/linker/linker_phdr.h b/linker/linker_phdr.h
index 6b917b4..50f2117 100644
--- a/linker/linker_phdr.h
+++ b/linker/linker_phdr.h
@@ -84,17 +84,20 @@
 size_t phdr_table_get_load_size(const ElfW(Phdr)* phdr_table, size_t phdr_count,
                                 ElfW(Addr)* min_vaddr = nullptr, ElfW(Addr)* max_vaddr = nullptr);
 
-int phdr_table_protect_segments(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias);
+int phdr_table_protect_segments(const ElfW(Phdr)* phdr_table,
+                                size_t phdr_count, ElfW(Addr) load_bias);
 
-int phdr_table_unprotect_segments(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias);
+int phdr_table_unprotect_segments(const ElfW(Phdr)* phdr_table, size_t phdr_count,
+                                  ElfW(Addr) load_bias);
 
-int phdr_table_protect_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias);
+int phdr_table_protect_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count,
+                                 ElfW(Addr) load_bias);
 
-int phdr_table_serialize_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias,
-                                   int fd);
+int phdr_table_serialize_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count,
+                                   ElfW(Addr) load_bias, int fd);
 
-int phdr_table_map_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias,
-                             int fd);
+int phdr_table_map_gnu_relro(const ElfW(Phdr)* phdr_table, size_t phdr_count,
+                             ElfW(Addr) load_bias, int fd);
 
 #if defined(__arm__)
 int phdr_table_get_arm_exidx(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr) load_bias,
diff --git a/linker/linker_reloc_iterators.h b/linker/linker_reloc_iterators.h
index 5db31f9..f28c0e0 100644
--- a/linker/linker_reloc_iterators.h
+++ b/linker/linker_reloc_iterators.h
@@ -21,15 +21,10 @@
 
 #include <string.h>
 
-#define RELOCATION_GROUPED_BY_INFO_FLAG 1
-#define RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG 2
-#define RELOCATION_GROUPED_BY_ADDEND_FLAG 4
-#define RELOCATION_GROUP_HAS_ADDEND_FLAG 8
-
-#define RELOCATION_GROUPED_BY_INFO(flags) (((flags) & RELOCATION_GROUPED_BY_INFO_FLAG) != 0)
-#define RELOCATION_GROUPED_BY_OFFSET_DELTA(flags) (((flags) & RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG) != 0)
-#define RELOCATION_GROUPED_BY_ADDEND(flags) (((flags) & RELOCATION_GROUPED_BY_ADDEND_FLAG) != 0)
-#define RELOCATION_GROUP_HAS_ADDEND(flags) (((flags) & RELOCATION_GROUP_HAS_ADDEND_FLAG) != 0)
+const size_t RELOCATION_GROUPED_BY_INFO_FLAG = 1;
+const size_t RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG = 2;
+const size_t RELOCATION_GROUPED_BY_ADDEND_FLAG = 4;
+const size_t RELOCATION_GROUP_HAS_ADDEND_FLAG = 8;
 
 class plain_reloc_iterator {
 #if defined(USE_RELA)
@@ -89,18 +84,19 @@
       }
     }
 
-    if (RELOCATION_GROUPED_BY_OFFSET_DELTA(group_flags_)) {
+    if (is_relocation_grouped_by_offset_delta()) {
       reloc_.r_offset += group_r_offset_delta_;
     } else {
       reloc_.r_offset += decoder_.pop_front();
     }
 
-    if (!RELOCATION_GROUPED_BY_INFO(group_flags_)) {
+    if (!is_relocation_grouped_by_info()) {
       reloc_.r_info = decoder_.pop_front();
     }
 
 #if defined(USE_RELA)
-    if (RELOCATION_GROUP_HAS_ADDEND(group_flags_) && !RELOCATION_GROUPED_BY_ADDEND(group_flags_)) {
+    if (is_relocation_group_has_addend() &&
+        !is_relocation_grouped_by_addend()) {
       reloc_.r_addend += decoder_.pop_front();
     }
 #endif
@@ -115,22 +111,23 @@
     group_size_ = decoder_.pop_front();
     group_flags_ = decoder_.pop_front();
 
-    if (RELOCATION_GROUPED_BY_OFFSET_DELTA(group_flags_)) {
+    if (is_relocation_grouped_by_offset_delta()) {
       group_r_offset_delta_ = decoder_.pop_front();
     }
 
-    if (RELOCATION_GROUPED_BY_INFO(group_flags_)) {
+    if (is_relocation_grouped_by_info()) {
       reloc_.r_info = decoder_.pop_front();
     }
 
-    if (RELOCATION_GROUP_HAS_ADDEND(group_flags_) && RELOCATION_GROUPED_BY_ADDEND(group_flags_)) {
+    if (is_relocation_group_has_addend() &&
+        is_relocation_grouped_by_addend()) {
 #if !defined(USE_RELA)
       // This platform does not support rela, and yet we have it encoded in android_rel section.
       DL_ERR("unexpected r_addend in android.rel section");
       return false;
 #else
       reloc_.r_addend += decoder_.pop_front();
-    } else if (!RELOCATION_GROUP_HAS_ADDEND(group_flags_)) {
+    } else if (!is_relocation_group_has_addend()) {
       reloc_.r_addend = 0;
 #endif
     }
@@ -139,6 +136,22 @@
     return true;
   }
 
+  bool is_relocation_grouped_by_info() const {  // group shares a single r_info
+    return (group_flags_ & RELOCATION_GROUPED_BY_INFO_FLAG) != 0;
+  }
+
+  bool is_relocation_grouped_by_offset_delta() const {  // group shares one r_offset delta
+    return (group_flags_ & RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG) != 0;
+  }
+
+  bool is_relocation_grouped_by_addend() const {  // addend is read once per group
+    return (group_flags_ & RELOCATION_GROUPED_BY_ADDEND_FLAG) != 0;
+  }
+
+  bool is_relocation_group_has_addend() const {  // relocations in the group carry an addend
+    return (group_flags_ & RELOCATION_GROUP_HAS_ADDEND_FLAG) != 0;
+  }
+
   decoder_t decoder_;
   size_t relocation_count_;
   size_t group_size_;
diff --git a/linker/tests/Android.mk b/linker/tests/Android.mk
index 9a08bec..35992c5 100644
--- a/linker/tests/Android.mk
+++ b/linker/tests/Android.mk
@@ -29,6 +29,11 @@
 LOCAL_SRC_FILES := \
   linked_list_test.cpp \
   linker_block_allocator_test.cpp \
-  ../linker_block_allocator.cpp
+  ../linker_block_allocator.cpp \
+  linker_memory_allocator_test.cpp \
+  ../linker_allocator.cpp
+
+# for __libc_fatal
+LOCAL_SRC_FILES += ../../libc/bionic/libc_logging.cpp
 
 include $(BUILD_NATIVE_TEST)
diff --git a/linker/tests/linker_memory_allocator_test.cpp b/linker/tests/linker_memory_allocator_test.cpp
new file mode 100644
index 0000000..f002a0d
--- /dev/null
+++ b/linker/tests/linker_memory_allocator_test.cpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <gtest/gtest.h>
+
+#include "../linker_allocator.h"
+
+#include <unistd.h>
+
+namespace {
+
+/*
+ * This one has a size below the allocator cap, which is 2*sizeof(void*).
+ */
+struct test_struct_small {
+  char dummy_str[5];
+};
+
+struct test_struct_large {  // 1009 bytes: just under 1024
+  char dummy_str[1009];
+};
+
+struct test_struct_huge {  // 73939 bytes: used by test_huge_smoke
+  char dummy_str[73939];
+};
+
+struct test_struct_512 {  // 503 bytes: just under 512
+  char dummy_str[503];
+};
+
+}  // namespace
+
+static size_t kPageSize = sysconf(_SC_PAGE_SIZE);
+
+TEST(linker_memory, test_alloc_0) {
+  LinkerMemoryAllocator allocator;
+  void* ptr = allocator.alloc(0);  // a zero-byte request must still yield a non-null pointer
+  ASSERT_TRUE(ptr != nullptr);
+  allocator.free(ptr);  // release through the allocator that owns the block, not libc free()
+}
+
+TEST(linker_memory, test_free_nullptr) {
+  LinkerMemoryAllocator allocator;
+  allocator.free(nullptr);  // no assertions: the test passes if this call simply returns
+}
+
+TEST(linker_memory, test_realloc) {
+  LinkerMemoryAllocator allocator;
+  uint32_t* array = reinterpret_cast<uint32_t*>(allocator.alloc(512));
+  const size_t array_size = 512 / sizeof(uint32_t);  // element count, used below as a byte count
+
+  uint32_t model[1000];
+  // Fill the reference buffer with a Fibonacci sequence (uint32_t arithmetic wraps).
+  model[0] = 1;
+  model[1] = 1;
+
+  for (size_t i = 2; i < 1000; ++i) {
+    model[i] = model[i - 1] + model[i - 2];
+  }
+
+  memcpy(array, model, array_size);
+  // Growing 512 -> 1024 is expected to move the block and keep its contents.
+  uint32_t* reallocated_ptr = reinterpret_cast<uint32_t*>(allocator.realloc(array, 1024));
+
+  ASSERT_TRUE(reallocated_ptr != nullptr);
+  ASSERT_TRUE(reallocated_ptr != array);
+
+  ASSERT_TRUE(memcmp(reallocated_ptr, model, array_size) == 0);
+
+  array = reallocated_ptr;
+
+  memcpy(array, model, 2*array_size);
+  // Shrinking to 62 bytes is expected to leave the block where it is.
+  reallocated_ptr = reinterpret_cast<uint32_t*>(allocator.realloc(array, 62));
+
+  ASSERT_TRUE(reallocated_ptr == array);
+  // Growing to 4000 bytes is expected to move the block again.
+  reallocated_ptr = reinterpret_cast<uint32_t*>(allocator.realloc(array, 4000));
+
+  ASSERT_TRUE(reallocated_ptr != nullptr);
+  ASSERT_TRUE(reallocated_ptr != array);
+
+  ASSERT_TRUE(memcmp(reallocated_ptr, model, array_size * 2) == 0);
+
+  array = reallocated_ptr;
+
+  memcpy(array, model, 4000);
+  // Growing to 64000 bytes is expected to move the block and keep all 4000 bytes.
+  reallocated_ptr = reinterpret_cast<uint32_t*>(allocator.realloc(array, 64000));
+
+  ASSERT_TRUE(reallocated_ptr != nullptr);
+  ASSERT_TRUE(reallocated_ptr != array);
+
+  ASSERT_TRUE(memcmp(reallocated_ptr, model, 4000) == 0);
+  // A zero-size realloc must go through the owning allocator (not libc) and return nullptr.
+  ASSERT_EQ(nullptr, allocator.realloc(reallocated_ptr, 0));
+}
+
+TEST(linker_memory, test_small_smoke) {
+  LinkerMemoryAllocator allocator;
+
+  const uint8_t expected_zeros[16] = {};  // a fresh small block should read back as zeros
+
+  auto* first = static_cast<test_struct_small*>(allocator.alloc(sizeof(test_struct_small)));
+  auto* second = static_cast<test_struct_small*>(allocator.alloc(sizeof(test_struct_small)));
+
+  ASSERT_TRUE(first != nullptr);
+  ASSERT_TRUE(second != nullptr);
+  // Two consecutive small allocations are expected to sit exactly 16 bytes apart.
+  const uintptr_t first_addr = reinterpret_cast<uintptr_t>(first);
+  const uintptr_t second_addr = reinterpret_cast<uintptr_t>(second);
+  ASSERT_EQ(first_addr + 16, second_addr);
+  ASSERT_TRUE(memcmp(first, expected_zeros, sizeof(expected_zeros)) == 0);
+
+  allocator.free(first);
+  allocator.free(second);
+}
+
+TEST(linker_memory, test_huge_smoke) {
+  LinkerMemoryAllocator allocator;
+
+  // A request of this size should trigger proxy-to-mmap.
+  auto* first = static_cast<test_struct_huge*>(allocator.alloc(sizeof(test_struct_huge)));
+  auto* second = static_cast<test_struct_huge*>(allocator.alloc(sizeof(test_struct_huge)));
+
+  ASSERT_TRUE(first != nullptr);
+  ASSERT_TRUE(second != nullptr);
+
+  // The two allocations must not start in the same page.
+  const uintptr_t first_page = reinterpret_cast<uintptr_t>(first) / kPageSize;
+  const uintptr_t second_page = reinterpret_cast<uintptr_t>(second) / kPageSize;
+  ASSERT_TRUE(first_page != second_page);
+  allocator.free(second);
+  allocator.free(first);
+}
+
+TEST(linker_memory, test_large) {
+  LinkerMemoryAllocator allocator;
+
+  test_struct_large* ptr1 =
+      reinterpret_cast<test_struct_large*>(allocator.alloc(sizeof(test_struct_large)));
+  test_struct_large* ptr2 =
+      reinterpret_cast<test_struct_large*>(allocator.alloc(1024));
+
+  ASSERT_TRUE(ptr1 != nullptr);
+  ASSERT_TRUE(ptr2 != nullptr);
+  // A 1009-byte and a 1024-byte request are expected to land exactly 1024 bytes apart.
+  ASSERT_EQ(reinterpret_cast<uintptr_t>(ptr1) + 1024, reinterpret_cast<uintptr_t>(ptr2));
+
+  // let's allocate until we reach the next page.
+  size_t n = kPageSize / sizeof(test_struct_large) + 1 - 2;  // presumably "- 2" is ptr1 and ptr2
+  test_struct_large* objects[n];  // NOTE(review): n is a runtime value, so this is a VLA
+
+  for (size_t i = 0; i < n; ++i) {
+    test_struct_large* obj_ptr =
+        reinterpret_cast<test_struct_large*>(allocator.alloc(sizeof(test_struct_large)));
+    ASSERT_TRUE(obj_ptr != nullptr);
+    objects[i] = obj_ptr;
+  }
+
+  test_struct_large* ptr_to_free =
+      reinterpret_cast<test_struct_large*>(allocator.alloc(sizeof(test_struct_large)));
+
+  ASSERT_TRUE(ptr_to_free != nullptr);
+  // Release everything; the frees are issued in a different order than the allocations.
+  allocator.free(ptr1);
+
+  for (size_t i=0; i<n; ++i) {
+    allocator.free(objects[i]);
+  }
+
+  allocator.free(ptr2);
+  allocator.free(ptr_to_free);
+}
+
+
diff --git a/tests/Android.mk b/tests/Android.mk
index 0a83e84..c942375 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -128,6 +128,9 @@
     bionic/libc \
     external/tinyxml2 \
 
+libBionicStandardTests_static_libraries := \
+    libbase \
+
 libBionicStandardTests_ldlibs_host := \
     -lrt \
 
@@ -257,11 +260,14 @@
 bionic-unit-tests_static_libraries := \
     libtinyxml2 \
     liblog \
+    libbase \
 
+# TODO: Include __cxa_thread_atexit_test.cpp to glibc tests once it is upgraded (glibc 2.18+)
 bionic-unit-tests_src_files := \
     atexit_test.cpp \
     dl_test.cpp \
     dlext_test.cpp \
+    __cxa_thread_atexit_test.cpp \
     dlfcn_test.cpp \
 
 bionic-unit-tests_cflags := $(test_cflags)
@@ -285,6 +291,10 @@
     libdl_preempt_test_1 \
     libdl_preempt_test_2
 
+# TODO: clang support for thread_local on arm is done via __aeabi_read_tp()
+# which bionic does not support. Reenable this once this question is resolved.
+bionic-unit-tests_clang_target := false
+
 ifneq ($(filter $(TARGET_ARCH),arm arm64),$(TARGET_ARCH))
 bionic-unit-tests_shared_libraries_target += libdl_test_df_1_global
 endif
@@ -311,6 +321,7 @@
     libdl \
     libtinyxml2 \
     liblog \
+    libbase \
 
 bionic-unit-tests-static_force_static_executable := true
 
@@ -349,6 +360,11 @@
     libBionicGtestMain \
     $(fortify_libs) \
 
+bionic-unit-tests-glibc_static_libraries := \
+    libbase \
+    liblog \
+    libcutils \
+
 bionic-unit-tests-glibc_ldlibs := \
     -lrt -ldl -lutil \
 
diff --git a/tests/__cxa_thread_atexit_test.cpp b/tests/__cxa_thread_atexit_test.cpp
new file mode 100644
index 0000000..e388f3b
--- /dev/null
+++ b/tests/__cxa_thread_atexit_test.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <stdint.h>
+
+#include <string>
+
+static std::string class_with_dtor_output;
+
+class ClassWithDtor {  // appends its message to class_with_dtor_output when destroyed
+ public:
+  ~ClassWithDtor() {
+    class_with_dtor_output.append(message);
+  }
+
+  void set_message(const std::string& msg) {
+    message.assign(msg);
+  }
+ private:
+  std::string message;  // text reported by the destructor
+};
+
+static thread_local ClassWithDtor class_with_dtor;
+
+static void* thread_nop(void* arg) {  // pthread entry point; arg is cast to std::string*
+  class_with_dtor.set_message(*static_cast<std::string*>(arg));  // sets this thread's instance
+  return nullptr;
+}
+
+TEST(thread_local, smoke) {
+  std::string msg("dtor called.");
+  pthread_t t;
+  ASSERT_EQ(0, pthread_create(&t, nullptr, thread_nop, &msg));
+  ASSERT_EQ(0, pthread_join(t, nullptr));
+  ASSERT_EQ("dtor called.", class_with_dtor_output);  // written by ~ClassWithDtor
+}
+
+class ClassWithDtorForMainThread {  // prints its message to stderr when destroyed
+ public:
+  ~ClassWithDtorForMainThread() {
+    fprintf(stderr, "%s", message.c_str());
+  }
+
+  void set_message(const std::string& msg) {
+    message.assign(msg);
+  }
+ private:
+  std::string message;  // text written by the destructor
+};
+
+static void thread_atexit_main() {  // body of the ASSERT_EXIT death test below
+  static thread_local ClassWithDtorForMainThread class_with_dtor_for_main_thread;
+  class_with_dtor_for_main_thread.set_message("d-tor for main thread called.");
+  exit(0);  // the test expects the destructor's message to be printed during exit
+}
+
+TEST(thread_local, dtor_for_main_thread) {  // expects exit code 0 plus the dtor message
+  ASSERT_EXIT(thread_atexit_main(), testing::ExitedWithCode(0), "d-tor for main thread called.");
+}
+
+extern "C" int __cxa_thread_atexit_impl(void (*fn)(void*), void* arg, void* dso_handle);
+
+static void thread_atexit_fn1(void* arg) {
+  // arg is the shared std::string log; append this handler's marker to it.
+  static_cast<std::string*>(arg)->append("one, ");
+}
+
+static void thread_atexit_fn2(void* arg) {
+  // arg is the shared std::string log; append this handler's marker to it.
+  static_cast<std::string*>(arg)->append("two, ");
+}
+
+static void thread_atexit_from_atexit(void* arg) {
+  // Marker for the handler that thread_atexit_fn3 registers while it is running.
+  static_cast<std::string*>(arg)->append("oops, ");
+}
+
+static void thread_atexit_fn3(void* arg) {
+  // Register one more handler from inside a running handler, then log our own marker.
+  __cxa_thread_atexit_impl(thread_atexit_from_atexit, arg, nullptr);
+  static_cast<std::string*>(arg)->append("three, ");
+}
+
+static void thread_atexit_fn4(void* arg) {
+  // arg is the shared std::string log; append this handler's marker to it.
+  static_cast<std::string*>(arg)->append("four, ");
+}
+
+static void thread_atexit_fn5(void* arg) {
+  // arg is the shared std::string log; this marker ends the expected sequence.
+  static_cast<std::string*>(arg)->append("five.");
+}
+
+static void* thread_main(void* arg) {  // pthread entry point; arg is the shared std::string log
+  __cxa_thread_atexit_impl(thread_atexit_fn5, arg, nullptr);  // registered first...
+  __cxa_thread_atexit_impl(thread_atexit_fn4, arg, nullptr);
+  __cxa_thread_atexit_impl(thread_atexit_fn3, arg, nullptr);
+  __cxa_thread_atexit_impl(thread_atexit_fn2, arg, nullptr);
+  __cxa_thread_atexit_impl(thread_atexit_fn1, arg, nullptr);  // ...the test expects this to run first
+  return nullptr;
+}
+
+TEST(__cxa_thread_atexit_impl, smoke) {
+  std::string atexit_call_sequence;
+  // The handlers append to this string; it is inspected only after the join.
+  pthread_t t;
+  ASSERT_EQ(0, pthread_create(&t, nullptr, thread_main, &atexit_call_sequence));
+  ASSERT_EQ(0, pthread_join(t, nullptr));
+  ASSERT_EQ("one, two, three, oops, four, five.", atexit_call_sequence);
+}
+
+
diff --git a/tests/dlext_test.cpp b/tests/dlext_test.cpp
index d832653..56a8f6f 100644
--- a/tests/dlext_test.cpp
+++ b/tests/dlext_test.cpp
@@ -52,9 +52,9 @@
 #define LIBSIZE 1024*1024 // how much address space to reserve for it
 
 #if defined(__LP64__)
-#define LIBPATH_PREFIX "%s/nativetest64/libdlext_test_fd/"
+#define LIBPATH_PREFIX "/nativetest64/libdlext_test_fd/"
 #else
-#define LIBPATH_PREFIX "%s/nativetest/libdlext_test_fd/"
+#define LIBPATH_PREFIX "/nativetest/libdlext_test_fd/"
 #endif
 
 #define LIBPATH LIBPATH_PREFIX "libdlext_test_fd.so"
@@ -103,16 +103,13 @@
 }
 
 TEST_F(DlExtTest, ExtInfoUseFd) {
-  const char* android_data = getenv("ANDROID_DATA");
-  ASSERT_TRUE(android_data != nullptr);
-  char lib_path[PATH_MAX];
-  snprintf(lib_path, sizeof(lib_path), LIBPATH, android_data);
+  const std::string lib_path = std::string(getenv("ANDROID_DATA")) + LIBPATH;
 
   android_dlextinfo extinfo;
   extinfo.flags = ANDROID_DLEXT_USE_LIBRARY_FD;
-  extinfo.library_fd = TEMP_FAILURE_RETRY(open(lib_path, O_RDONLY | O_CLOEXEC));
+  extinfo.library_fd = TEMP_FAILURE_RETRY(open(lib_path.c_str(), O_RDONLY | O_CLOEXEC));
   ASSERT_TRUE(extinfo.library_fd != -1);
-  handle_ = android_dlopen_ext(lib_path, RTLD_NOW, &extinfo);
+  handle_ = android_dlopen_ext(lib_path.c_str(), RTLD_NOW, &extinfo);
   ASSERT_DL_NOTNULL(handle_);
   fn f = reinterpret_cast<fn>(dlsym(handle_, "getRandomNumber"));
   ASSERT_DL_NOTNULL(f);
@@ -120,18 +117,14 @@
 }
 
 TEST_F(DlExtTest, ExtInfoUseFdWithOffset) {
-  const char* android_data = getenv("ANDROID_DATA");
-  ASSERT_TRUE(android_data != nullptr);
-
-  char lib_path[PATH_MAX];
-  snprintf(lib_path, sizeof(lib_path), LIBZIPPATH, android_data);
+  const std::string lib_path = std::string(getenv("ANDROID_DATA")) + LIBZIPPATH;
 
   android_dlextinfo extinfo;
   extinfo.flags = ANDROID_DLEXT_USE_LIBRARY_FD | ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET;
-  extinfo.library_fd = TEMP_FAILURE_RETRY(open(lib_path, O_RDONLY | O_CLOEXEC));
+  extinfo.library_fd = TEMP_FAILURE_RETRY(open(lib_path.c_str(), O_RDONLY | O_CLOEXEC));
   extinfo.library_fd_offset = LIBZIP_OFFSET;
 
-  handle_ = android_dlopen_ext(lib_path, RTLD_NOW, &extinfo);
+  handle_ = android_dlopen_ext(lib_path.c_str(), RTLD_NOW, &extinfo);
   ASSERT_DL_NOTNULL(handle_);
 
   fn f = reinterpret_cast<fn>(dlsym(handle_, "getRandomNumber"));
@@ -140,15 +133,15 @@
 }
 
 TEST_F(DlExtTest, ExtInfoUseFdWithInvalidOffset) {
-  const char* android_data = getenv("ANDROID_DATA");
-  ASSERT_TRUE(android_data != nullptr);
-
-  char lib_path[PATH_MAX];
-  snprintf(lib_path, sizeof(lib_path), LIBZIPPATH, android_data);
+  const std::string lib_path = std::string(getenv("ANDROID_DATA")) + LIBZIPPATH;
+  // lib_path is relative when $ANDROID_DATA is relative
+  char lib_realpath_buf[PATH_MAX];
+  ASSERT_TRUE(realpath(lib_path.c_str(), lib_realpath_buf) == lib_realpath_buf);
+  const std::string lib_realpath = std::string(lib_realpath_buf);
 
   android_dlextinfo extinfo;
   extinfo.flags = ANDROID_DLEXT_USE_LIBRARY_FD | ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET;
-  extinfo.library_fd = TEMP_FAILURE_RETRY(open(lib_path, O_RDONLY | O_CLOEXEC));
+  extinfo.library_fd = TEMP_FAILURE_RETRY(open(lib_path.c_str(), O_RDONLY | O_CLOEXEC));
   extinfo.library_fd_offset = 17;
 
   handle_ = android_dlopen_ext("libname_placeholder", RTLD_NOW, &extinfo);
@@ -167,9 +160,9 @@
   ASSERT_SUBSTR("dlopen failed: file offset for the library \"libname_placeholder\" is negative", dlerror());
 
   extinfo.library_fd_offset = PAGE_SIZE;
-  handle_ = android_dlopen_ext("libname_placeholder", RTLD_NOW, &extinfo);
+  handle_ = android_dlopen_ext("libname_ignored", RTLD_NOW, &extinfo);
   ASSERT_TRUE(handle_ == nullptr);
-  ASSERT_STREQ("dlopen failed: \"libname_placeholder\" has bad ELF magic", dlerror());
+  ASSERT_EQ("dlopen failed: \"" + lib_realpath + "\" has bad ELF magic", dlerror());
 
   close(extinfo.library_fd);
 }
@@ -184,6 +177,80 @@
   ASSERT_STREQ("dlopen failed: invalid extended flag combination (ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET without ANDROID_DLEXT_USE_LIBRARY_FD): 0x20", dlerror());
 }
 
+TEST(dlext, android_dlopen_ext_force_load_smoke) {
+  // 1. Open actual file
+  void* handle = dlopen("libdlext_test.so", RTLD_NOW);
+  ASSERT_DL_NOTNULL(handle);
+  // 2. Open link with force_load flag set
+  android_dlextinfo extinfo;  // NOTE(review): only .flags is assigned before use
+  extinfo.flags = ANDROID_DLEXT_FORCE_LOAD;
+  void* handle2 = android_dlopen_ext("libdlext_test_v2.so", RTLD_NOW, &extinfo);
+  ASSERT_DL_NOTNULL(handle2);
+  ASSERT_TRUE(handle != handle2);  // the forced load must yield a distinct handle
+
+  dlclose(handle2);
+  dlclose(handle);
+}
+
+TEST(dlext, android_dlopen_ext_force_load_soname_exception) {
+  // Check if soname lookup still returns already loaded library
+  // when ANDROID_DLEXT_FORCE_LOAD flag is specified.
+  void* handle = dlopen("libdlext_test_v2.so", RTLD_NOW);
+  ASSERT_DL_NOTNULL(handle);
+
+  android_dlextinfo extinfo;  // NOTE(review): only .flags is assigned before use
+  extinfo.flags = ANDROID_DLEXT_FORCE_LOAD;
+
+  // Note that 'libdlext_test.so' is dt_soname for libdlext_test_v2.so
+  void* handle2 = android_dlopen_ext("libdlext_test.so", RTLD_NOW, &extinfo);
+
+  ASSERT_DL_NOTNULL(handle2);
+  ASSERT_TRUE(handle == handle2);  // same handle despite FORCE_LOAD
+
+  dlclose(handle2);
+  dlclose(handle);
+}
+
+TEST(dlfcn, dlopen_from_zip_absolute_path) {
+  // std::string(nullptr) is undefined behavior, so fail cleanly when ANDROID_DATA is unset.
+  const char* android_data = getenv("ANDROID_DATA");
+  ASSERT_TRUE(android_data != nullptr);
+  const std::string lib_path = std::string(android_data) + LIBZIPPATH;
+  void* handle = dlopen((lib_path + "!libdir/libdlext_test_fd.so").c_str(), RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+
+  int (*fn)(void) = reinterpret_cast<int (*)(void)>(dlsym(handle, "getRandomNumber"));
+  ASSERT_TRUE(fn != nullptr);
+  EXPECT_EQ(4, fn());
+  dlclose(handle);
+}
+
+TEST(dlfcn, dlopen_from_zip_ld_library_path) {
+  // std::string(nullptr) is undefined behavior, so fail cleanly when ANDROID_DATA is unset.
+  const char* android_data = getenv("ANDROID_DATA");
+  ASSERT_TRUE(android_data != nullptr);
+  const std::string lib_path = std::string(android_data) + LIBZIPPATH + "!libdir";
+
+  typedef void (*fn_t)(const char*);
+  fn_t android_update_LD_LIBRARY_PATH =
+      reinterpret_cast<fn_t>(dlsym(RTLD_DEFAULT, "android_update_LD_LIBRARY_PATH"));
+  ASSERT_TRUE(android_update_LD_LIBRARY_PATH != nullptr) << dlerror();
+
+  // Before the search path is updated the library must not be found.
+  void* handle = dlopen("libdlext_test_fd.so", RTLD_NOW);
+  ASSERT_TRUE(handle == nullptr);
+
+  android_update_LD_LIBRARY_PATH(lib_path.c_str());
+  handle = dlopen("libdlext_test_fd.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+
+  int (*fn)(void) = reinterpret_cast<int (*)(void)>(dlsym(handle, "getRandomNumber"));
+  ASSERT_TRUE(fn != nullptr);
+  EXPECT_EQ(4, fn());
+  dlclose(handle);
+}
+
+
 TEST_F(DlExtTest, Reserved) {
   void* start = mmap(nullptr, LIBSIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
                      -1, 0);
diff --git a/tests/dlfcn_test.cpp b/tests/dlfcn_test.cpp
index 3b1001a..1023644 100644
--- a/tests/dlfcn_test.cpp
+++ b/tests/dlfcn_test.cpp
@@ -22,14 +22,16 @@
 #include <stdio.h>
 #include <stdint.h>
 
-#include "gtest_ex.h"
 #include "private/ScopeGuard.h"
 
 #include <string>
 
+#include "utils.h"
+
 #define ASSERT_SUBSTR(needle, haystack) \
     ASSERT_PRED_FORMAT2(::testing::IsSubstring, needle, haystack)
 
+
 static bool g_called = false;
 extern "C" void DlSymTestFunction() {
   g_called = true;
@@ -47,7 +49,7 @@
   ASSERT_EQ(17, g_ctor_function_called);
 }
 
-TEST(dlfcn, dlsym_in_self) {
+TEST(dlfcn, dlsym_in_executable) {
   dlerror(); // Clear any pending errors.
   void* self = dlopen(NULL, RTLD_NOW);
   ASSERT_TRUE(self != NULL);
@@ -65,6 +67,27 @@
   ASSERT_EQ(0, dlclose(self));
 }
 
+TEST(dlfcn, dlsym_from_sofile) {
+  void* handle = dlopen("libtest_dlsym_from_this.so", RTLD_LAZY | RTLD_LOCAL);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  // The library was opened with RTLD_LOCAL: check that the executable itself
+  // can't find 'test_dlsym_symbol' via dlsym(RTLD_DEFAULT).
+  void* symbol = dlsym(RTLD_DEFAULT, "test_dlsym_symbol");
+  ASSERT_TRUE(symbol == nullptr);
+  ASSERT_SUBSTR("undefined symbol: test_dlsym_symbol", dlerror());
+
+  typedef int* (*fn_t)();
+  fn_t fn = reinterpret_cast<fn_t>(dlsym(handle, "lookup_dlsym_symbol_using_RTLD_DEFAULT"));
+
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  // The same RTLD_DEFAULT lookup, issued from inside the library, is expected to succeed.
+  int* ptr = fn();
+  ASSERT_TRUE(ptr != nullptr) << dlerror();
+  ASSERT_EQ(42, *ptr);
+
+  dlclose(handle);
+}
+
 TEST(dlfcn, dlsym_with_dependencies) {
   void* handle = dlopen("libtest_with_dependency.so", RTLD_NOW);
   ASSERT_TRUE(handle != NULL);
@@ -90,6 +113,32 @@
   ASSERT_EQ(0, dlclose(handle2));
 }
 
+TEST(dlfcn, dlopen_by_soname) {
+  static const char* soname = "libdlext_test_soname.so";  // DT_SONAME of the library below
+  static const char* filename = "libdlext_test_different_soname.so";  // its on-disk name
+  // 1. Make sure there is no library with soname in default search path
+  void* handle = dlopen(soname, RTLD_NOW);
+  ASSERT_TRUE(handle == nullptr);
+
+  // 2. Load a library using filename
+  handle = dlopen(filename, RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+
+  // 3. Find library by soname; with RTLD_NOLOAD only an already-loaded library can match
+  void* handle_soname = dlopen(soname, RTLD_NOW | RTLD_NOLOAD);
+  ASSERT_TRUE(handle_soname != nullptr) << dlerror();
+  ASSERT_EQ(handle, handle_soname);
+
+  // 4. RTLD_NOLOAD should still work with filename
+  void* handle_filename = dlopen(filename, RTLD_NOW | RTLD_NOLOAD);
+  ASSERT_TRUE(handle_filename != nullptr) << dlerror();
+  ASSERT_EQ(handle, handle_filename);
+
+  dlclose(handle_filename);
+  dlclose(handle_soname);
+  dlclose(handle);
+}
+
 // ifuncs are only supported on intel and arm64 for now
 #if defined (__aarch64__) || defined(__i386__) || defined(__x86_64__)
 TEST(dlfcn, ifunc) {
@@ -376,33 +425,31 @@
   // Test dlopens parent1 which loads and relocates libtest_two_parents_child.so
   // as a second step it dlopens parent2 and dlcloses parent1...
 
-  test_isolated([] {
-    void* handle = dlopen("libtest_two_parents_parent1.so", RTLD_NOW | RTLD_LOCAL);
-    ASSERT_TRUE(handle != nullptr) << dlerror();
+  void* handle = dlopen("libtest_two_parents_parent1.so", RTLD_NOW | RTLD_LOCAL);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
 
-    void* handle2 = dlopen("libtest_two_parents_parent2.so", RTLD_NOW | RTLD_LOCAL);
-    ASSERT_TRUE(handle2 != nullptr) << dlerror();
+  void* handle2 = dlopen("libtest_two_parents_parent2.so", RTLD_NOW | RTLD_LOCAL);
+  ASSERT_TRUE(handle2 != nullptr) << dlerror();
 
-    typedef int (*fn_t) (void);
-    fn_t fn = reinterpret_cast<fn_t>(dlsym(handle2, "check_order_reloc_get_answer"));
-    ASSERT_TRUE(fn != nullptr) << dlerror();
-    ASSERT_EQ(42, fn());
+  typedef int (*fn_t) (void);
+  fn_t fn = reinterpret_cast<fn_t>(dlsym(handle2, "check_order_reloc_get_answer"));
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  ASSERT_EQ(42, fn());
 
-    ASSERT_EQ(0, dlclose(handle));
+  ASSERT_EQ(0, dlclose(handle));
 
-    handle = dlopen("libtest_two_parents_parent1.so", RTLD_NOW | RTLD_LOCAL | RTLD_NOLOAD);
-    ASSERT_TRUE(handle != nullptr);
-    ASSERT_EQ(0, dlclose(handle));
+  handle = dlopen("libtest_two_parents_parent1.so", RTLD_NOW | RTLD_LOCAL | RTLD_NOLOAD);
+  ASSERT_TRUE(handle != nullptr);
+  ASSERT_EQ(0, dlclose(handle));
 
-    fn = reinterpret_cast<fn_t>(dlsym(handle2, "check_order_reloc_get_answer"));
-    ASSERT_TRUE(fn != nullptr) << dlerror();
-    ASSERT_EQ(42, fn());
+  fn = reinterpret_cast<fn_t>(dlsym(handle2, "check_order_reloc_get_answer"));
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  ASSERT_EQ(42, fn());
 
-    ASSERT_EQ(0, dlclose(handle2));
+  ASSERT_EQ(0, dlclose(handle2));
 
-    handle = dlopen("libtest_two_parents_parent1.so", RTLD_NOW | RTLD_LOCAL | RTLD_NOLOAD);
-    ASSERT_TRUE(handle == nullptr);
-  });
+  handle = dlopen("libtest_two_parents_parent1.so", RTLD_NOW | RTLD_LOCAL | RTLD_NOLOAD);
+  ASSERT_TRUE(handle == nullptr);
 }
 
 extern "C" int check_order_reloc_root_get_answer_impl() {
@@ -485,25 +532,23 @@
 // libtest_with_dependency_loop_b.so -> libtest_with_dependency_loop_c.so ->
 // libtest_with_dependency_loop_a.so
 TEST(dlfcn, dlopen_check_loop) {
-  test_isolated([] {
-    void* handle = dlopen("libtest_with_dependency_loop.so", RTLD_NOW);
-    ASSERT_TRUE(handle != nullptr) << dlerror();
-    void* f = dlsym(handle, "dlopen_test_loopy_function");
-    ASSERT_TRUE(f != nullptr) << dlerror();
-    EXPECT_TRUE(reinterpret_cast<bool (*)(void)>(f)());
-    ASSERT_EQ(0, dlclose(handle));
+  void* handle = dlopen("libtest_with_dependency_loop.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  void* f = dlsym(handle, "dlopen_test_loopy_function");
+  ASSERT_TRUE(f != nullptr) << dlerror();
+  EXPECT_TRUE(reinterpret_cast<bool (*)(void)>(f)());
+  ASSERT_EQ(0, dlclose(handle));
 
-    // dlopen second time to make sure that the library was unloaded correctly
-    handle = dlopen("libtest_with_dependency_loop.so", RTLD_NOW | RTLD_NOLOAD);
-    ASSERT_TRUE(handle == nullptr);
+  // dlopen second time to make sure that the library was unloaded correctly
+  handle = dlopen("libtest_with_dependency_loop.so", RTLD_NOW | RTLD_NOLOAD);
+  ASSERT_TRUE(handle == nullptr);
 #ifdef __BIONIC__
-    // TODO: glibc returns nullptr on dlerror() here. Is it bug?
-    ASSERT_STREQ("dlopen failed: library \"libtest_with_dependency_loop.so\" wasn't loaded and RTLD_NOLOAD prevented it", dlerror());
+  // TODO: glibc returns nullptr on dlerror() here. Is it bug?
+  ASSERT_STREQ("dlopen failed: library \"libtest_with_dependency_loop.so\" wasn't loaded and RTLD_NOLOAD prevented it", dlerror());
 #endif
 
-    handle = dlopen("libtest_with_dependency_a.so", RTLD_NOW | RTLD_NOLOAD);
-    ASSERT_TRUE(handle == nullptr);
-  });
+  handle = dlopen("libtest_with_dependency_a.so", RTLD_NOW | RTLD_NOLOAD);
+  ASSERT_TRUE(handle == nullptr);
 }
 
 TEST(dlfcn, dlopen_nodelete) {
@@ -657,7 +702,7 @@
   ASSERT_EQ(0, dlclose(self));
 }
 
-TEST(dlfcn, dladdr) {
+TEST(dlfcn, dladdr_executable) {
   dlerror(); // Clear any pending errors.
   void* self = dlopen(NULL, RTLD_NOW);
   ASSERT_TRUE(self != NULL);
@@ -678,13 +723,11 @@
   rc = readlink("/proc/self/exe", executable_path, sizeof(executable_path));
   ASSERT_NE(rc, -1);
   executable_path[rc] = '\0';
-  std::string executable_name(basename(executable_path));
 
   // The filename should be that of this executable.
-  // Note that we don't know whether or not we have the full path, so we want an "ends_with" test.
-  std::string dli_fname(info.dli_fname);
-  dli_fname = basename(&dli_fname[0]);
-  ASSERT_EQ(dli_fname, executable_name);
+  char dli_realpath[PATH_MAX];
+  ASSERT_TRUE(realpath(info.dli_fname, dli_realpath) != nullptr);
+  ASSERT_STREQ(executable_path, dli_realpath);
 
   // The symbol name should be the symbol we looked up.
   ASSERT_STREQ(info.dli_sname, "DlSymTestFunction");
@@ -692,22 +735,16 @@
   // The address should be the exact address of the symbol.
   ASSERT_EQ(info.dli_saddr, sym);
 
-  // Look in /proc/pid/maps to find out what address we were loaded at.
-  // TODO: factor /proc/pid/maps parsing out into a class and reuse all over bionic.
-  void* base_address = NULL;
-  char line[BUFSIZ];
-  FILE* fp = fopen("/proc/self/maps", "r");
-  ASSERT_TRUE(fp != NULL);
-  while (fgets(line, sizeof(line), fp) != NULL) {
-    uintptr_t start = strtoul(line, 0, 16);
-    line[strlen(line) - 1] = '\0'; // Chomp the '\n'.
-    char* path = strchr(line, '/');
-    if (path != NULL && strcmp(executable_path, path) == 0) {
-      base_address = reinterpret_cast<void*>(start);
+  std::vector<map_record> maps;
+  ASSERT_TRUE(Maps::parse_maps(&maps));
+
+  void* base_address = nullptr;
+  for (const map_record& rec : maps) {
+    if (executable_path == rec.pathname) {
+      base_address = reinterpret_cast<void*>(rec.addr_start);
       break;
     }
   }
-  fclose(fp);
 
   // The base address should be the address we were loaded at.
   ASSERT_EQ(info.dli_fbase, base_address);
@@ -715,6 +752,32 @@
   ASSERT_EQ(0, dlclose(self));
 }
 
+#if defined(__LP64__)
+#define BIONIC_PATH_TO_LIBC "/system/lib64/libc.so"
+#else
+#define BIONIC_PATH_TO_LIBC "/system/lib/libc.so"
+#endif
+
+TEST(dlfcn, dladdr_libc) {
+#if defined(__BIONIC__)
+  Dl_info info;
+  void* addr = reinterpret_cast<void*>(puts); // well-known libc function
+  ASSERT_TRUE(dladdr(addr, &info) != 0);
+
+  // /system/lib is a symlink when this test is executed on the host, so compare resolved paths.
+  char libc_realpath[PATH_MAX];
+  ASSERT_TRUE(realpath(BIONIC_PATH_TO_LIBC, libc_realpath) == libc_realpath);
+
+  ASSERT_STREQ(libc_realpath, info.dli_fname);
+  // TODO: add check for dli_fbase
+  ASSERT_STREQ("puts", info.dli_sname);
+  ASSERT_EQ(addr, info.dli_saddr);
+#else
+  GTEST_LOG_(INFO) << "This test does nothing for glibc. Glibc returns path from ldconfig "
+      "for libc.so, which is symlink itself (not a realpath).\n";
+#endif
+}
+
 TEST(dlfcn, dladdr_invalid) {
   Dl_info info;
 
@@ -830,15 +893,13 @@
 }
 
 TEST(dlfcn, dlopen_undefined_weak_func) {
-  test_isolated([] {
-    void* handle = dlopen("libtest_dlopen_weak_undefined_func.so", RTLD_NOW);
-    ASSERT_TRUE(handle != nullptr) << dlerror();
-    int (*weak_func)();
-    weak_func = reinterpret_cast<int (*)()>(dlsym(handle, "use_weak_undefined_func"));
-    ASSERT_TRUE(weak_func != nullptr) << dlerror();
-    EXPECT_EQ(6551, weak_func());
-    dlclose(handle);
-  });
+  void* handle = dlopen("libtest_dlopen_weak_undefined_func.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  int (*weak_func)();
+  weak_func = reinterpret_cast<int (*)()>(dlsym(handle, "use_weak_undefined_func"));
+  ASSERT_TRUE(weak_func != nullptr) << dlerror();
+  EXPECT_EQ(6551, weak_func());
+  dlclose(handle);
 }
 
 TEST(dlfcn, dlopen_symlink) {
@@ -864,3 +925,63 @@
   GTEST_LOG_(INFO) << "This test is disabled for glibc (glibc segfaults if you try to call dlopen from a constructor).\n";
 #endif
 }
+
+TEST(dlfcn, symbol_versioning_use_v1) {
+  // uselibv1 is linked against the v1-only build of libtest_versioned_lib.so, so expects 1.
+  void* handle = dlopen("libtest_versioned_uselibv1.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  auto fn = reinterpret_cast<int (*)()>(dlsym(handle, "get_function_version"));
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  EXPECT_EQ(1, fn());  // Non-fatal so that dlclose() below still runs on a mismatch.
+  ASSERT_EQ(0, dlclose(handle));
+}
+
+TEST(dlfcn, symbol_versioning_use_v2) {
+  // uselibv2 is linked against the v2 build of libtest_versioned_lib.so, so expects 2.
+  void* handle = dlopen("libtest_versioned_uselibv2.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  auto fn = reinterpret_cast<int (*)()>(dlsym(handle, "get_function_version"));
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  EXPECT_EQ(2, fn());  // Non-fatal so that dlclose() below still runs on a mismatch.
+  ASSERT_EQ(0, dlclose(handle));
+}
+
+TEST(dlfcn, symbol_versioning_use_other_v2) {
+  // Expects 20, the value returned by the TESTLIB_V2 function in versioned_lib_other.cpp.
+  void* handle = dlopen("libtest_versioned_uselibv2_other.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  auto fn = reinterpret_cast<int (*)()>(dlsym(handle, "get_function_version"));
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  EXPECT_EQ(20, fn());  // Non-fatal so that dlclose() below still runs on a mismatch.
+  ASSERT_EQ(0, dlclose(handle));
+}
+
+TEST(dlfcn, symbol_versioning_use_other_v3) {
+  // uselibv3_other is linked against libtest_versioned_lib itself; expects 3, not otherlib's 20.
+  void* handle = dlopen("libtest_versioned_uselibv3_other.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  auto fn = reinterpret_cast<int (*)()>(dlsym(handle, "get_function_version"));
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  EXPECT_EQ(3, fn());  // Non-fatal so that dlclose() below still runs on a mismatch.
+  ASSERT_EQ(0, dlclose(handle));
+}
+
+TEST(dlfcn, symbol_versioning_default_via_dlsym) {
+  // dlsym() is given no version here, so it must return the default version, which is 3.
+  void* handle = dlopen("libtest_versioned_lib.so", RTLD_NOW);
+  ASSERT_TRUE(handle != nullptr) << dlerror();
+  auto fn = reinterpret_cast<int (*)()>(dlsym(handle, "versioned_function"));
+  ASSERT_TRUE(fn != nullptr) << dlerror();
+  EXPECT_EQ(3, fn());  // Non-fatal so that dlclose() below still runs on a mismatch.
+  ASSERT_EQ(0, dlclose(handle));
+}
+
+// This preempts the implementation from libtest_versioned_lib.so
+extern "C" int version_zero_function() {
+  return 0;
+}
+
+// This preempts the implementation from libtest_versioned_uselibv*.so
+extern "C" int version_zero_function2() {
+  return 0;
+}
diff --git a/tests/fortify_test.cpp b/tests/fortify_test.cpp
index 5cc728f..4faccb4 100644
--- a/tests/fortify_test.cpp
+++ b/tests/fortify_test.cpp
@@ -623,6 +623,22 @@
   ASSERT_FORTIFY(FD_ISSET(0, set));
 }
 
+TEST_F(DEATHTEST, pread_fortified) {
+  char buf[1];  // smaller than the ct-byte read requested below
+  size_t ct = atoi("2"); // prevent optimizations; 2 exceeds sizeof(buf) == 1
+  int fd = open("/dev/null", O_RDONLY);
+  ASSERT_FORTIFY(pread(fd, buf, ct, 0));  // NOTE(review): macro defined elsewhere; presumably expects an abort
+  close(fd);
+}
+
+TEST_F(DEATHTEST, pread64_fortified) {
+  char buf[1];  // smaller than the ct-byte read requested below
+  size_t ct = atoi("2"); // prevent optimizations; 2 exceeds sizeof(buf) == 1
+  int fd = open("/dev/null", O_RDONLY);
+  ASSERT_FORTIFY(pread64(fd, buf, ct, 0));  // NOTE(review): macro defined elsewhere; presumably expects an abort
+  close(fd);
+}
+
 TEST_F(DEATHTEST, read_fortified) {
   char buf[1];
   size_t ct = atoi("2"); // prevent optimizations
@@ -631,6 +647,18 @@
   close(fd);
 }
 
+TEST_F(DEATHTEST, readlink_fortified) {
+  char buf[1];  // smaller than the ct-byte buffer size passed below
+  size_t ct = atoi("2"); // prevent optimizations; 2 exceeds sizeof(buf) == 1
+  ASSERT_FORTIFY(readlink("/dev/null", buf, ct));  // NOTE(review): macro defined elsewhere; presumably expects an abort
+}
+
+TEST_F(DEATHTEST, readlinkat_fortified) {
+  char buf[1];  // smaller than the ct-byte buffer size passed below
+  size_t ct = atoi("2"); // prevent optimizations; 2 exceeds sizeof(buf) == 1
+  ASSERT_FORTIFY(readlinkat(AT_FDCWD, "/dev/null", buf, ct));  // NOTE(review): macro defined elsewhere; presumably expects an abort
+}
+
 extern "C" char* __strncat_chk(char*, const char*, size_t, size_t);
 extern "C" char* __strcat_chk(char*, const char*, size_t);
 
diff --git a/tests/ftw_test.cpp b/tests/ftw_test.cpp
index 6741d00..b7e5bd5 100644
--- a/tests/ftw_test.cpp
+++ b/tests/ftw_test.cpp
@@ -30,11 +30,11 @@
   char path[PATH_MAX];
 
   snprintf(path, sizeof(path), "%s/dir", root);
-  ASSERT_EQ(0, mkdir(path, 0555));
+  ASSERT_EQ(0, mkdir(path, 0755)) << path;
   snprintf(path, sizeof(path), "%s/dir/sub", root);
-  ASSERT_EQ(0, mkdir(path, 0555));
+  ASSERT_EQ(0, mkdir(path, 0555)) << path;
   snprintf(path, sizeof(path), "%s/unreadable-dir", root);
-  ASSERT_EQ(0, mkdir(path, 0000));
+  ASSERT_EQ(0, mkdir(path, 0000)) << path;
 
   snprintf(path, sizeof(path), "%s/dangler", root);
   ASSERT_EQ(0, symlink("/does-not-exist", path));
diff --git a/tests/getauxval_test.cpp b/tests/getauxval_test.cpp
index b331150..6ce00f1 100644
--- a/tests/getauxval_test.cpp
+++ b/tests/getauxval_test.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <errno.h>
 #include <sys/cdefs.h>
 #include <gtest/gtest.h>
 
@@ -53,7 +54,9 @@
 
 TEST(getauxval, unexpected_values) {
 #if defined(GETAUXVAL_CAN_COMPILE)
+  errno = 0;
   ASSERT_EQ((unsigned long int) 0, getauxval(0xdeadbeef));
+  ASSERT_EQ(ENOENT, errno);
 #else
   GTEST_LOG_(INFO) << "This test does nothing.\n";
 #endif
diff --git a/tests/gtest_ex.h b/tests/gtest_ex.h
deleted file mode 100644
index fe1d894..0000000
--- a/tests/gtest_ex.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-
-template<typename F>
-void test_isolated(F test) {
-  int pid = fork();
-  ASSERT_NE(-1, pid) << strerror(errno);
-
-  if (pid == 0) {
-    test();
-    _exit(testing::Test::HasFailure() ? 1 : 0);
-  }
-
-  int status;
-  ASSERT_EQ(pid, waitpid(pid, &status, 0));
-  ASSERT_TRUE(WIFEXITED(status));
-  ASSERT_EQ(0, WEXITSTATUS(status)) << "Forked test has failed, see above..";
-}
diff --git a/tests/gtest_main.cpp b/tests/gtest_main.cpp
index bf2b695..692b7e8 100644
--- a/tests/gtest_main.cpp
+++ b/tests/gtest_main.cpp
@@ -277,8 +277,8 @@
 // PrettyUnitTestResultPrinter. The reason for copy is that PrettyUnitTestResultPrinter
 // is defined and used in gtest.cc, which is hard to reuse.
 static void OnTestIterationStartPrint(const std::vector<TestCase>& testcase_list, size_t iteration,
-                                      size_t iteration_count) {
-  if (iteration_count > 1) {
+                                      int iteration_count) {
+  if (iteration_count != 1) {
     printf("\nRepeating all tests (iteration %zu) . . .\n\n", iteration);
   }
   ColoredPrintf(COLOR_GREEN,  "[==========] ");
@@ -743,7 +743,7 @@
 // makes deadlock to use fork in multi-thread.
 // Returns true if all tests run successfully, otherwise return false.
 static bool RunTestInSeparateProc(int argc, char** argv, std::vector<TestCase>& testcase_list,
-                                  size_t iteration_count, size_t job_count,
+                                  int iteration_count, size_t job_count,
                                   const std::string& xml_output_filename) {
   // Stop default result printer to avoid environment setup/teardown information for each test.
   testing::UnitTest::GetInstance()->listeners().Release(
@@ -762,7 +762,9 @@
 
   bool all_tests_passed = true;
 
-  for (size_t iteration = 1; iteration <= iteration_count; ++iteration) {
+  for (size_t iteration = 1;
+       iteration_count < 0 || iteration <= static_cast<size_t>(iteration_count);
+       ++iteration) {
     OnTestIterationStartPrint(testcase_list, iteration, iteration_count);
     int64_t iteration_start_time_ns = NanoTime();
     time_t epoch_iteration_start_time = time(NULL);
@@ -875,7 +877,7 @@
   int test_warnline_ms;
   std::string gtest_color;
   bool gtest_print_time;
-  size_t gtest_repeat;
+  int gtest_repeat;
   std::string gtest_output;
 };
 
@@ -993,12 +995,9 @@
     } else if (strcmp(args[i], "--gtest_print_time=0") == 0) {
       options.gtest_print_time = false;
     } else if (strncmp(args[i], "--gtest_repeat=", strlen("--gtest_repeat=")) == 0) {
-      int repeat = atoi(args[i] + strlen("--gtest_repeat="));
-      if (repeat < 0) {
-        fprintf(stderr, "invalid gtest_repeat count: %d\n", repeat);
-        return false;
-      }
-      options.gtest_repeat = repeat;
+      // If the value of gtest_repeat is < 0, then it indicates the tests
+      // should be repeated forever.
+      options.gtest_repeat = atoi(args[i] + strlen("--gtest_repeat="));
       // Remove --gtest_repeat=xx from arguments, so child process only run one iteration for a single test.
       args.erase(args.begin() + i);
       --i;
diff --git a/tests/libs/Android.build.dlext_testzip.mk b/tests/libs/Android.build.dlext_testzip.mk
index d05927e..7cc0dae 100644
--- a/tests/libs/Android.build.dlext_testzip.mk
+++ b/tests/libs/Android.build.dlext_testzip.mk
@@ -35,7 +35,7 @@
 $(LOCAL_BUILT_MODULE): PRIVATE_ALIGNMENT := 4096 # PAGE_SIZE
 $(LOCAL_BUILT_MODULE) : $(my_shared_libs) | $(ZIPALIGN)
 	@echo "Zipalign $(PRIVATE_ALIGNMENT): $@"
-	$(hide) rm -rf $(dir $@) && mkdir -p $(dir $@)
-	$(hide) cp $^ $(dir $@)
-	$(hide) (cd $(dir $@) && touch empty_file.txt && zip -rD0 $(notdir $@).unaligned empty_file.txt *.so)
+	$(hide) rm -rf $(dir $@) && mkdir -p $(dir $@)/libdir
+	$(hide) cp $^ $(dir $@)/libdir
+	$(hide) (cd $(dir $@) && touch empty_file.txt && zip -rD0 $(notdir $@).unaligned empty_file.txt libdir/*.so)
 	$(hide) $(ZIPALIGN) $(PRIVATE_ALIGNMENT) $@.unaligned $@
diff --git a/tests/libs/Android.build.versioned_lib.mk b/tests/libs/Android.build.versioned_lib.mk
new file mode 100644
index 0000000..f3a6374
--- /dev/null
+++ b/tests/libs/Android.build.versioned_lib.mk
@@ -0,0 +1,120 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# -----------------------------------------------------------------------------
+# Libraries used to test versioned symbols
+# -----------------------------------------------------------------------------
+libtest_versioned_uselibv1_src_files := versioned_uselib.cpp
+
+libtest_versioned_uselibv1_shared_libraries := \
+    libtest_versioned_libv1
+
+module := libtest_versioned_uselibv1
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
+libtest_versioned_uselibv2_src_files := \
+    versioned_uselib.cpp
+
+libtest_versioned_uselibv2_shared_libraries := \
+    libtest_versioned_libv2
+
+libtest_versioned_uselibv2_ldflags := \
+    -Wl,--version-script,$(LOCAL_PATH)/versioned_uselib.map
+
+module := libtest_versioned_uselibv2
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
+libtest_versioned_uselibv2_other_src_files := \
+    versioned_uselib.cpp
+
+libtest_versioned_uselibv2_other_shared_libraries := \
+    libtest_versioned_otherlib_empty libtest_versioned_libv2
+
+module := libtest_versioned_uselibv2_other
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
+libtest_versioned_uselibv3_other_src_files := \
+    versioned_uselib.cpp
+
+libtest_versioned_uselibv3_other_shared_libraries := \
+    libtest_versioned_otherlib_empty libtest_versioned_lib
+
+module := libtest_versioned_uselibv3_other
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
+# lib v1 - this one is used only at build (link) time and never at runtime,
+# which forces libtest_versioned_uselibv1 to use version 1 of versioned_function()
+# from libtest_versioned_lib.so
+# -----------------------------------------------------------------------------
+libtest_versioned_libv1_src_files := \
+    versioned_lib_v1.cpp
+
+libtest_versioned_libv1_ldflags := \
+    -Wl,--version-script,$(LOCAL_PATH)/versioned_lib_v1.map \
+    -Wl,-soname,libtest_versioned_lib.so
+
+module := libtest_versioned_libv1
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
+# lib v2 - to make libtest_versioned_uselibv2.so use version 2 of versioned_function()
+# -----------------------------------------------------------------------------
+libtest_versioned_libv2_src_files := \
+    versioned_lib_v2.cpp
+
+libtest_versioned_libv2_ldflags := \
+    -Wl,--version-script,$(LOCAL_PATH)/versioned_lib_v2.map \
+    -Wl,-soname,libtest_versioned_lib.so
+
+module := libtest_versioned_libv2
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+
+# -----------------------------------------------------------------------------
+# last version - this one is used at runtime and exports 3 versions
+# of versioned_function().
+# -----------------------------------------------------------------------------
+libtest_versioned_lib_src_files := \
+    versioned_lib_v3.cpp
+
+libtest_versioned_lib_ldflags := \
+    -Wl,--version-script,$(LOCAL_PATH)/versioned_lib_v3.map
+
+module := libtest_versioned_lib
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
+# This library is empty; the actual implementation (libtest_versioned_otherlib,
+# built below) exports versioned_function() under the TESTLIB_V2 version.
+# -----------------------------------------------------------------------------
+libtest_versioned_otherlib_empty_src_files := empty.cpp
+
+libtest_versioned_otherlib_empty_ldflags := -Wl,-soname,libtest_versioned_otherlib.so
+module := libtest_versioned_otherlib_empty
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
+libtest_versioned_otherlib_src_files := versioned_lib_other.cpp
+
+libtest_versioned_otherlib_ldflags := \
+    -Wl,--version-script,$(LOCAL_PATH)/versioned_lib_other.map
+
+module := libtest_versioned_otherlib
+include $(LOCAL_PATH)/Android.build.testlib.mk
diff --git a/tests/libs/Android.mk b/tests/libs/Android.mk
index 7ca856c..3d5b060 100644
--- a/tests/libs/Android.mk
+++ b/tests/libs/Android.mk
@@ -26,6 +26,7 @@
     $(LOCAL_PATH)/Android.build.dlopen_check_order_reloc_siblings.mk \
     $(LOCAL_PATH)/Android.build.dlopen_check_order_reloc_main_executable.mk \
     $(LOCAL_PATH)/Android.build.testlib.mk \
+    $(LOCAL_PATH)/Android.build.versioned_lib.mk \
     $(TEST_PATH)/Android.build.mk
 
 # -----------------------------------------------------------------------------
@@ -117,6 +118,17 @@
 build_target := SHARED_LIBRARY
 include $(TEST_PATH)/Android.build.mk
 
+# ----------------------------------------------------------------------------
+# Library whose DT_SONAME (libdlext_test_soname.so) does not match its filename
+# ----------------------------------------------------------------------------
+libdlext_test_different_soname_src_files := \
+    dlext_test_library.cpp
+
+module := libdlext_test_different_soname
+module_tag := optional
+libdlext_test_different_soname_ldflags := -Wl,-soname=libdlext_test_soname.so
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
 # -----------------------------------------------------------------------------
 # Library used by dlext tests - zipped and aligned
 # -----------------------------------------------------------------------------
@@ -187,6 +199,11 @@
 include $(LOCAL_PATH)/Android.build.dlopen_check_order_reloc_main_executable.mk
 
 # -----------------------------------------------------------------------------
+# Build libtest_versioned_lib.so with its dependencies.
+# -----------------------------------------------------------------------------
+include $(LOCAL_PATH)/Android.build.versioned_lib.mk
+
+# -----------------------------------------------------------------------------
 # Library with dependency loop used by dlfcn tests
 #
 # libtest_with_dependency_loop -> a -> b -> c -> a
@@ -360,6 +377,16 @@
 include $(LOCAL_PATH)/Android.build.testlib.mk
 
 # -----------------------------------------------------------------------------
+# Library to check RTLD_LOCAL with dlsym in 'this'
+# -----------------------------------------------------------------------------
+libtest_dlsym_from_this_src_files := dlsym_from_this.cpp
+
+module := libtest_dlsym_from_this
+libtest_dlsym_from_this_shared_libraries_target := libdl
+
+include $(LOCAL_PATH)/Android.build.testlib.mk
+
+# -----------------------------------------------------------------------------
 # Library with weak undefined function
 # -----------------------------------------------------------------------------
 libtest_dlopen_weak_undefined_func_src_files := \
@@ -376,13 +403,9 @@
 
 module := libtest_dlopen_from_ctor
 
-build_target := SHARED_LIBRARY
-build_type := host
-include $(TEST_PATH)/Android.build.mk
+libtest_dlopen_from_ctor_shared_libraries_target := libdl
 
-libtest_dlopen_from_ctor_shared_libraries := libdl
-build_type := target
-include $(TEST_PATH)/Android.build.mk
+include $(LOCAL_PATH)/Android.build.testlib.mk
 
 # -----------------------------------------------------------------------------
 # Library that depends on the library with constructor that calls dlopen() b/7941716
diff --git a/tests/libs/dlsym_from_this.cpp b/tests/libs/dlsym_from_this.cpp
new file mode 100644
index 0000000..b5215c9
--- /dev/null
+++ b/tests/libs/dlsym_from_this.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <dlfcn.h>
+#include <stdio.h>
+
+int test_dlsym_symbol = 42;
+
+// Resolves "test_dlsym_symbol" via dlsym(RTLD_DEFAULT); returns nullptr when it is not found.
+extern "C" int* lookup_dlsym_symbol_using_RTLD_DEFAULT() {
+  dlerror();  // Clear any pending error.
+  void* sym = dlsym(RTLD_DEFAULT, "test_dlsym_symbol");
+  int* answer = static_cast<int*>(sym);
+  // TODO: remove this once b/20049306 is fixed
+  if (answer == nullptr) printf("Cannot find the answer\n");
+  return answer;
+}
+
diff --git a/tests/libs/versioned_lib_other.cpp b/tests/libs/versioned_lib_other.cpp
new file mode 100644
index 0000000..60fa99a
--- /dev/null
+++ b/tests/libs/versioned_lib_other.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern "C" int versioned_function_v2() {
+  return 20;  // presumably chosen to differ from the 2 in versioned_lib_v2.cpp - TODO confirm
+}
+
+__asm__(".symver versioned_function_v2,versioned_function@@TESTLIB_V2");  // "@@": default version
diff --git a/tests/libs/versioned_lib_other.map b/tests/libs/versioned_lib_other.map
new file mode 100644
index 0000000..752686d
--- /dev/null
+++ b/tests/libs/versioned_lib_other.map
@@ -0,0 +1,9 @@
+TESTLIB_V0 {
+  local:
+    versioned_function_v*;
+};
+
+TESTLIB_V2 {
+  global:
+    versioned_function;
+} TESTLIB_V0;
diff --git a/tests/libs/versioned_lib_v1.cpp b/tests/libs/versioned_lib_v1.cpp
new file mode 100644
index 0000000..c81cbf1
--- /dev/null
+++ b/tests/libs/versioned_lib_v1.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern "C" {
+  int versioned_function_v1(); // made local by "local: versioned_function_v*" in versioned_lib_v1.map
+  int version_zero_function();
+}
+
+int versioned_function_v1() {
+  return 1;
+}
+
+int version_zero_function() {
+  return 100;
+}
+
+__asm__(".symver versioned_function_v1,versioned_function@@TESTLIB_V1");  // "@@": default version
diff --git a/tests/libs/versioned_lib_v1.map b/tests/libs/versioned_lib_v1.map
new file mode 100644
index 0000000..dbda327
--- /dev/null
+++ b/tests/libs/versioned_lib_v1.map
@@ -0,0 +1,12 @@
+TESTLIB_V0 {
+  global:
+    version_zero_function;
+  local:
+    versioned_function_v*;
+};
+
+TESTLIB_V1 {
+  global:
+    versioned_function;
+} TESTLIB_V0;
+
diff --git a/tests/libs/versioned_lib_v2.cpp b/tests/libs/versioned_lib_v2.cpp
new file mode 100644
index 0000000..d7d413f
--- /dev/null
+++ b/tests/libs/versioned_lib_v2.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern "C" {
+  int versioned_function_v1(); // __attribute__((visibility("hidden")));
+  int versioned_function_v2(); // __attribute__((visibility("hidden")));
+  int version_zero_function();
+}
+
+int versioned_function_v1() {
+  return 1;
+}
+
+int versioned_function_v2() {
+  return 2;
+}
+
+int version_zero_function() {
+  return 200;
+}
+__asm__(".symver versioned_function_v1,versioned_function@TESTLIB_V1");  // "@": non-default version.
+__asm__(".symver versioned_function_v2,versioned_function@@TESTLIB_V2");  // "@@": default version.
diff --git a/tests/libs/versioned_lib_v2.map b/tests/libs/versioned_lib_v2.map
new file mode 100644
index 0000000..bb38102
--- /dev/null
+++ b/tests/libs/versioned_lib_v2.map
@@ -0,0 +1,16 @@
+TESTLIB_V0 {
+  global:
+    version_zero_function;
+  local:
+    versioned_function_v*;
+};
+
+TESTLIB_V1 {
+  global:
+    versioned_function;
+} TESTLIB_V0;
+
+TESTLIB_V2 {
+  global:
+    versioned_function;
+} TESTLIB_V1;
diff --git a/tests/libs/versioned_lib_v3.cpp b/tests/libs/versioned_lib_v3.cpp
new file mode 100644
index 0000000..f4740a4
--- /dev/null
+++ b/tests/libs/versioned_lib_v3.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern "C" {
+  int versioned_function_v1(); // __attribute__((visibility("hidden")));
+  int versioned_function_v2(); // __attribute__((visibility("hidden")));
+  int versioned_function_v3(); // __attribute__((visibility("hidden")));
+  int version_zero_function();
+}
+
+int versioned_function_v1() {
+  return 1;
+}
+
+int versioned_function_v2() {
+  return 2;
+}
+
+int versioned_function_v3() {
+  return 3;
+}
+
+int version_zero_function() {
+  return 1000;
+}
+
+__asm__(".symver versioned_function_v1,versioned_function@TESTLIB_V1");  // "@": non-default version.
+__asm__(".symver versioned_function_v2,versioned_function@TESTLIB_V2");  // "@": non-default version.
+__asm__(".symver versioned_function_v3,versioned_function@@TESTLIB_V3");  // "@@": default version.
diff --git a/tests/libs/versioned_lib_v3.map b/tests/libs/versioned_lib_v3.map
new file mode 100644
index 0000000..5b1ce59
--- /dev/null
+++ b/tests/libs/versioned_lib_v3.map
@@ -0,0 +1,21 @@
+TESTLIB_V0 {
+  global:
+    version_zero_function;
+  local:
+    versioned_function_v*;
+};
+
+TESTLIB_V1 {
+  global:
+    versioned_function;
+} TESTLIB_V0;
+
+TESTLIB_V2 {
+  global:
+    versioned_function;
+} TESTLIB_V1;
+
+TESTLIB_V3 {
+  global:
+    versioned_function;
+} TESTLIB_V2;
diff --git a/tests/libs/versioned_uselib.cpp b/tests/libs/versioned_uselib.cpp
new file mode 100644
index 0000000..96eb7c3
--- /dev/null
+++ b/tests/libs/versioned_uselib.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern "C" {
+  int versioned_function();
+
+  int get_function_version();
+  int version_zero_function();
+  int version_zero_function2() __attribute__((weak));
+}
+
+int get_function_version() {
+  return version_zero_function2() + version_zero_function() + versioned_function();
+}
+
+// We expect this function to be preempted by the main executable.
+int version_zero_function2() {
+  return 40000;
+}
diff --git a/tests/libs/versioned_uselib.map b/tests/libs/versioned_uselib.map
new file mode 100644
index 0000000..10bc9ce
--- /dev/null
+++ b/tests/libs/versioned_uselib.map
@@ -0,0 +1,9 @@
+TESTLIB_NONE {
+  global:
+    get_function_version;
+};
+
+TESTLIB_ZERO {
+  global:
+    version_zero_function2;
+} TESTLIB_NONE;
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index c507faa..a299f02 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -16,11 +16,6 @@
 
 #include <gtest/gtest.h>
 
-#include "private/ScopeGuard.h"
-#include "BionicDeathTest.h"
-#include "ScopedSignalHandler.h"
-#include "gtest_ex.h"
-
 #include <errno.h>
 #include <inttypes.h>
 #include <limits.h>
@@ -34,6 +29,18 @@
 #include <unistd.h>
 
 #include <atomic>
+#include <regex>
+#include <vector>
+
+#include <base/file.h>
+#include <base/stringprintf.h>
+
+#include "private/bionic_macros.h"
+#include "private/ScopeGuard.h"
+#include "BionicDeathTest.h"
+#include "ScopedSignalHandler.h"
+
+extern "C" pid_t gettid();
 
 TEST(pthread, pthread_key_create) {
   pthread_key_t key;
@@ -67,8 +74,7 @@
 
   for (int i = 0; i < nkeys; ++i) {
     pthread_key_t key;
-    // If this fails, it's likely that GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT is
-    // wrong.
+    // If this fails, it's likely that LIBC_PTHREAD_KEY_RESERVED_COUNT is wrong.
     ASSERT_EQ(0, pthread_key_create(&key, NULL)) << i << " of " << nkeys;
     keys.push_back(key);
     ASSERT_EQ(0, pthread_setspecific(key, reinterpret_cast<void*>(i)));
@@ -175,6 +181,19 @@
   ASSERT_EQ(0, pthread_key_delete(key));
 }
 
+TEST(pthread, static_pthread_key_used_before_creation) {
+#if defined(__BIONIC__)
+  // See http://b/19625804. The bug is about a static/global pthread key being used before creation.
+  // This checks that the zero-initialized default value of such a key is rejected as invalid.
+  static pthread_key_t key;
+  ASSERT_EQ(nullptr, pthread_getspecific(key));
+  ASSERT_EQ(EINVAL, pthread_setspecific(key, nullptr));
+  ASSERT_EQ(EINVAL, pthread_key_delete(key));
+#else
+  GTEST_LOG_(INFO) << "This test tests bionic pthread key implementation detail.\n";
+#endif
+}
+
 static void* IdFn(void* arg) {
   return arg;
 }
@@ -272,8 +291,11 @@
 
   sleep(1); // (Give t2 a chance to call pthread_join.)
 
-  // ...a call to pthread_detach on thread 1 will "succeed" (silently fail)...
+#if defined(__BIONIC__)
+  ASSERT_EQ(EINVAL, pthread_detach(t1));
+#else
   ASSERT_EQ(0, pthread_detach(t1));
+#endif
   AssertDetached(t1, false);
 
   spinhelper.UnSpin();
@@ -651,6 +673,37 @@
 #endif // __BIONIC__
 }
 
+TEST(pthread, pthread_rwlockattr_smoke) {  // Set/get round trip for pshared and kind.
+  pthread_rwlockattr_t attr;
+  ASSERT_EQ(0, pthread_rwlockattr_init(&attr));
+
+  const int pshared_values[] = {PTHREAD_PROCESS_PRIVATE, PTHREAD_PROCESS_SHARED};
+  for (int expected_pshared : pshared_values) {
+    ASSERT_EQ(0, pthread_rwlockattr_setpshared(&attr, expected_pshared));
+    int actual_pshared;
+    ASSERT_EQ(0, pthread_rwlockattr_getpshared(&attr, &actual_pshared));
+    ASSERT_EQ(expected_pshared, actual_pshared);
+  }
+
+  const int kind_values[] = {PTHREAD_RWLOCK_PREFER_READER_NP,
+                             PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP};
+  for (int expected_kind : kind_values) {
+    ASSERT_EQ(0, pthread_rwlockattr_setkind_np(&attr, expected_kind));
+    int actual_kind;
+    ASSERT_EQ(0, pthread_rwlockattr_getkind_np(&attr, &actual_kind));
+    ASSERT_EQ(expected_kind, actual_kind);
+  }
+
+  ASSERT_EQ(0, pthread_rwlockattr_destroy(&attr));
+}
+
+TEST(pthread, pthread_rwlock_init_same_as_PTHREAD_RWLOCK_INITIALIZER) {
+  pthread_rwlock_t lock1 = PTHREAD_RWLOCK_INITIALIZER;
+  pthread_rwlock_t lock2;
+  ASSERT_EQ(0, pthread_rwlock_init(&lock2, NULL));
+  ASSERT_EQ(0, memcmp(&lock1, &lock2, sizeof(lock1)));  // Both must be byte-for-byte identical.
+}
+
 TEST(pthread, pthread_rwlock_smoke) {
   pthread_rwlock_t l;
   ASSERT_EQ(0, pthread_rwlock_init(&l, NULL));
@@ -686,7 +739,6 @@
   ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(0, pthread_rwlock_unlock(&l));
 
-#ifdef __BIONIC__
   // EDEADLK in "read after write"
   ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(EDEADLK, pthread_rwlock_rdlock(&l));
@@ -696,11 +748,27 @@
   ASSERT_EQ(0, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(EDEADLK, pthread_rwlock_wrlock(&l));
   ASSERT_EQ(0, pthread_rwlock_unlock(&l));
-#endif
 
   ASSERT_EQ(0, pthread_rwlock_destroy(&l));
 }
 
+static void WaitUntilThreadSleep(std::atomic<pid_t>& pid) {  // Waits for "S" in /proc/<pid>/stat.
+  while (pid == 0) {  // Poll until a non-zero id has been stored in |pid|.
+    usleep(1000);
+  }
+  std::string filename = android::base::StringPrintf("/proc/%d/stat", pid.load());
+  std::regex regex {R"(\s+S\s+)"};  // A whitespace-delimited "S"; presumably the state field.
+
+  while (true) {  // Re-read the stat file every millisecond until the pattern matches.
+    std::string content;
+    ASSERT_TRUE(android::base::ReadFileToString(filename, &content));
+    if (std::regex_search(content, regex)) {
+      break;
+    }
+    usleep(1000);
+  }
+}
+
 struct RwlockWakeupHelperArg {
   pthread_rwlock_t lock;
   enum Progress {
@@ -710,9 +778,11 @@
     LOCK_ACCESSED
   };
   std::atomic<Progress> progress;
+  std::atomic<pid_t> tid;
 };
 
 static void pthread_rwlock_reader_wakeup_writer_helper(RwlockWakeupHelperArg* arg) {
+  arg->tid = gettid();
   ASSERT_EQ(RwlockWakeupHelperArg::LOCK_INITIALIZED, arg->progress);
   arg->progress = RwlockWakeupHelperArg::LOCK_WAITING;
 
@@ -729,12 +799,14 @@
   ASSERT_EQ(0, pthread_rwlock_init(&wakeup_arg.lock, NULL));
   ASSERT_EQ(0, pthread_rwlock_rdlock(&wakeup_arg.lock));
   wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_INITIALIZED;
+  wakeup_arg.tid = 0;
 
   pthread_t thread;
   ASSERT_EQ(0, pthread_create(&thread, NULL,
     reinterpret_cast<void* (*)(void*)>(pthread_rwlock_reader_wakeup_writer_helper), &wakeup_arg));
-  sleep(1);
+  WaitUntilThreadSleep(wakeup_arg.tid);
   ASSERT_EQ(RwlockWakeupHelperArg::LOCK_WAITING, wakeup_arg.progress);
+
   wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_RELEASED;
   ASSERT_EQ(0, pthread_rwlock_unlock(&wakeup_arg.lock));
 
@@ -744,6 +816,7 @@
 }
 
 static void pthread_rwlock_writer_wakeup_reader_helper(RwlockWakeupHelperArg* arg) {
+  arg->tid = gettid();
   ASSERT_EQ(RwlockWakeupHelperArg::LOCK_INITIALIZED, arg->progress);
   arg->progress = RwlockWakeupHelperArg::LOCK_WAITING;
 
@@ -760,12 +833,14 @@
   ASSERT_EQ(0, pthread_rwlock_init(&wakeup_arg.lock, NULL));
   ASSERT_EQ(0, pthread_rwlock_wrlock(&wakeup_arg.lock));
   wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_INITIALIZED;
+  wakeup_arg.tid = 0;
 
   pthread_t thread;
   ASSERT_EQ(0, pthread_create(&thread, NULL,
     reinterpret_cast<void* (*)(void*)>(pthread_rwlock_writer_wakeup_reader_helper), &wakeup_arg));
-  sleep(1);
+  WaitUntilThreadSleep(wakeup_arg.tid);
   ASSERT_EQ(RwlockWakeupHelperArg::LOCK_WAITING, wakeup_arg.progress);
+
   wakeup_arg.progress = RwlockWakeupHelperArg::LOCK_RELEASED;
   ASSERT_EQ(0, pthread_rwlock_unlock(&wakeup_arg.lock));
 
@@ -774,6 +849,111 @@
   ASSERT_EQ(0, pthread_rwlock_destroy(&wakeup_arg.lock));
 }
 
+class RwlockKindTestHelper {  // Owns an rwlock of one kind and spawns threads that take it.
+ private:
+  struct ThreadArg {  // Heap-allocated per thread; freed by the thread function.
+    RwlockKindTestHelper* helper;
+    std::atomic<pid_t>& tid;
+
+    ThreadArg(RwlockKindTestHelper* helper, std::atomic<pid_t>& tid)
+      : helper(helper), tid(tid) { }
+  };
+
+ public:
+  // The lock under test; tests lock and unlock it directly.
+  pthread_rwlock_t lock;
+
+  RwlockKindTestHelper(int kind_type) {
+    InitRwlock(kind_type);  // ASSERT_* is kept out of the constructor body itself.
+  }
+
+  ~RwlockKindTestHelper() {
+    DestroyRwlock();
+  }
+
+  void CreateWriterThread(pthread_t& thread, std::atomic<pid_t>& tid) {
+    tid = 0;  // Reset so a waiter can tell when the new thread has stored its tid.
+    ThreadArg* arg = new ThreadArg(this, tid);
+    ASSERT_EQ(0, pthread_create(&thread, NULL,
+                                reinterpret_cast<void* (*)(void*)>(WriterThreadFn), arg));
+  }
+
+  void CreateReaderThread(pthread_t& thread, std::atomic<pid_t>& tid) {
+    tid = 0;  // Reset so a waiter can tell when the new thread has stored its tid.
+    ThreadArg* arg = new ThreadArg(this, tid);
+    ASSERT_EQ(0, pthread_create(&thread, NULL,
+                                reinterpret_cast<void* (*)(void*)>(ReaderThreadFn), arg));
+  }
+
+ private:
+  void InitRwlock(int kind_type) {
+    pthread_rwlockattr_t attr;
+    ASSERT_EQ(0, pthread_rwlockattr_init(&attr));
+    ASSERT_EQ(0, pthread_rwlockattr_setkind_np(&attr, kind_type));
+    ASSERT_EQ(0, pthread_rwlock_init(&lock, &attr));
+    ASSERT_EQ(0, pthread_rwlockattr_destroy(&attr));
+  }
+
+  void DestroyRwlock() {
+    ASSERT_EQ(0, pthread_rwlock_destroy(&lock));
+  }
+
+  static void WriterThreadFn(ThreadArg* arg) {
+    arg->tid = gettid();
+    RwlockKindTestHelper* helper = arg->helper;
+    delete arg;  // Free before the ASSERTs: a failed ASSERT returns early and would leak it.
+
+    ASSERT_EQ(0, pthread_rwlock_wrlock(&helper->lock));
+    ASSERT_EQ(0, pthread_rwlock_unlock(&helper->lock));
+  }
+
+  static void ReaderThreadFn(ThreadArg* arg) {
+    arg->tid = gettid();
+    RwlockKindTestHelper* helper = arg->helper;
+    delete arg;  // Free before the ASSERTs: a failed ASSERT returns early and would leak it.
+
+    ASSERT_EQ(0, pthread_rwlock_rdlock(&helper->lock));
+    ASSERT_EQ(0, pthread_rwlock_unlock(&helper->lock));
+  }
+};
+
+TEST(pthread, pthread_rwlock_kind_PTHREAD_RWLOCK_PREFER_READER_NP) {
+  RwlockKindTestHelper helper(PTHREAD_RWLOCK_PREFER_READER_NP);
+  ASSERT_EQ(0, pthread_rwlock_rdlock(&helper.lock));
+
+  pthread_t writer_thread;
+  std::atomic<pid_t> writer_tid;
+  helper.CreateWriterThread(writer_thread, writer_tid);
+  WaitUntilThreadSleep(writer_tid);  // The writer is expected to block on the held read lock.
+
+  pthread_t reader_thread;
+  std::atomic<pid_t> reader_tid;
+  helper.CreateReaderThread(reader_thread, reader_tid);
+  ASSERT_EQ(0, pthread_join(reader_thread, NULL));  // Reader gets in despite the pending writer.
+
+  ASSERT_EQ(0, pthread_rwlock_unlock(&helper.lock));  // Only now can the writer proceed.
+  ASSERT_EQ(0, pthread_join(writer_thread, NULL));
+}
+
+TEST(pthread, pthread_rwlock_kind_PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP) {
+  RwlockKindTestHelper helper(PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
+  ASSERT_EQ(0, pthread_rwlock_rdlock(&helper.lock));
+
+  pthread_t writer_thread;
+  std::atomic<pid_t> writer_tid;
+  helper.CreateWriterThread(writer_thread, writer_tid);
+  WaitUntilThreadSleep(writer_tid);  // The writer is expected to block on the held read lock.
+
+  pthread_t reader_thread;
+  std::atomic<pid_t> reader_tid;
+  helper.CreateReaderThread(reader_thread, reader_tid);
+  WaitUntilThreadSleep(reader_tid);  // The new reader must block too while a writer is pending.
+
+  ASSERT_EQ(0, pthread_rwlock_unlock(&helper.lock));
+  ASSERT_EQ(0, pthread_join(writer_thread, NULL));
+  ASSERT_EQ(0, pthread_join(reader_thread, NULL));
+}
+
 static int g_once_fn_call_count = 0;
 static void OnceFn() {
   ++g_once_fn_call_count;
@@ -817,23 +997,21 @@
 static void AtForkChild2() { g_atfork_child_calls = (g_atfork_child_calls << 4) | 2; }
 
 TEST(pthread, pthread_atfork_smoke) {
-  test_isolated([] {
-    ASSERT_EQ(0, pthread_atfork(AtForkPrepare1, AtForkParent1, AtForkChild1));
-    ASSERT_EQ(0, pthread_atfork(AtForkPrepare2, AtForkParent2, AtForkChild2));
+  ASSERT_EQ(0, pthread_atfork(AtForkPrepare1, AtForkParent1, AtForkChild1));
+  ASSERT_EQ(0, pthread_atfork(AtForkPrepare2, AtForkParent2, AtForkChild2));
 
-    int pid = fork();
-    ASSERT_NE(-1, pid) << strerror(errno);
+  int pid = fork();
+  ASSERT_NE(-1, pid) << strerror(errno);
 
-    // Child and parent calls are made in the order they were registered.
-    if (pid == 0) {
-      ASSERT_EQ(0x12, g_atfork_child_calls);
-      _exit(0);
-    }
-    ASSERT_EQ(0x12, g_atfork_parent_calls);
+  // Child and parent calls are made in the order they were registered.
+  if (pid == 0) {
+    ASSERT_EQ(0x12, g_atfork_child_calls);
+    _exit(0);
+  }
+  ASSERT_EQ(0x12, g_atfork_parent_calls);
 
-    // Prepare calls are made in the reverse order.
-    ASSERT_EQ(0x21, g_atfork_prepare_calls);
-  });
+  // Prepare calls are made in the reverse order.
+  ASSERT_EQ(0x21, g_atfork_prepare_calls);
 }
 
 TEST(pthread, pthread_attr_getscope) {
@@ -875,7 +1053,7 @@
 }
 
 TEST(pthread, pthread_cond_broadcast__preserves_condattr_flags) {
-#if defined(__BIONIC__) // This tests a bionic implementation detail.
+#if defined(__BIONIC__)
   pthread_condattr_t attr;
   pthread_condattr_init(&attr);
 
@@ -888,16 +1066,78 @@
   ASSERT_EQ(0, pthread_cond_signal(&cond_var));
   ASSERT_EQ(0, pthread_cond_broadcast(&cond_var));
 
-  attr = static_cast<pthread_condattr_t>(cond_var.value);
+  attr = static_cast<pthread_condattr_t>(*reinterpret_cast<uint32_t*>(cond_var.__private));
   clockid_t clock;
   ASSERT_EQ(0, pthread_condattr_getclock(&attr, &clock));
   ASSERT_EQ(CLOCK_MONOTONIC, clock);
   int pshared;
   ASSERT_EQ(0, pthread_condattr_getpshared(&attr, &pshared));
   ASSERT_EQ(PTHREAD_PROCESS_SHARED, pshared);
-#else // __BIONIC__
-  GTEST_LOG_(INFO) << "This test does nothing.\n";
-#endif // __BIONIC__
+#else  // !defined(__BIONIC__)
+  GTEST_LOG_(INFO) << "This tests a bionic implementation detail.\n";
+#endif  // !defined(__BIONIC__)
+}
+
+class pthread_CondWakeupTest : public ::testing::Test {  // Fixture: one thread waits on |cond|.
+ protected:
+  pthread_mutex_t mutex;
+  pthread_cond_t cond;
+
+  enum Progress {  // Stages the waiter thread and the test body move through, in order.
+    INITIALIZED,
+    WAITING,
+    SIGNALED,
+    FINISHED,
+  };
+  std::atomic<Progress> progress;
+  pthread_t thread;
+
+ protected:
+  virtual void SetUp() {  // Initializes the mutex/cond pair and starts the waiter thread.
+    ASSERT_EQ(0, pthread_mutex_init(&mutex, NULL));
+    ASSERT_EQ(0, pthread_cond_init(&cond, NULL));
+    progress = INITIALIZED;
+    ASSERT_EQ(0,
+      pthread_create(&thread, NULL, reinterpret_cast<void* (*)(void*)>(WaitThreadFn), this));
+  }
+
+  virtual void TearDown() {  // Joins the waiter and checks that it ran to completion.
+    ASSERT_EQ(0, pthread_join(thread, NULL));
+    ASSERT_EQ(FINISHED, progress);
+    ASSERT_EQ(0, pthread_cond_destroy(&cond));
+    ASSERT_EQ(0, pthread_mutex_destroy(&mutex));
+  }
+
+  void SleepUntilProgress(Progress expected_progress) {  // Polls |progress| every 5ms.
+    while (progress != expected_progress) {
+      usleep(5000);
+    }
+    usleep(5000);  // Extra delay, presumably to let the waiter reach pthread_cond_wait().
+  }
+
+ private:
+  static void WaitThreadFn(pthread_CondWakeupTest* test) {  // Body of the waiter thread.
+    ASSERT_EQ(0, pthread_mutex_lock(&test->mutex));
+    test->progress = WAITING;
+    while (test->progress == WAITING) {  // Loop so a spurious wake-up goes back to waiting.
+      ASSERT_EQ(0, pthread_cond_wait(&test->cond, &test->mutex));
+    }
+    ASSERT_EQ(SIGNALED, test->progress);
+    test->progress = FINISHED;
+    ASSERT_EQ(0, pthread_mutex_unlock(&test->mutex));
+  }
+};
+
+TEST_F(pthread_CondWakeupTest, signal) {  // Wakes the fixture's waiter thread via signal.
+  SleepUntilProgress(WAITING);
+  progress = SIGNALED;
+  ASSERT_EQ(0, pthread_cond_signal(&cond));  // Report a failed wake-up instead of ignoring it.
+}
+
+TEST_F(pthread_CondWakeupTest, broadcast) {  // Wakes the fixture's waiter thread via broadcast.
+  SleepUntilProgress(WAITING);
+  progress = SIGNALED;
+  ASSERT_EQ(0, pthread_cond_broadcast(&cond));  // Report a failed wake-up instead of ignoring it.
 }
 
 TEST(pthread, pthread_mutex_timedlock) {
@@ -1118,64 +1358,214 @@
   ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE));
   ASSERT_EQ(0, pthread_mutexattr_gettype(&attr, &attr_type));
   ASSERT_EQ(PTHREAD_MUTEX_RECURSIVE, attr_type);
+
+  ASSERT_EQ(0, pthread_mutexattr_destroy(&attr));
 }
 
-TEST(pthread, pthread_mutex_lock_NORMAL) {
-  pthread_mutexattr_t attr;
-  ASSERT_EQ(0, pthread_mutexattr_init(&attr));
-  ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL));
-
+struct PthreadMutex {
   pthread_mutex_t lock;
-  ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
 
-  ASSERT_EQ(0, pthread_mutex_lock(&lock));
-  ASSERT_EQ(0, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(0, pthread_mutex_destroy(&lock));
+  PthreadMutex(int mutex_type) {
+    init(mutex_type);
+  }
+
+  ~PthreadMutex() {
+    destroy();
+  }
+
+ private:
+  void init(int mutex_type) {
+    pthread_mutexattr_t attr;
+    ASSERT_EQ(0, pthread_mutexattr_init(&attr));
+    ASSERT_EQ(0, pthread_mutexattr_settype(&attr, mutex_type));
+    ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
+    ASSERT_EQ(0, pthread_mutexattr_destroy(&attr));
+  }
+
+  void destroy() {
+    ASSERT_EQ(0, pthread_mutex_destroy(&lock));
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(PthreadMutex);
+};
+
+TEST(pthread, pthread_mutex_lock_NORMAL) {
+  PthreadMutex m(PTHREAD_MUTEX_NORMAL);
+
+  ASSERT_EQ(0, pthread_mutex_lock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_unlock(&m.lock));
 }
 
 TEST(pthread, pthread_mutex_lock_ERRORCHECK) {
-  pthread_mutexattr_t attr;
-  ASSERT_EQ(0, pthread_mutexattr_init(&attr));
-  ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
+  PthreadMutex m(PTHREAD_MUTEX_ERRORCHECK);
 
-  pthread_mutex_t lock;
-  ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
-
-  ASSERT_EQ(0, pthread_mutex_lock(&lock));
-  ASSERT_EQ(EDEADLK, pthread_mutex_lock(&lock));
-  ASSERT_EQ(0, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(0, pthread_mutex_trylock(&lock));
-  ASSERT_EQ(EBUSY, pthread_mutex_trylock(&lock));
-  ASSERT_EQ(0, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(EPERM, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(0, pthread_mutex_destroy(&lock));
+  ASSERT_EQ(0, pthread_mutex_lock(&m.lock));
+  ASSERT_EQ(EDEADLK, pthread_mutex_lock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_unlock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_trylock(&m.lock));
+  ASSERT_EQ(EBUSY, pthread_mutex_trylock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_unlock(&m.lock));
+  ASSERT_EQ(EPERM, pthread_mutex_unlock(&m.lock));
 }
 
 TEST(pthread, pthread_mutex_lock_RECURSIVE) {
-  pthread_mutexattr_t attr;
-  ASSERT_EQ(0, pthread_mutexattr_init(&attr));
-  ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE));
+  PthreadMutex m(PTHREAD_MUTEX_RECURSIVE);
 
-  pthread_mutex_t lock;
-  ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
+  ASSERT_EQ(0, pthread_mutex_lock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_lock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_unlock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_unlock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_trylock(&m.lock));
+  ASSERT_EQ(0, pthread_mutex_unlock(&m.lock));
+  ASSERT_EQ(EPERM, pthread_mutex_unlock(&m.lock));
+}
 
-  ASSERT_EQ(0, pthread_mutex_lock(&lock));
-  ASSERT_EQ(0, pthread_mutex_lock(&lock));
-  ASSERT_EQ(0, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(0, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(0, pthread_mutex_trylock(&lock));
-  ASSERT_EQ(0, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(EPERM, pthread_mutex_unlock(&lock));
-  ASSERT_EQ(0, pthread_mutex_destroy(&lock));
+TEST(pthread, pthread_mutex_init_same_as_static_initializers) {  // Static init must equal init().
+  pthread_mutex_t lock_normal = PTHREAD_MUTEX_INITIALIZER;
+  PthreadMutex m1(PTHREAD_MUTEX_NORMAL);
+  ASSERT_EQ(0, memcmp(&lock_normal, &m1.lock, sizeof(pthread_mutex_t)));
+  ASSERT_EQ(0, pthread_mutex_destroy(&lock_normal));
+
+  pthread_mutex_t lock_errorcheck = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+  PthreadMutex m2(PTHREAD_MUTEX_ERRORCHECK);
+  ASSERT_EQ(0, memcmp(&lock_errorcheck, &m2.lock, sizeof(pthread_mutex_t)));
+  ASSERT_EQ(0, pthread_mutex_destroy(&lock_errorcheck));
+
+  pthread_mutex_t lock_recursive = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+  PthreadMutex m3(PTHREAD_MUTEX_RECURSIVE);
+  ASSERT_EQ(0, memcmp(&lock_recursive, &m3.lock, sizeof(pthread_mutex_t)));
+  ASSERT_EQ(0, pthread_mutex_destroy(&lock_recursive));
+}
+class MutexWakeupHelper {  // Checks that unlocking a mutex wakes a thread blocked on it.
+ private:
+  PthreadMutex m;
+  enum Progress {  // Stages the main thread and the helper thread move through, in order.
+    LOCK_INITIALIZED,
+    LOCK_WAITING,
+    LOCK_RELEASED,
+    LOCK_ACCESSED
+  };
+  std::atomic<Progress> progress;
+  std::atomic<pid_t> tid;
+
+  static void thread_fn(MutexWakeupHelper* helper) {  // Body of the thread started by test().
+    helper->tid = gettid();
+    ASSERT_EQ(LOCK_INITIALIZED, helper->progress);
+    helper->progress = LOCK_WAITING;
+
+    ASSERT_EQ(0, pthread_mutex_lock(&helper->m.lock));  // test() holds the mutex at this point.
+    ASSERT_EQ(LOCK_RELEASED, helper->progress);
+    ASSERT_EQ(0, pthread_mutex_unlock(&helper->m.lock));
+
+    helper->progress = LOCK_ACCESSED;
+  }
+
+ public:
+  MutexWakeupHelper(int mutex_type) : m(mutex_type) {
+  }
+
+  void test() {
+    ASSERT_EQ(0, pthread_mutex_lock(&m.lock));
+    progress = LOCK_INITIALIZED;
+    tid = 0;  // WaitUntilThreadSleep() polls until the thread replaces this with its tid.
+
+    pthread_t thread;
+    ASSERT_EQ(0, pthread_create(&thread, NULL,
+      reinterpret_cast<void* (*)(void*)>(MutexWakeupHelper::thread_fn), this));
+
+    WaitUntilThreadSleep(tid);
+    ASSERT_EQ(LOCK_WAITING, progress);
+
+    progress = LOCK_RELEASED;  // Set before unlocking so the woken thread observes it.
+    ASSERT_EQ(0, pthread_mutex_unlock(&m.lock));
+
+    ASSERT_EQ(0, pthread_join(thread, NULL));
+    ASSERT_EQ(LOCK_ACCESSED, progress);
+  }
+};
+
+TEST(pthread, pthread_mutex_NORMAL_wakeup) {
+  MutexWakeupHelper helper(PTHREAD_MUTEX_NORMAL);
+  helper.test();
+}
+
+TEST(pthread, pthread_mutex_ERRORCHECK_wakeup) {
+  MutexWakeupHelper helper(PTHREAD_MUTEX_ERRORCHECK);
+  helper.test();
+}
+
+TEST(pthread, pthread_mutex_RECURSIVE_wakeup) {
+  MutexWakeupHelper helper(PTHREAD_MUTEX_RECURSIVE);
+  helper.test();
 }
 
 TEST(pthread, pthread_mutex_owner_tid_limit) {
+#if defined(__BIONIC__) && !defined(__LP64__)
   FILE* fp = fopen("/proc/sys/kernel/pid_max", "r");
   ASSERT_TRUE(fp != NULL);
   long pid_max;
   ASSERT_EQ(1, fscanf(fp, "%ld", &pid_max));
   fclose(fp);
-  // Current pthread_mutex uses 16 bits to represent owner tid.
-  // Change the implementation if we need to support higher value than 65535.
+  // Bionic's pthread_mutex implementation on 32-bit devices uses 16 bits to represent owner tid.
   ASSERT_LE(pid_max, 65536);
+#else
+  GTEST_LOG_(INFO) << "This test does nothing as 32-bit tid is supported by pthread_mutex.\n";
+#endif
+}
+
+class StrictAlignmentAllocator {  // Owns every buffer it hands out; frees them on destruction.
+ public:
+  void* allocate(size_t size, size_t alignment) {
+    char* block = new char[size + alignment * 2];
+    allocated_array.push_back(block);
+    // Advance to the first address that is an odd multiple of |alignment|.
+    char* aligned = block;
+    for (; !is_strict_aligned(aligned, alignment); ++aligned) {}
+    return aligned;
+  }
+
+  ~StrictAlignmentAllocator() {
+    for (char* block : allocated_array) delete [] block;
+  }
+
+ private:
+  // True when |p| is aligned to |alignment| but not to twice |alignment|.
+  bool is_strict_aligned(char* p, size_t alignment) {
+    const uintptr_t address = reinterpret_cast<uintptr_t>(p);
+    return address % (alignment * 2) == alignment;
+  }
+
+  std::vector<char*> allocated_array;
+};
+
+TEST(pthread, pthread_types_allow_four_bytes_alignment) {
+#if defined(__BIONIC__)
+  // For binary compatibility with old version, we need to allow 4-byte aligned data for pthread types.
+  StrictAlignmentAllocator allocator;
+  pthread_mutex_t* mutex = reinterpret_cast<pthread_mutex_t*>(
+                             allocator.allocate(sizeof(pthread_mutex_t), 4));
+  ASSERT_EQ(0, pthread_mutex_init(mutex, NULL));
+  ASSERT_EQ(0, pthread_mutex_lock(mutex));
+  ASSERT_EQ(0, pthread_mutex_unlock(mutex));
+  ASSERT_EQ(0, pthread_mutex_destroy(mutex));
+
+  pthread_cond_t* cond = reinterpret_cast<pthread_cond_t*>(
+                           allocator.allocate(sizeof(pthread_cond_t), 4));
+  ASSERT_EQ(0, pthread_cond_init(cond, NULL));
+  ASSERT_EQ(0, pthread_cond_signal(cond));
+  ASSERT_EQ(0, pthread_cond_broadcast(cond));
+  ASSERT_EQ(0, pthread_cond_destroy(cond));
+
+  pthread_rwlock_t* rwlock = reinterpret_cast<pthread_rwlock_t*>(
+                               allocator.allocate(sizeof(pthread_rwlock_t), 4));
+  ASSERT_EQ(0, pthread_rwlock_init(rwlock, NULL));
+  ASSERT_EQ(0, pthread_rwlock_rdlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_unlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_wrlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_unlock(rwlock));
+  ASSERT_EQ(0, pthread_rwlock_destroy(rwlock));
+
+#else
+  GTEST_LOG_(INFO) << "This test tests bionic implementation details.";
+#endif
 }
diff --git a/tests/stack_protector_test.cpp b/tests/stack_protector_test.cpp
index 8007711..22285d1 100644
--- a/tests/stack_protector_test.cpp
+++ b/tests/stack_protector_test.cpp
@@ -24,14 +24,10 @@
 #include <pthread.h>
 #include <stdint.h>
 #include <stdio.h>
-#include <sys/syscall.h>
 #include <unistd.h>
 #include <set>
 
-#if defined(__GLIBC__)
-// glibc doesn't expose gettid(2).
-pid_t gettid() { return syscall(__NR_gettid); }
-#endif // __GLIBC__
+extern "C" pid_t gettid();
 
 // For x86, bionic and glibc have per-thread stack guard values (all identical).
 #if defined(__i386__)
diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp
index 2ecfc60..62677cd 100644
--- a/tests/stdio_test.cpp
+++ b/tests/stdio_test.cpp
@@ -151,6 +151,15 @@
   fclose(fp);
 }
 
+TEST(stdio, getdelim_directory) {  // getdelim() on a directory stream must fail with -1.
+  FILE* fp = fopen("/proc", "r");
+  ASSERT_TRUE(fp != NULL);
+  char* word_read = NULL;  // getdelim() requires NULL or a malloc()ed buffer on entry.
+  size_t allocated_length = 0;
+  ASSERT_EQ(-1, getdelim(&word_read, &allocated_length, ' ', fp));
+  fclose(fp);
+}
+
 TEST(stdio, getline) {
   FILE* fp = tmpfile();
   ASSERT_TRUE(fp != NULL);
diff --git a/tests/sys_statvfs_test.cpp b/tests/sys_statvfs_test.cpp
index 6b19e13..bff9e20 100644
--- a/tests/sys_statvfs_test.cpp
+++ b/tests/sys_statvfs_test.cpp
@@ -30,6 +30,11 @@
   EXPECT_EQ(0U, sb.f_ffree);
   EXPECT_EQ(0U, sb.f_fsid);
   EXPECT_EQ(255U, sb.f_namemax);
+
+  // The kernel sets a private bit to indicate that f_flags is valid.
+  // This flag is not supposed to be exposed to libc clients.
+  static const uint32_t ST_VALID = 0x0020;
+  EXPECT_TRUE((sb.f_flag & ST_VALID) == 0) << sb.f_flag;
 }
 
 TEST(sys_statvfs, statvfs) {
@@ -51,6 +56,7 @@
   close(fd);
   Check(sb);
 }
+
 TEST(sys_statvfs, fstatvfs64) {
   struct statvfs64 sb;
   int fd = open("/proc", O_RDONLY);
diff --git a/tests/sys_vfs_test.cpp b/tests/sys_vfs_test.cpp
index 4b05660..a521967 100644
--- a/tests/sys_vfs_test.cpp
+++ b/tests/sys_vfs_test.cpp
@@ -31,6 +31,11 @@
   EXPECT_EQ(0, sb.f_fsid.__val[0]);
   EXPECT_EQ(0, sb.f_fsid.__val[1]);
   EXPECT_EQ(255, static_cast<int>(sb.f_namelen));
+
+  // The kernel sets a private bit to indicate that f_flags is valid.
+  // This flag is not supposed to be exposed to libc clients.
+  static const uint32_t ST_VALID = 0x0020;
+  EXPECT_TRUE((sb.f_flags & ST_VALID) == 0) << sb.f_flags;
 }
 
 TEST(sys_vfs, statfs) {
@@ -52,6 +57,7 @@
   close(fd);
   Check(sb);
 }
+
 TEST(sys_vfs, fstatfs64) {
   struct statfs64 sb;
   int fd = open("/proc", O_RDONLY);
diff --git a/tests/time_test.cpp b/tests/time_test.cpp
index 691d8ff..a0b0209 100644
--- a/tests/time_test.cpp
+++ b/tests/time_test.cpp
@@ -24,6 +24,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <atomic>
 
 #include "ScopedSignalHandler.h"
 
@@ -197,7 +198,7 @@
   ASSERT_EQ(0, timer_delete(timer_id));
 }
 
-static int timer_create_SIGEV_SIGNAL_signal_handler_invocation_count = 0;
+static int timer_create_SIGEV_SIGNAL_signal_handler_invocation_count;
 static void timer_create_SIGEV_SIGNAL_signal_handler(int signal_number) {
   ++timer_create_SIGEV_SIGNAL_signal_handler_invocation_count;
   ASSERT_EQ(SIGUSR1, signal_number);
@@ -212,6 +213,7 @@
   timer_t timer_id;
   ASSERT_EQ(0, timer_create(CLOCK_MONOTONIC, &se, &timer_id));
 
+  timer_create_SIGEV_SIGNAL_signal_handler_invocation_count = 0;
   ScopedSignalHandler ssh(SIGUSR1, timer_create_SIGEV_SIGNAL_signal_handler);
 
   ASSERT_EQ(0, timer_create_SIGEV_SIGNAL_signal_handler_invocation_count);
@@ -228,25 +230,26 @@
 }
 
 struct Counter {
-  volatile int value;
+ private:
+  std::atomic<int> value;
   timer_t timer_id;
   sigevent_t se;
   bool timer_valid;
 
-  Counter(void (*fn)(sigval_t)) : value(0), timer_valid(false) {
-    memset(&se, 0, sizeof(se));
-    se.sigev_notify = SIGEV_THREAD;
-    se.sigev_notify_function = fn;
-    se.sigev_value.sival_ptr = this;
-    Create();
-  }
-
   void Create() {
     ASSERT_FALSE(timer_valid);
     ASSERT_EQ(0, timer_create(CLOCK_REALTIME, &se, &timer_id));
     timer_valid = true;
   }
 
+ public:
+  Counter(void (*fn)(sigval_t)) : value(0), timer_valid(false) {
+    memset(&se, 0, sizeof(se));
+    se.sigev_notify = SIGEV_THREAD;
+    se.sigev_notify_function = fn;
+    se.sigev_value.sival_ptr = this;
+    Create();
+  }
   void DeleteTimer() {
     ASSERT_TRUE(timer_valid);
     ASSERT_EQ(0, timer_delete(timer_id));
@@ -259,12 +262,16 @@
     }
   }
 
+  int Value() const {
+    return value;
+  }
+
   void SetTime(time_t value_s, time_t value_ns, time_t interval_s, time_t interval_ns) {
     ::SetTime(timer_id, value_s, value_ns, interval_s, interval_ns);
   }
 
   bool ValueUpdated() {
-    volatile int current_value = value;
+    int current_value = value;
     time_t start = time(NULL);
     while (current_value == value && (time(NULL) - start) < 5) {
     }
@@ -287,30 +294,29 @@
 
 TEST(time, timer_settime_0) {
   Counter counter(Counter::CountAndDisarmNotifyFunction);
-  ASSERT_TRUE(counter.timer_valid);
-
-  ASSERT_EQ(0, counter.value);
+  ASSERT_EQ(0, counter.Value());
 
   counter.SetTime(0, 1, 1, 0);
   usleep(500000);
 
   // The count should just be 1 because we disarmed the timer the first time it fired.
-  ASSERT_EQ(1, counter.value);
+  ASSERT_EQ(1, counter.Value());
 }
 
 TEST(time, timer_settime_repeats) {
   Counter counter(Counter::CountNotifyFunction);
-  ASSERT_TRUE(counter.timer_valid);
-
-  ASSERT_EQ(0, counter.value);
+  ASSERT_EQ(0, counter.Value());
 
   counter.SetTime(0, 1, 0, 10);
   ASSERT_TRUE(counter.ValueUpdated());
   ASSERT_TRUE(counter.ValueUpdated());
   ASSERT_TRUE(counter.ValueUpdated());
+  counter.DeleteTimer();
+  // Add a sleep as other threads may be calling the callback function when the timer is deleted.
+  usleep(500000);
 }
 
-static int timer_create_NULL_signal_handler_invocation_count = 0;
+static int timer_create_NULL_signal_handler_invocation_count;
 static void timer_create_NULL_signal_handler(int signal_number) {
   ++timer_create_NULL_signal_handler_invocation_count;
   ASSERT_EQ(SIGALRM, signal_number);
@@ -321,6 +327,7 @@
   timer_t timer_id;
   ASSERT_EQ(0, timer_create(CLOCK_MONOTONIC, NULL, &timer_id));
 
+  timer_create_NULL_signal_handler_invocation_count = 0;
   ScopedSignalHandler ssh(SIGALRM, timer_create_NULL_signal_handler);
 
   ASSERT_EQ(0, timer_create_NULL_signal_handler_invocation_count);
@@ -367,22 +374,59 @@
 
 TEST(time, timer_create_multiple) {
   Counter counter1(Counter::CountNotifyFunction);
-  ASSERT_TRUE(counter1.timer_valid);
   Counter counter2(Counter::CountNotifyFunction);
-  ASSERT_TRUE(counter2.timer_valid);
   Counter counter3(Counter::CountNotifyFunction);
-  ASSERT_TRUE(counter3.timer_valid);
 
-  ASSERT_EQ(0, counter1.value);
-  ASSERT_EQ(0, counter2.value);
-  ASSERT_EQ(0, counter3.value);
+  ASSERT_EQ(0, counter1.Value());
+  ASSERT_EQ(0, counter2.Value());
+  ASSERT_EQ(0, counter3.Value());
 
   counter2.SetTime(0, 1, 0, 0);
   usleep(500000);
 
-  EXPECT_EQ(0, counter1.value);
-  EXPECT_EQ(1, counter2.value);
-  EXPECT_EQ(0, counter3.value);
+  EXPECT_EQ(0, counter1.Value());
+  EXPECT_EQ(1, counter2.Value());
+  EXPECT_EQ(0, counter3.Value());
+}
+
+// Test to verify that disarming a repeatable timer disables the callbacks.
+TEST(time, timer_disarm_terminates) {
+  Counter counter(Counter::CountNotifyFunction);
+  ASSERT_EQ(0, counter.Value());
+
+  counter.SetTime(0, 1, 0, 1);
+  ASSERT_TRUE(counter.ValueUpdated());
+  ASSERT_TRUE(counter.ValueUpdated());
+  ASSERT_TRUE(counter.ValueUpdated());
+
+  counter.SetTime(0, 0, 0, 0);
+  // Add a sleep as the kernel may have pending events when the timer is disarmed.
+  usleep(500000);
+  int value = counter.Value();
+  usleep(500000);
+
+  // Verify the counter has not been incremented.
+  ASSERT_EQ(value, counter.Value());
+}
+
+// Test to verify that deleting a repeatable timer disables the callbacks.
+TEST(time, timer_delete_terminates) {
+  Counter counter(Counter::CountNotifyFunction);
+  ASSERT_EQ(0, counter.Value());
+
+  counter.SetTime(0, 1, 0, 1);
+  ASSERT_TRUE(counter.ValueUpdated());
+  ASSERT_TRUE(counter.ValueUpdated());
+  ASSERT_TRUE(counter.ValueUpdated());
+
+  counter.DeleteTimer();
+  // Add a sleep as other threads may be calling the callback function when the timer is deleted.
+  usleep(500000);
+  int value = counter.Value();
+  usleep(500000);
+
+  // Verify the counter has not been incremented.
+  ASSERT_EQ(value, counter.Value());
 }
 
 struct TimerDeleteData {
@@ -499,45 +543,3 @@
   timespec out;
   ASSERT_EQ(EINVAL, clock_nanosleep(-1, 0, &in, &out));
 }
-
-// Test to verify that disarming a repeatable timer disables the
-// callbacks.
-TEST(time, timer_disarm_terminates) {
-  Counter counter(Counter::CountNotifyFunction);
-  ASSERT_TRUE(counter.timer_valid);
-
-  ASSERT_EQ(0, counter.value);
-
-  counter.SetTime(0, 1, 0, 1);
-  ASSERT_TRUE(counter.ValueUpdated());
-  ASSERT_TRUE(counter.ValueUpdated());
-  ASSERT_TRUE(counter.ValueUpdated());
-
-  counter.SetTime(0, 0, 1, 0);
-  volatile int value = counter.value;
-  usleep(500000);
-
-  // Verify the counter has not been incremented.
-  ASSERT_EQ(value, counter.value);
-}
-
-// Test to verify that deleting a repeatable timer disables the
-// callbacks.
-TEST(time, timer_delete_terminates) {
-  Counter counter(Counter::CountNotifyFunction);
-  ASSERT_TRUE(counter.timer_valid);
-
-  ASSERT_EQ(0, counter.value);
-
-  counter.SetTime(0, 1, 0, 1);
-  ASSERT_TRUE(counter.ValueUpdated());
-  ASSERT_TRUE(counter.ValueUpdated());
-  ASSERT_TRUE(counter.ValueUpdated());
-
-  counter.DeleteTimer();
-  volatile int value = counter.value;
-  usleep(500000);
-
-  // Verify the counter has not been incremented.
-  ASSERT_EQ(value, counter.value);
-}
diff --git a/tests/utils.h b/tests/utils.h
new file mode 100644
index 0000000..fd012a3
--- /dev/null
+++ b/tests/utils.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_UTILS_H
+#define __TEST_UTILS_H
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "private/ScopeGuard.h"
+
+// One parsed line of a /proc maps file.
+struct map_record {
+  uintptr_t addr_start;
+  uintptr_t addr_end;
+
+  int perms;  // Bitmask of PROT_READ / PROT_WRITE / PROT_EXEC.
+
+  size_t offset;
+
+  dev_t device;
+  ino_t inode;
+
+  std::string pathname;
+};
+
+class Maps {
+ public:
+  // Appends one map_record to *maps for each line of /proc/self/task/<pid>/maps
+  // on which sscanf converts all eight fields; any other line (for example an
+  // entry with no pathname) is skipped. Returns false only if the file cannot
+  // be opened.
+  static bool parse_maps(std::vector<map_record>* maps) {
+    char path[64];
+    snprintf(path, sizeof(path), "/proc/self/task/%d/maps", getpid());
+    FILE* fp = fopen(path, "re");
+    if (fp == nullptr) {
+      return false;
+    }
+
+    auto fp_guard = make_scope_guard([&]() {
+      fclose(fp);
+    });
+
+    char line[BUFSIZ];
+    while (fgets(line, sizeof(line), fp) != nullptr) {
+      map_record record;
+      uint32_t dev_major, dev_minor;
+      char pathstr[BUFSIZ];
+      char prot[5]; // sizeof("rwxp")
+      if (sscanf(line, "%" SCNxPTR "-%" SCNxPTR " %4s %" SCNxPTR " %x:%x %lu %s",
+            &record.addr_start, &record.addr_end, prot, &record.offset,
+            &dev_major, &dev_minor, &record.inode, pathstr) == 8) {
+        record.perms = 0;
+        if (prot[0] == 'r') {
+          record.perms |= PROT_READ;
+        }
+        if (prot[1] == 'w') {
+          record.perms |= PROT_WRITE;
+        }
+        if (prot[2] == 'x') {
+          record.perms |= PROT_EXEC;
+        }
+
+        // TODO: parse shared/private?
+
+        record.device = makedev(dev_major, dev_minor);
+        record.pathname = pathstr;
+        maps->push_back(record);
+      }
+    }
+
+    return true;
+  }
+};
+
+#endif
diff --git a/tools/bionicbb/README.md b/tools/bionicbb/README.md
index 91f64d8..a285984 100644
--- a/tools/bionicbb/README.md
+++ b/tools/bionicbb/README.md
@@ -8,11 +8,11 @@
 ------------
 
  * Python 2.7
+ * [Advanced Python Scheduler](https://apscheduler.readthedocs.org/en/latest/)
  * [Flask](http://flask.pocoo.org/)
  * [Google API Client Library](https://developers.google.com/api-client-library/python/start/installation)
  * [jenkinsapi](https://pypi.python.org/pypi/jenkinsapi)
  * [Requests](http://docs.python-requests.org/en/latest/)
- * [termcolor](https://pypi.python.org/pypi/termcolor)
 
 Setup
 -----
diff --git a/tools/bionicbb/build_listener.py b/tools/bionicbb/bionicbb.py
similarity index 74%
rename from tools/bionicbb/build_listener.py
rename to tools/bionicbb/bionicbb.py
index f7f52ed..a786b27 100644
--- a/tools/bionicbb/build_listener.py
+++ b/tools/bionicbb/bionicbb.py
@@ -15,12 +15,16 @@
 # limitations under the License.
 #
 import json
+import logging
+import os
+
+from apscheduler.schedulers.background import BackgroundScheduler
+from flask import Flask, request
 import requests
-import termcolor
 
 import gerrit
+import tasks
 
-from flask import Flask, request
 app = Flask(__name__)
 
 
@@ -43,7 +47,7 @@
     ref = params['REF']
     patch_set = ref.split('/')[-1]
 
-    print '{} #{} {}: {}'.format(name, number, status, full_url)
+    logging.debug('%s #%s %s: %s', name, number, status, full_url)
 
     # bionic-lint is always broken, so we don't want to reject changes for
     # those failures until we clean things up.
@@ -69,19 +73,19 @@
                                                                     patch_set))
 
         headers = {'Content-Type': 'application/json;charset=UTF-8'}
-        print 'POST {}: {}'.format(url, request_data)
-        print requests.post(url, headers=headers, json=request_data)
+        logging.debug('POST %s: %s', url, request_data)
+        requests.post(url, headers=headers, json=request_data)
     elif name == 'clean-bionic-presubmit':
         request_data = {'message': 'out/ directory removed'}
         url = gerrit_url('/a/changes/{}/revisions/{}/review'.format(change_id,
                                                                     patch_set))
         headers = {'Content-Type': 'application/json;charset=UTF-8'}
-        print 'POST {}: {}'.format(url, request_data)
-        print requests.post(url, headers=headers, json=request_data)
+        logging.debug('POST %s: %s', url, request_data)
+        requests.post(url, headers=headers, json=request_data)
     elif name == 'bionic-lint':
-        print 'IGNORED'
+        logging.warning('Result for bionic-lint ignored')
     else:
-        print '{}: {}'.format(termcolor.colored('red', 'UNKNOWN'), name)
+        logging.error('Unknown project: %s', name)
     return ''
 
 
@@ -100,19 +104,31 @@
         bb_review = 0
 
     if bb_review >= 0:
-        print 'No rejection to drop: {} {}'.format(change_id, patch_set)
+        logging.info('No rejection to drop: %s %s', change_id, patch_set)
         return ''
 
-    print 'Dropping rejection: {} {}'.format(change_id, patch_set)
+    logging.info('Dropping rejection: %s %s', change_id, patch_set)
 
     request_data = {'labels': {'Verified': 0}}
     url = gerrit_url('/a/changes/{}/revisions/{}/review'.format(change_id,
                                                                 patch_set))
     headers = {'Content-Type': 'application/json;charset=UTF-8'}
-    print 'POST {}: {}'.format(url, request_data)
-    print requests.post(url, headers=headers, json=request_data)
+    logging.debug('POST %s: %s', url, request_data)
+    requests.post(url, headers=headers, json=request_data)
     return ''
 
 
 if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger()
+    fh = logging.FileHandler('bionicbb.log')
+    fh.setLevel(logging.INFO)
+    logger.addHandler(fh)
+
+    # Prevent the job from being rescheduled by the reloader.
+    if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
+        scheduler = BackgroundScheduler()
+        scheduler.start()
+        scheduler.add_job(tasks.get_and_process_jobs, 'interval', minutes=5)
+
     app.run(host='0.0.0.0', debug=True)
diff --git a/tools/bionicbb/gerrit.py b/tools/bionicbb/gerrit.py
index 76e42b4..9c62c6a 100644
--- a/tools/bionicbb/gerrit.py
+++ b/tools/bionicbb/gerrit.py
@@ -29,6 +29,12 @@
         call('/changes/{}/revisions/{}/commit'.format(change_id, revision)))
 
 
+def get_files_for_revision(change_id, revision):
+    # Keys of the JSON object served by the revision's /files endpoint.
+    endpoint = '/changes/{}/revisions/{}/files'.format(change_id, revision)
+    return json.loads(call(endpoint)).keys()
+
+
 def call(endpoint, method='GET'):
     if method != 'GET':
         raise NotImplementedError('Currently only HTTP GET is supported.')
@@ -62,8 +68,8 @@
         }
     }
     """
-    details = call('/changes/{}/revisions/{}/review'.format(
-        change_id, patch_set))
+    details = json.loads(call('/changes/{}/revisions/{}/review'.format(
+        change_id, patch_set)))
     labels = {'Code-Review': {}, 'Verified': {}}
     for review in details['labels']['Code-Review']['all']:
         if 'value' in review and 'email' in review:
diff --git a/tools/bionicbb/gmail.py b/tools/bionicbb/gmail.py
new file mode 100644
index 0000000..f088ad6
--- /dev/null
+++ b/tools/bionicbb/gmail.py
@@ -0,0 +1,86 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import base64
+import httplib2
+
+import config
+
+
+def get_body(msg):
+    """Return the base64url-decoded body of a message dict.
+
+    Raises NotImplementedError if the body is stored as an attachment.
+    """
+    if 'attachmentId' in msg['payload']['body']:
+        raise NotImplementedError('Handling of messages contained in '
+                                  'attachments not yet implemented.')
+    b64_body = msg['payload']['body']['data']
+    return base64.urlsafe_b64decode(b64_body.encode('ASCII'))
+
+
+def build_service():
+    """Build a Gmail v1 API client authorized through the OAuth flow.
+
+    Credentials are read from the 'oauth.storage' file; the flow is run
+    only when none are stored or the stored ones are marked invalid.
+    """
+    from apiclient.discovery import build
+    from oauth2client.client import flow_from_clientsecrets
+    from oauth2client.file import Storage
+    from oauth2client.tools import run
+
+    OAUTH_SCOPE = 'https://www.googleapis.com/auth/gmail.modify'
+    STORAGE = Storage('oauth.storage')
+
+    # Start the OAuth flow to retrieve credentials
+    flow = flow_from_clientsecrets(config.client_secret_file,
+                                   scope=OAUTH_SCOPE)
+    http = httplib2.Http()
+
+    # Try to retrieve credentials from storage or run the flow to generate them
+    credentials = STORAGE.get()
+    if credentials is None or credentials.invalid:
+        credentials = run(flow, STORAGE, http=http)
+
+    http = credentials.authorize(http)
+    return build('gmail', 'v1', http=http)
+
+
+def get_gerrit_label(labels):
+    """Return the id of the label named 'gerrit', or None if absent."""
+    for label in labels:
+        if label['name'] == 'gerrit':
+            return label['id']
+    return None
+
+
+def get_all_messages(service, label):
+    """Return the message entries listed under the given label id, all pages.
+
+    The label filter is repeated on every page request; without it the
+    follow-up pages are not restricted to the label. A page that has no
+    'messages' key contributes nothing instead of raising KeyError.
+    """
+    msgs = []
+    response = service.users().messages().list(
+        userId='me', labelIds=label).execute()
+    msgs.extend(response.get('messages', []))
+    while 'nextPageToken' in response:
+        page_token = response['nextPageToken']
+        response = service.users().messages().list(
+            userId='me', labelIds=label, pageToken=page_token).execute()
+        msgs.extend(response.get('messages', []))
+    return msgs
diff --git a/tools/bionicbb/gmail_listener.py b/tools/bionicbb/gmail_listener.py
deleted file mode 100644
index 0cd31c9..0000000
--- a/tools/bionicbb/gmail_listener.py
+++ /dev/null
@@ -1,363 +0,0 @@
-#!/usr/bin/env python2
-#
-# Copyright (C) 2015 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the 'License');
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an 'AS IS' BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import base64
-import httplib
-import httplib2
-import jenkinsapi
-import json
-import re
-import requests
-import termcolor
-import socket
-import sys
-import time
-
-import apiclient.errors
-
-import config
-import gerrit
-
-
-class GmailError(RuntimeError):
-    def __init__(self, message):
-        super(GmailError, self).__init__(message)
-
-
-def get_gerrit_label(labels):
-    for label in labels:
-        if label['name'] == 'gerrit':
-            return label['id']
-    return None
-
-
-def get_headers(msg):
-    headers = {}
-    for hdr in msg['payload']['headers']:
-        headers[hdr['name']] = hdr['value']
-    return headers
-
-
-def should_skip_message(info):
-    if info['MessageType'] in ('newchange', 'newpatchset', 'comment'):
-        commit = gerrit.get_commit(info['Change-Id'], info['PatchSet'])
-        committer = commit['committer']['email']
-        return not committer.endswith('@google.com')
-    else:
-        raise ValueError('should_skip_message() is only valid for new '
-                         'changes, patch sets, and commits.')
-
-
-def build_service():
-    from apiclient.discovery import build
-    from oauth2client.client import flow_from_clientsecrets
-    from oauth2client.file import Storage
-    from oauth2client.tools import run
-
-    OAUTH_SCOPE = 'https://www.googleapis.com/auth/gmail.modify'
-    STORAGE = Storage('oauth.storage')
-
-    # Start the OAuth flow to retrieve credentials
-    flow = flow_from_clientsecrets(config.client_secret_file,
-                                   scope=OAUTH_SCOPE)
-    http = httplib2.Http()
-
-    # Try to retrieve credentials from storage or run the flow to generate them
-    credentials = STORAGE.get()
-    if credentials is None or credentials.invalid:
-        credentials = run(flow, STORAGE, http=http)
-
-    http = credentials.authorize(http)
-    return build('gmail', 'v1', http=http)
-
-
-def get_all_messages(service, label):
-    msgs = []
-    response = service.users().messages().list(
-        userId='me', labelIds=label).execute()
-    if 'messages' in response:
-        msgs.extend(response['messages'])
-    while 'nextPageToken' in response:
-        page_token = response['nextPageToken']
-        response = service.users().messages().list(
-            userId='me', pageToken=page_token).execute()
-        msgs.extend(response['messages'])
-    return msgs
-
-
-def get_body(msg):
-    if 'attachmentId' in msg['payload']['body']:
-        raise NotImplementedError('Handling of messages contained in '
-                                  'attachments not yet implemented.')
-    b64_body = msg['payload']['body']['data']
-    return base64.urlsafe_b64decode(b64_body.encode('ASCII'))
-
-
-def get_gerrit_info(body):
-    info = {}
-    gerrit_pattern = r'^Gerrit-(\S+): (.+)$'
-    for match in re.finditer(gerrit_pattern, body, flags=re.MULTILINE):
-        info[match.group(1)] = match.group(2).strip()
-    return info
-
-
-def clean_project(gerrit_info, dry_run):
-    username = config.jenkins_credentials['username']
-    password = config.jenkins_credentials['password']
-    jenkins_url = config.jenkins_url
-    jenkins = jenkinsapi.api.Jenkins(jenkins_url, username, password)
-
-    build = 'clean-bionic-presubmit'
-    if build in jenkins:
-        if not dry_run:
-            job = jenkins[build].invoke()
-            url = job.get_build().baseurl
-        else:
-            url = 'DRY_RUN_URL'
-        print '{}({}): {} {}'.format(
-            termcolor.colored('CLEAN', 'green'),
-            gerrit_info['MessageType'],
-            build,
-            url)
-    else:
-        print '{}({}): {}'.format(
-            termcolor.colored('CLEAN', 'red'),
-            gerrit_info['MessageType'],
-            termcolor.colored(build, 'red'))
-    return True
-
-
-def build_project(gerrit_info, dry_run, lunch_target=None):
-    project_to_jenkins_map = {
-        'platform/bionic': 'bionic-presubmit',
-        'platform/build': 'bionic-presubmit',
-        'platform/external/jemalloc': 'bionic-presubmit',
-        'platform/external/libcxx': 'bionic-presubmit',
-        'platform/external/libcxxabi': 'bionic-presubmit',
-        'platform/external/compiler-rt': 'bionic-presubmit',
-    }
-
-    username = config.jenkins_credentials['username']
-    password = config.jenkins_credentials['password']
-    jenkins_url = config.jenkins_url
-    jenkins = jenkinsapi.api.Jenkins(jenkins_url, username, password)
-
-    project = gerrit_info['Project']
-    change_id = gerrit_info['Change-Id']
-    if project in project_to_jenkins_map:
-        build = project_to_jenkins_map[project]
-    else:
-        build = 'bionic-presubmit'
-
-    if build in jenkins:
-        project_path = '/'.join(project.split('/')[1:])
-        if not project_path:
-            raise RuntimeError('bogus project: {}'.format(project))
-        if project_path.startswith('platform/'):
-            print '{}({}): {} => {}'.format(
-                termcolor.colored('ERROR', 'red'),
-                'project',
-                project,
-                project_path)
-            return False
-        try:
-            ref = gerrit.ref_for_change(change_id)
-        except gerrit.GerritError as ex:
-            print '{}({}): {} {}'.format(
-                termcolor.colored('GERRIT-ERROR', 'red'),
-                ex.code,
-                change_id,
-                ex.url)
-            return False
-        params = {
-            'REF': ref,
-            'CHANGE_ID': change_id,
-            'PROJECT': project_path
-        }
-        if lunch_target is not None:
-            params['LUNCH_TARGET'] = lunch_target
-        if not dry_run:
-            job = jenkins[build].invoke(build_params=params)
-            url = job.get_build().baseurl
-        else:
-            url = 'DRY_RUN_URL'
-        print '{}({}): {} => {} {} {}'.format(
-            termcolor.colored('BUILD', 'green'),
-            gerrit_info['MessageType'],
-            project,
-            build,
-            url,
-            change_id)
-    else:
-        print '{}({}): {} => {} {}'.format(
-            termcolor.colored('BUILD', 'red'),
-            gerrit_info['MessageType'],
-            project,
-            termcolor.colored(build, 'red'),
-            change_id)
-    return True
-
-
-def handle_change(gerrit_info, _, dry_run):
-    if should_skip_message(gerrit_info):
-        return True
-    return build_project(gerrit_info, dry_run)
-handle_newchange = handle_change
-handle_newpatchset = handle_change
-
-
-def drop_rejection(gerrit_info, dry_run):
-    request_data = {
-        'changeid': gerrit_info['Change-Id'],
-        'patchset': gerrit_info['PatchSet']
-    }
-    url = '{}/{}'.format(config.build_listener_url, 'drop-rejection')
-    headers = {'Content-Type': 'application/json;charset=UTF-8'}
-    if not dry_run:
-        try:
-            requests.post(url, headers=headers, data=json.dumps(request_data))
-        except requests.exceptions.ConnectionError as ex:
-            print '{}(drop-rejection): {}'.format(
-                termcolor.colored('ERROR', 'red'), ex)
-            return False
-    print '{}({}): {}'.format(
-        termcolor.colored('CHECK', 'green'),
-        gerrit_info['MessageType'],
-        gerrit_info['Change-Id'])
-    return True
-
-
-def handle_comment(gerrit_info, body, dry_run):
-    if 'Verified+1' in body:
-        drop_rejection(gerrit_info, dry_run)
-
-    # TODO(danalbert): Needs to be based on the account that made the comment.
-    if should_skip_message(gerrit_info):
-        return True
-
-    command_map = {
-        'clean': lambda: clean_project(gerrit_info, dry_run),
-        'retry': lambda: build_project(gerrit_info, dry_run),
-
-        'arm': lambda: build_project(gerrit_info, dry_run,
-                                     lunch_target='aosp_arm-eng'),
-        'aarch64': lambda: build_project(gerrit_info, dry_run,
-                                         lunch_target='aosp_arm64-eng'),
-        'mips': lambda: build_project(gerrit_info, dry_run,
-                                      lunch_target='aosp_mips-eng'),
-        'mips64': lambda: build_project(gerrit_info, dry_run,
-                                        lunch_target='aosp_mips64-eng'),
-        'x86': lambda: build_project(gerrit_info, dry_run,
-                                     lunch_target='aosp_x86-eng'),
-        'x86_64': lambda: build_project(gerrit_info, dry_run,
-                                        lunch_target='aosp_x86_64-eng'),
-    }
-
-    def handle_unknown_command():
-        pass    # TODO(danalbert): should complain to the commenter.
-
-    commands = [match.group(1).strip() for match in
-                re.finditer(r'^bionicbb:\s*(.+)$', body, flags=re.MULTILINE)]
-
-    for command in commands:
-        if command in command_map:
-            command_map[command]()
-        else:
-            handle_unknown_command()
-
-    return True
-
-
-def skip_handler(gerrit_info, _, __):
-    print '{}({}): {}'.format(
-        termcolor.colored('SKIP', 'yellow'),
-        gerrit_info['MessageType'],
-        gerrit_info['Change-Id'])
-    return True
-handle_abandon = skip_handler
-handle_merged = skip_handler
-handle_restore = skip_handler
-handle_revert = skip_handler
-
-
-def process_message(msg, dry_run):
-    try:
-        body = get_body(msg)
-        gerrit_info = get_gerrit_info(body)
-        if not gerrit_info:
-            print termcolor.colored('No info found: {}'.format(msg['id']),
-                                    'red')
-        msg_type = gerrit_info['MessageType']
-        handler = 'handle_{}'.format(gerrit_info['MessageType'])
-        if handler in globals():
-            return globals()[handler](gerrit_info, body, dry_run)
-        else:
-            print termcolor.colored(
-                'MessageType {} unhandled.'.format(msg_type), 'red')
-        print
-        return False
-    except NotImplementedError as ex:
-        print ex
-        return False
-    except gerrit.GerritError as ex:
-        if ex.code == 404:
-            print '{}(404): {}!'.format(
-                termcolor.colored('ERROR', 'red'), ex)
-            return True
-        else:
-            return False
-
-
-def main(argc, argv):
-    dry_run = False
-    if argc == 2 and argv[1] == '--dry-run':
-        dry_run = True
-    elif argc > 2:
-        sys.exit('usage: python {} [--dry-run]'.format(argv[0]))
-
-    gmail_service = build_service()
-    msg_service = gmail_service.users().messages()
-
-    while True:
-        try:
-            labels = gmail_service.users().labels().list(userId='me').execute()
-            if not labels['labels']:
-                raise GmailError('Could not retrieve Gmail labels')
-            label_id = get_gerrit_label(labels['labels'])
-            if not label_id:
-                raise GmailError('Could not find gerrit label')
-
-            for msg in get_all_messages(gmail_service, label_id):
-                msg = msg_service.get(userId='me', id=msg['id']).execute()
-                if process_message(msg, dry_run) and not dry_run:
-                    msg_service.trash(userId='me', id=msg['id']).execute()
-            time.sleep(60 * 5)
-        except GmailError as ex:
-            print '{}: {}!'.format(termcolor.colored('ERROR', 'red'), ex)
-            time.sleep(60 * 5)
-        except apiclient.errors.HttpError as ex:
-            print '{}: {}!'.format(termcolor.colored('ERROR', 'red'), ex)
-            time.sleep(60 * 5)
-        except httplib.BadStatusLine:
-            pass
-        except httplib2.ServerNotFoundError:
-            pass
-        except socket.error:
-            pass
-
-
-if __name__ == '__main__':
-    main(len(sys.argv), sys.argv)
diff --git a/tools/bionicbb/presubmit.py b/tools/bionicbb/presubmit.py
new file mode 100644
index 0000000..cc6f3cc
--- /dev/null
+++ b/tools/bionicbb/presubmit.py
@@ -0,0 +1,203 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import absolute_import
+
+import json
+import logging
+import os.path
+import re
+import requests
+
+import jenkinsapi
+
+import gerrit
+
+import config
+
+
+def is_untrusted_committer(change_id, patch_set):
+    """Return True unless the committer's email ends with '@google.com'."""
+    # TODO(danalbert): Needs to be based on the account that made the comment.
+    email = gerrit.get_commit(change_id, patch_set)['committer']['email']
+    return not email.endswith('@google.com')
+
+
+def contains_cleanspec(change_id, patch_set):
+    paths = gerrit.get_files_for_revision(change_id, patch_set)
+    return any(os.path.basename(p) == 'CleanSpec.mk' for p in paths)
+
+
+def contains_bionicbb(change_id, patch_set):
+    paths = gerrit.get_files_for_revision(change_id, patch_set)
+    return bool([p for p in paths if 'tools/bionicbb' in p])
+
+
+def should_skip_build(info):
+    """Return True if any presubmit check says this change must not be built.
+
+    Raises ValueError for message types other than newchange, newpatchset
+    and comment.
+    """
+    if info['MessageType'] not in ('newchange', 'newpatchset', 'comment'):
+        raise ValueError('should_skip_build() is only valid for new '
+                         'changes, patch sets, and commits.')
+
+    change_id, patch_set = info['Change-Id'], info['PatchSet']
+    checks = (
+        is_untrusted_committer,
+        contains_cleanspec,
+        contains_bionicbb,
+    )
+    return any(check(change_id, patch_set) for check in checks)
+
+
+def clean_project(dry_run):
+    """Trigger the clean-bionic-presubmit Jenkins job unless dry_run is set.
+
+    Always returns True, even when the job is missing from the server.
+    """
+    creds = config.jenkins_credentials
+    server = jenkinsapi.api.Jenkins(config.jenkins_url, creds['username'],
+                                    creds['password'])
+    build = 'clean-bionic-presubmit'
+    if build not in server:
+        logging.error('Failed to clean: could not find project %s', build)
+        return True
+
+    url = ('DRY_RUN_URL' if dry_run
+           else server[build].invoke().get_build().baseurl)
+    logging.info('Cleaning: %s %s', build, url)
+    return True
+
+
+def build_project(gerrit_info, dry_run, lunch_target=None):
+    project_to_jenkins_map = {
+        'platform/bionic': 'bionic-presubmit',
+        'platform/build': 'bionic-presubmit',
+        'platform/external/jemalloc': 'bionic-presubmit',
+        'platform/external/libcxx': 'bionic-presubmit',
+        'platform/external/libcxxabi': 'bionic-presubmit',
+        'platform/external/compiler-rt': 'bionic-presubmit',
+    }
+
+    username = config.jenkins_credentials['username']
+    password = config.jenkins_credentials['password']
+    jenkins_url = config.jenkins_url
+    jenkins = jenkinsapi.api.Jenkins(jenkins_url, username, password)
+
+    project = gerrit_info['Project']
+    change_id = gerrit_info['Change-Id']
+    if project in project_to_jenkins_map:
+        build = project_to_jenkins_map[project]
+    else:
+        build = 'bionic-presubmit'
+
+    if build in jenkins:
+        project_path = '/'.join(project.split('/')[1:])
+        if not project_path:
+            raise RuntimeError('bogus project: {}'.format(project))
+        if project_path.startswith('platform/'):
+            raise RuntimeError('Bad project mapping: {} => {}'.format(
+                project, project_path))
+        ref = gerrit.ref_for_change(change_id)
+        params = {
+            'REF': ref,
+            'CHANGE_ID': change_id,
+            'PROJECT': project_path
+        }
+        if lunch_target is not None:
+            params['LUNCH_TARGET'] = lunch_target
+        if not dry_run:
+            _ = jenkins[build].invoke(build_params=params)
+            # https://issues.jenkins-ci.org/browse/JENKINS-27256
+            # url = job.get_build().baseurl
+            url = 'URL UNAVAILABLE'
+        else:
+            url = 'DRY_RUN_URL'
+        logging.info('Building: %s => %s %s %s', project, build, url,
+                     change_id)
+    else:
+        logging.error('Unknown build: %s => %s %s', project, build, change_id)
+    return True
+
+
+def handle_change(gerrit_info, _, dry_run):
+    """Build the change unless should_skip_build() says to skip it."""
+    skip = should_skip_build(gerrit_info)
+    return True if skip else build_project(gerrit_info, dry_run)
+
+
+def drop_rejection(gerrit_info, dry_run):
+    """POST the change to the listener's drop-rejection URL (not on dry_run)."""
+    change_id = gerrit_info['Change-Id']
+    request_data = {'changeid': change_id,
+                    'patchset': gerrit_info['PatchSet']}
+    url = '{}/{}'.format(config.build_listener_url, 'drop-rejection')
+    headers = {'Content-Type': 'application/json;charset=UTF-8'}
+    if not dry_run:
+        try:
+            requests.post(url, headers=headers, data=json.dumps(request_data))
+        except requests.exceptions.ConnectionError as ex:
+            logging.error('Failed to drop rejection: %s', ex)
+            return False
+    logging.info('Dropped rejection: %s', change_id)
+    return True
+
+
+def handle_comment(gerrit_info, body, dry_run):
+    if 'Verified+1' in body:
+        drop_rejection(gerrit_info, dry_run)
+
+    if should_skip_build(gerrit_info):
+        return True
+
+    command_map = {
+        'clean': lambda: clean_project(dry_run),
+        'retry': lambda: build_project(gerrit_info, dry_run),
+
+        'arm': lambda: build_project(gerrit_info, dry_run,
+                                     lunch_target='aosp_arm-eng'),
+        'aarch64': lambda: build_project(gerrit_info, dry_run,
+                                         lunch_target='aosp_arm64-eng'),
+        'mips': lambda: build_project(gerrit_info, dry_run,
+                                      lunch_target='aosp_mips-eng'),
+        'mips64': lambda: build_project(gerrit_info, dry_run,
+                                        lunch_target='aosp_mips64-eng'),
+        'x86': lambda: build_project(gerrit_info, dry_run,
+                                     lunch_target='aosp_x86-eng'),
+        'x86_64': lambda: build_project(gerrit_info, dry_run,
+                                        lunch_target='aosp_x86_64-eng'),
+    }
+
+    def handle_unknown_command():
+        pass    # TODO(danalbert): should complain to the commenter.
+
+    commands = [match.group(1).strip() for match in
+                re.finditer(r'^bionicbb:\s*(.+)$', body, flags=re.MULTILINE)]
+
+    for command in commands:
+        if command in command_map:
+            command_map[command]()
+        else:
+            handle_unknown_command()
+
+    return True
+
+
+def skip_handler(gerrit_info, _, __):
+    logging.info('Skipping %s: %s', gerrit_info['MessageType'],
+                 gerrit_info['Change-Id'])
+    return True
diff --git a/tools/bionicbb/tasks.py b/tools/bionicbb/tasks.py
new file mode 100644
index 0000000..4c39a98
--- /dev/null
+++ b/tools/bionicbb/tasks.py
@@ -0,0 +1,108 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import httplib
+import httplib2
+import logging
+import re
+import socket
+
+import apiclient.errors
+
+import gerrit
+import gmail
+import presubmit
+
+
+def get_gerrit_info(body):
+    """Collects the 'Gerrit-<Key>: <value>' lines of body into a dict."""
+    header_re = re.compile(r'^Gerrit-(\S+): (.+)$', flags=re.MULTILINE)
+    # Values are stripped; a repeated key keeps its last value.
+    headers = header_re.finditer(body)
+    return {m.group(1): m.group(2).strip() for m in headers}
+
+
+def process_message(msg, dry_run):
+    """Passes a Gerrit email to the presubmit handler for its MessageType."""
+    try:
+        body = gmail.get_body(msg)
+        gerrit_info = get_gerrit_info(body)
+        if not gerrit_info:
+            # logging.fatal only logs, so stop before the lookups below.
+            logging.fatal('No Gerrit info found: %s', msg.subject)
+            return False
+        handlers = {
+            'comment': presubmit.handle_comment,
+            'newchange': presubmit.handle_change,
+            'newpatchset': presubmit.handle_change,
+            'abandon': presubmit.skip_handler,
+            'merge-failed': presubmit.skip_handler,
+            'merged': presubmit.skip_handler,
+            'restore': presubmit.skip_handler,
+            'revert': presubmit.skip_handler,
+        }
+        msg_type = gerrit_info.get('MessageType')
+        if msg_type in handlers:
+            return handlers[msg_type](gerrit_info, body, dry_run)
+        logging.warning('MessageType %s unhandled.', msg_type)
+        return False
+    except NotImplementedError as ex:
+        logging.error("%s", ex)
+        return False
+    except gerrit.GerritError as ex:
+        # Only a 404 counts as handled; other Gerrit errors return False.
+        change_id = gerrit_info['Change-Id']
+        logging.error('Gerrit error (%d): %s %s', ex.code, change_id, ex.url)
+        return ex.code == 404
+
+
+def get_and_process_jobs():
+    """Runs process_changes to completion, retrying on network errors."""
+    dry_run = False
+    retryable = (httplib.BadStatusLine, httplib2.ServerNotFoundError,
+                 socket.error)
+
+    gmail_service = gmail.build_service()
+    msg_service = gmail_service.users().messages()
+
+    # Some failures just mean "try again", so loop until a pass completes.
+    # Errors that call for backing off (typically Gmail API exceptions) are
+    # caught inside process_changes, which then returns normally.
+    while True:
+        try:
+            process_changes(gmail_service, msg_service, dry_run)
+        except retryable:
+            continue
+        else:
+            return
+
+
+def process_changes(gmail_service, msg_service, dry_run):
+    """Processes Gerrit-labelled mail; trashes handled mail unless dry_run."""
+    try:
+        response = gmail_service.users().labels().list(userId='me').execute()
+        if not response['labels']:
+            logging.error('Could not retrieve Gmail labels')
+            return
+        gerrit_label = gmail.get_gerrit_label(response['labels'])
+        if not gerrit_label:
+            logging.error('Could not find gerrit label')
+            return
+        for stub in gmail.get_all_messages(gmail_service, gerrit_label):
+            msg = msg_service.get(userId='me', id=stub['id']).execute()
+            if process_message(msg, dry_run) and not dry_run:
+                msg_service.trash(userId='me', id=msg['id']).execute()
+    except apiclient.errors.HttpError as ex:
+        logging.error('API Client HTTP error: %s', ex)
diff --git a/tools/bionicbb/test_gmail_listener.py b/tools/bionicbb/test_gmail_listener.py
deleted file mode 100644
index 6545cdc..0000000
--- a/tools/bionicbb/test_gmail_listener.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import gmail_listener
-import mock
-import unittest
-
-
-class TestShouldSkipMessage(unittest.TestCase):
-    def test_accepts_googlers(self):
-        for message_type in ('newchange', 'newpatchset', 'comment'):
-            with mock.patch('gerrit.get_commit') as mock_commit:
-                mock_commit.return_value = {
-                    'committer': {'email': 'googler@google.com'}
-                }
-
-                self.assertFalse(gmail_listener.should_skip_message({
-                    'MessageType': message_type,
-                    'Change-Id': '',
-                    'PatchSet': '',
-                }))
-
-    def test_rejects_non_googlers(self):
-        for message_type in ('newchange', 'newpatchset', 'comment'):
-            with mock.patch('gerrit.get_commit') as mock_commit:
-                mock_commit.return_value = {
-                    'committer': {'email': 'fakegoogler@google.com.fake.com'}
-                }
-
-                self.assertTrue(gmail_listener.should_skip_message({
-                    'MessageType': message_type,
-                    'Change-Id': '',
-                    'PatchSet': '',
-                }))
-
-            with mock.patch('gerrit.get_commit') as mock_commit:
-                mock_commit.return_value = {
-                    'committer': {'email': 'johndoe@example.com'}
-                }
-
-                self.assertTrue(gmail_listener.should_skip_message({
-                    'MessageType': message_type,
-                    'Change-Id': '',
-                    'PatchSet': '',
-                }))
-
-    def test_calls_gerrit_get_commit(self):  # pylint: disable=no-self-use
-        for message_type in ('newchange', 'newpatchset', 'comment'):
-            with mock.patch('gerrit.get_commit') as mock_commit:
-                gmail_listener.should_skip_message({
-                    'MessageType': message_type,
-                    'Change-Id': 'foo',
-                    'PatchSet': 'bar',
-                })
-            mock_commit.assert_called_once_with('foo', 'bar')
-
-            with mock.patch('gerrit.get_commit') as mock_commit:
-                gmail_listener.should_skip_message({
-                    'MessageType': message_type,
-                    'Change-Id': 'baz',
-                    'PatchSet': 'qux',
-                })
-            mock_commit.assert_called_once_with('baz', 'qux')
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tools/bionicbb/test_tasks.py b/tools/bionicbb/test_tasks.py
new file mode 100644
index 0000000..b36cbad
--- /dev/null
+++ b/tools/bionicbb/test_tasks.py
@@ -0,0 +1,94 @@
+import mock
+import unittest
+
+import presubmit
+
+
+class TestShouldSkipBuild(unittest.TestCase):
+    """Tests presubmit.should_skip_build.
+
+    Each test stubs the Gerrit query it exercises (gerrit.get_commit or
+    gerrit.get_files_for_revision) and makes the other patched presubmit
+    checks return False.
+    """
+
+    def _check_every_message_type(self, assertion):
+        """Applies assertion to should_skip_build for each message type.
+
+        assertion is a bound check such as self.assertTrue; it receives the
+        result for 'newchange', 'newpatchset' and 'comment' in turn.
+        """
+        for message_type in ('newchange', 'newpatchset', 'comment'):
+            assertion(presubmit.should_skip_build({
+                'MessageType': message_type,
+                'Change-Id': '',
+                'PatchSet': '',
+            }))
+
+    @mock.patch('presubmit.contains_bionicbb')
+    @mock.patch('presubmit.contains_cleanspec')
+    @mock.patch('gerrit.get_commit')
+    def test_accepts_googlers(self, mock_commit, *other_checks):
+        """A committer with a google.com address must not be skipped."""
+        mock_commit.return_value = {
+            'committer': {'email': 'googler@google.com'}
+        }
+
+        for check in other_checks:
+            check.return_value = False
+
+        self._check_every_message_type(self.assertFalse)
+
+    @mock.patch('presubmit.contains_bionicbb')
+    @mock.patch('presubmit.contains_cleanspec')
+    @mock.patch('gerrit.get_commit')
+    def test_rejects_googlish_domains(self, mock_commit, *other_checks):
+        """An address that merely contains google.com must be skipped."""
+        mock_commit.return_value = {
+            'committer': {'email': 'fakegoogler@google.com.fake.com'}
+        }
+
+        for check in other_checks:
+            check.return_value = False
+
+        self._check_every_message_type(self.assertTrue)
+
+    @mock.patch('presubmit.contains_bionicbb')
+    @mock.patch('presubmit.contains_cleanspec')
+    @mock.patch('gerrit.get_commit')
+    def test_rejects_non_googlers(self, mock_commit, *other_checks):
+        """A committer outside google.com must be skipped."""
+        mock_commit.return_value = {
+            'committer': {'email': 'johndoe@example.com'}
+        }
+
+        for check in other_checks:
+            check.return_value = False
+
+        self._check_every_message_type(self.assertTrue)
+
+    @mock.patch('presubmit.contains_bionicbb')
+    @mock.patch('presubmit.is_untrusted_committer')
+    @mock.patch('gerrit.get_files_for_revision')
+    def test_skips_cleanspecs(self, mock_files, *other_checks):
+        """A change that touches a CleanSpec.mk must be skipped."""
+        mock_files.return_value = ['foo/CleanSpec.mk']
+        for check in other_checks:
+            check.return_value = False
+
+        self._check_every_message_type(self.assertTrue)
+
+    @mock.patch('presubmit.contains_cleanspec')
+    @mock.patch('presubmit.is_untrusted_committer')
+    @mock.patch('gerrit.get_files_for_revision')
+    def test_skips_bionicbb(self, mock_files, *other_checks):
+        """A change that touches tools/bionicbb must be skipped."""
+        mock_files.return_value = ['tools/bionicbb/common.sh']
+        for check in other_checks:
+            check.return_value = False
+
+        self._check_every_message_type(self.assertTrue)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/relocation_packer/README.TXT b/tools/relocation_packer/README.TXT
deleted file mode 100644
index 071ab5d..0000000
--- a/tools/relocation_packer/README.TXT
+++ /dev/null
@@ -1,135 +0,0 @@
-Introduction:
--------------
-
-Relative relocations are the bulk of dynamic relocations (the .rel.dyn
-or .rela.dyn sections) in libchrome.<version>.so.  The ELF standard
-representation of them is wasteful.
-
-Packing uses a combination of run length encoding, delta encoding, and LEB128
-encoding to store them more efficiently.  Packed relocations are placed in
-a new .android.rel.dyn or .android.rela.dyn section.  Packing reduces
-the footprint of libchrome.<version>.so in the filesystem, in APK downloads,
-and in memory when loaded on the device.
-
-A packed libchrome.<version>.so is designed so that it can be loaded directly
-on Android, but requires the explicit support of a crazy linker that has been
-extended to understand packed relocations.  Packed relocations are currently
-only supported on ARM.
-
-A packed libchrome.<version>.so cannot currently be used with the standard
-Android runtime linker.
-
-See src/*.h for design and implementation notes.
-
-
-Notes:
-------
-
-Packing does not adjust debug data.  An unstripped libchrome.<version>.so
-can be packed and will run, but may no longer be useful for debugging.
-
-Unpacking on the device requires the explicit support of an extended crazy
-linker.  Adds the following new .dynamic tags, used by the crazy linker to
-find the packed .android.rel.dyn or .android.rela.dyn section data:
-
-  DT_ANDROID_REL_OFFSET = DT_LOOS    (Operating System specific: 0x6000000d)
-    - The offset of packed relocation data in libchrome.<version>.so
-  DT_ANDROID_REL_SIZE = DT_LOOS + 1  (Operating System Specific: 0x6000000e)
-    - The size of packed relocation data in bytes
-
-32 bit ARM libraries use relocations without addends.  64 bit ARM libraries
-use relocations with addends.  The packing strategy necessarily differs for
-the two relocation types.
-
-Where libchrome.<version>.so contains relocations without addends, the format
-of .android.rel.dyn data is:
-
-  "APR1" identifier
-  N: the number of count-delta pairs in the encoding
-  A: the initial offset
-  N * C,D: N count-delta pairs
-
-Where libchrome.<version>.so contains relocations with addends, the format
-of .android.rela.dyn data is:
-
-  "APA1" identifier
-  N: the number of addr-addend delta pairs in the encoding
-  N * A,V: N addr-addend delta pairs
-
-All numbers in the encoding stream are stored as LEB128 values.  For details
-see http://en.wikipedia.org/wiki/LEB128.
-
-The streaming unpacking algorithm for 32 bit ARM is:
-
-  skip over "APR1"
-  pairs, addr = next leb128 value, next leb128 value
-  emit R_ARM_RELATIVE relocation with r_offset = addr
-  while pairs:
-    count, delta = next leb128 value, next leb128 value
-    while count:
-      addr += delta
-      emit R_ARM_RELATIVE relocation with r_offset = addr
-      count--
-    pairs--
-
-The streaming unpacking algorithm for 64 bit ARM is:
-
-  skip over "APA1"
-  pairs = next signed leb128 value
-  addr, addend = 0, 0
-  while pairs:
-    addr += next signed leb128 value
-    addend += next signed leb128 value
-    emit R_AARCH64_RELATIVE relocation with r_offset = addr, r_addend = addend
-    pairs--
-
-
-Usage instructions:
--------------------
-
-To pack relocations, add an empty .android.rel.dyn or .android.rela.dyn and
-then run the tool:
-
-    echo -n 'NULL' >/tmp/small
-    if file libchrome.<version>.so | grep -q 'ELF 32'; then
-      arm-linux-androideabi-objcopy
-          --add-section .android.rel.dyn=/tmp/small
-          libchrome.<version>.so libchrome.<version>.so.packed
-    else
-      aarch64-linux-android-objcopy
-          --add-section .android.rela.dyn=/tmp/small
-          libchrome.<version>.so libchrome.<version>.so.packed
-    fi
-    rm /tmp/small
-    relocation_packer libchrome.<version>.so.packed
-
-To unpack and restore the shared library to its original state:
-
-    cp libchrome.<version>.so.packed unpackable
-    relocation_packer -u unpackable
-    if file libchrome.<version>.so | grep -q 'ELF 32'; then
-      arm-linux-androideabi-objcopy \
-          --remove-section=.android.rel.dyn unpackable libchrome.<version>.so
-    else
-      aarch64-linux-android-objcopy \
-          --remove-section=.android.rela.dyn unpackable libchrome.<version>.so
-    endif
-    rm unpackable
-
-
-Bugs & TODOs:
--------------
-
-Requires two free slots in the .dynamic section.  Uses these to add data that
-tells the crazy linker where to find the packed relocation data.  Fails
-if insufficient free slots exist (use gold --spare-dynamic-slots to increase
-the allocation).
-
-Requires libelf 0.158 or later.  Earlier libelf releases may be buggy in
-ways that prevent the packer from working correctly.
-
-
-Testing:
---------
-
-Unittests run under gtest, on the host system.
diff --git a/tools/relocation_packer/src/elf_file.cc b/tools/relocation_packer/src/elf_file.cc
index 20b25ef..6843f5b 100644
--- a/tools/relocation_packer/src/elf_file.cc
+++ b/tools/relocation_packer/src/elf_file.cc
@@ -439,6 +439,9 @@
                                 tag == DT_JMPREL ||
                                 tag == DT_INIT_ARRAY ||
                                 tag == DT_FINI_ARRAY ||
+                                tag == DT_VERSYM ||
+                                tag == DT_VERNEED ||
+                                tag == DT_VERDEF ||
                                 tag == DT_ANDROID_REL||
                                 tag == DT_ANDROID_RELA);
 
@@ -586,7 +589,7 @@
     const typename ELF::Rel* relocations_base = reinterpret_cast<typename ELF::Rel*>(data->d_buf);
     ConvertRelArrayToRelaVector(relocations_base,
         data->d_size / sizeof(typename ELF::Rel), &relocations);
-    LOG(INFO) << "Relocations   : REL";
+    VLOG(1) << "Relocations   : REL";
   } else if (relocations_type_ == RELA) {
     // Convert data to a vector of relocations with addends.
     const typename ELF::Rela* relocations_base = reinterpret_cast<typename ELF::Rela*>(data->d_buf);
@@ -594,7 +597,7 @@
         relocations_base,
         relocations_base + data->d_size / sizeof(relocations[0]));
 
-    LOG(INFO) << "Relocations   : RELA";
+    VLOG(1) << "Relocations   : RELA";
   } else {
     NOTREACHED();
   }
@@ -618,18 +621,18 @@
       relocations_type_ == RELA ? sizeof(typename ELF::Rela) : sizeof(typename ELF::Rel);
   const size_t initial_bytes = relocations->size() * rel_size;
 
-  LOG(INFO) << "Unpacked                   : " << initial_bytes << " bytes";
+  VLOG(1) << "Unpacked                   : " << initial_bytes << " bytes";
   std::vector<uint8_t> packed;
   RelocationPacker<ELF> packer;
 
   // Pack relocations: dry run to estimate memory savings.
   packer.PackRelocations(*relocations, &packed);
   const size_t packed_bytes_estimate = packed.size() * sizeof(packed[0]);
-  LOG(INFO) << "Packed         (no padding): " << packed_bytes_estimate << " bytes";
+  VLOG(1) << "Packed         (no padding): " << packed_bytes_estimate << " bytes";
 
   if (packed.empty()) {
     LOG(INFO) << "Too few relocations to pack";
-    return false;
+    return true;
   }
 
   // Pre-calculate the size of the hole we will close up when we rewrite
@@ -646,7 +649,7 @@
   // Adjusting for alignment may have removed any packing benefit.
   if (hole_size == 0) {
     LOG(INFO) << "Too few relocations to pack after alignment";
-    return false;
+    return true;
   }
 
   if (hole_size <= 0) {
diff --git a/tools/relocation_packer/src/elf_file.h b/tools/relocation_packer/src/elf_file.h
index 73c3192..a749d50 100644
--- a/tools/relocation_packer/src/elf_file.h
+++ b/tools/relocation_packer/src/elf_file.h
@@ -4,53 +4,16 @@
 
 // ELF shared object file updates handler.
 //
-// Provides functions to remove relative relocations from the .rel.dyn
-// or .rela.dyn sections and pack into .android.rel.dyn or .android.rela.dyn,
-// and unpack to return the file to its pre-packed state.
-//
-// Files to be packed or unpacked must include an existing .android.rel.dyn
-// or android.rela.dyn section.  A standard libchrome.<version>.so will not
-// contain this section, so the following can be used to add one:
-//
-//   echo -n 'NULL' >/tmp/small
-//   if file libchrome.<version>.so | grep -q 'ELF 32'; then
-//     arm-linux-androideabi-objcopy
-//         --add-section .android.rel.dyn=/tmp/small
-//         libchrome.<version>.so libchrome.<version>.so.packed
-//   else
-//     aarch64-linux-android-objcopy
-//         --add-section .android.rela.dyn=/tmp/small
-//         libchrome.<version>.so libchrome.<version>.so.packed
-//   fi
-//   rm /tmp/small
-//
-// To use, open the file and pass the file descriptor to the constructor,
-// then pack or unpack as desired.  Packing or unpacking will flush the file
-// descriptor on success.  Example:
-//
-//   int fd = open(..., O_RDWR);
-//   ElfFile elf_file(fd);
-//   bool status;
-//   if (is_packing)
-//     status = elf_file.PackRelocations();
-//   else
-//     status = elf_file.UnpackRelocations();
-//   close(fd);
+// Provides functions to pack relocations in the .rel.dyn or .rela.dyn
+// sections, and unpack to return the file to its pre-packed state.
 //
 // SetPadding() causes PackRelocations() to pad .rel.dyn or .rela.dyn with
 // NONE-type entries rather than cutting a hole out of the shared object
 // file.  This keeps all load addresses and offsets constant, and enables
 // easier debugging and testing.
 //
-// A packed shared object file has all of its relative relocations
-// removed from .rel.dyn or .rela.dyn, and replaced as packed data in
-// .android.rel.dyn or .android.rela.dyn respectively.  The resulting file
-// is shorter than its non-packed original.
-//
-// Unpacking a packed file restores the file to its non-packed state, by
-// expanding the packed data in .android.rel.dyn or .android.rela.dyn,
-// combining the relative relocations with the data already in .rel.dyn
-// or .rela.dyn, and then writing back the now expanded section.
+// A packed shared object file is shorter than its non-packed original.
+// Unpacking a packed file restores the file to its non-packed state.
 
 #ifndef TOOLS_RELOCATION_PACKER_SRC_ELF_FILE_H_
 #define TOOLS_RELOCATION_PACKER_SRC_ELF_FILE_H_
diff --git a/tools/relocation_packer/src/leb128.h b/tools/relocation_packer/src/leb128.h
index 2c5b5d0..67fc4b8 100644
--- a/tools/relocation_packer/src/leb128.h
+++ b/tools/relocation_packer/src/leb128.h
@@ -4,9 +4,8 @@
 
 // LEB128 encoder and decoder for packed relative relocations.
 //
-// Run-length encoded relative relocations consist of a large number
-// of pairs of relatively small positive integer values.  Encoding these as
-// LEB128 saves space.
+// Packed relocations consist of a large number of relatively small
+// integer values.  Encoding these as LEB128 saves space.
 //
 // For more on LEB128 see http://en.wikipedia.org/wiki/LEB128.
 
diff --git a/tools/relocation_packer/src/main.cc b/tools/relocation_packer/src/main.cc
index 3f784e4..8e9de6d 100644
--- a/tools/relocation_packer/src/main.cc
+++ b/tools/relocation_packer/src/main.cc
@@ -4,9 +4,6 @@
 
 // Tool to pack and unpack relative relocations in a shared library.
 //
-// Packing removes relative relocations from .rel.dyn and writes them
-// in a more compact form to .android.rel.dyn.  Unpacking does the reverse.
-//
 // Invoke with -v to trace actions taken when packing or unpacking.
 // Invoke with -p to pad removed relocations with R_*_NONE.  Suppresses
 // shrinking of .rel.dyn.
diff --git a/tools/relocation_packer/src/packer.h b/tools/relocation_packer/src/packer.h
index 8a57e62..63f50e2 100644
--- a/tools/relocation_packer/src/packer.h
+++ b/tools/relocation_packer/src/packer.h
@@ -3,43 +3,6 @@
 // found in the LICENSE file.
 
 // Pack relative relocations into a more compact form.
-//
-//
-// For relative relocations without addends (32 bit platforms)
-// -----------------------------------------------------------
-//
-// Applies two packing strategies.  The first is run-length encoding, which
-// turns a large set of relative relocations into a much smaller set
-// of delta-count pairs, prefixed with a two-word header comprising the
-// count of pairs and the initial relocation offset.  The second is LEB128
-// encoding, which compresses the result of run-length encoding.
-//
-// Once packed, data is prefixed by an identifier that allows for any later
-// versioning of packing strategies.
-//
-// A complete packed stream of relocations without addends might look
-// something like:
-//
-//   "APR1"   pairs  init_offset count1 delta1 count2 delta2 ...
-//   41505231 f2b003 b08ac716    e001   04     01     10     ...
-//
-//
-// For relative relocations with addends (64 bit platforms)
-// --------------------------------------------------------
-//
-// Applies two packing strategies.  The first is delta encoding, which
-// turns a large set of relative relocations into a smaller set
-// of offset and addend delta pairs, prefixed with a header indicating the
-// count of pairs.  The second is signed LEB128 encoding, which compacts
-// the result of delta encoding.
-//
-// Once packed, data is prefixed by an identifier that allows for any later
-// versioning of packing strategies.
-//
-// A complete packed stream might look something like:
-//
-//   "APA1"   pairs  offset_d1 addend_d1 offset_d2 addend_d2 ...
-//   41505232 f2b018 04        28        08        9f01      ...
 
 #ifndef TOOLS_RELOCATION_PACKER_SRC_PACKER_H_
 #define TOOLS_RELOCATION_PACKER_SRC_PACKER_H_
diff --git a/tools/relocation_packer/src/run_length_encoder.h b/tools/relocation_packer/src/run_length_encoder.h
deleted file mode 100644
index f3a80e6..0000000
--- a/tools/relocation_packer/src/run_length_encoder.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Run-length encode and decode relative relocations.
-//
-// Relative relocations are the bulk of dynamic relocations (the
-// .rel.dyn or .rela.dyn sections) in libchrome.<version>.so, and the ELF
-// standard representation of them is wasteful.  .rel.dyn contains
-// relocations without addends, .rela.dyn relocations with addends.
-//
-// A relocation with no addend is 8 bytes on 32 bit platforms and 16 bytes
-// on 64 bit plaforms, split into offset and info fields.  Offsets strictly
-// increase, and each is commonly a few bytes different from its predecessor.
-// There are long runs where the difference does not change.  The info field
-// is constant.  Example, from 'readelf -x4 libchrome.<version>.so' 32 bit:
-//
-//   offset   info     offset   info
-//   808fef01 17000000 848fef01 17000000 ................
-//   888fef01 17000000 8c8fef01 17000000 ................
-//   908fef01 17000000 948fef01 17000000 ................
-//
-// Run length encoding packs this data more efficiently, by representing it
-// as a delta and a count of entries each differing from its predecessor
-// by this delta.  The above can be represented as a start address followed
-// by an encoded count of 6 and offset difference of 4:
-//
-//   start    count    diff
-//   01ef8f80 00000006 00000004
-//
-// Because relative relocation offsets strictly increase, the complete
-// set of relative relocations in libchrome.<version>.so can be
-// represented by a single start address followed by one or more difference
-// and count encoded word pairs:
-//
-//   start    run1 count run1 diff  run2 count run2 diff
-//   01ef8f80 00000006   00000004   00000010   00000008 ...
-//
-// Decoding regenerates relative relocations beginning at address
-// 'start' and for each encoded run, incrementing the address by 'difference'
-// for 'count' iterations and emitting a new relative relocation.
-//
-// Once encoded, data is prefixed by a single word count of packed delta and
-// count pairs.  A final run-length encoded relative relocations vector
-// might therefore look something like:
-//
-//   pairs    start    run 1             run 2             ... run 15
-//   0000000f 01ef8f80 00000006 00000004 00000010 00000008 ...
-// Interpreted as:
-//   pairs=15 start=.. count=6,delta=4   count=16,delta=8
-
-#ifndef TOOLS_RELOCATION_PACKER_SRC_RUN_LENGTH_ENCODER_H_
-#define TOOLS_RELOCATION_PACKER_SRC_RUN_LENGTH_ENCODER_H_
-
-#include <vector>
-
-#include "elf.h"
-#include "elf_traits.h"
-
-namespace relocation_packer {
-
-// A RelocationRunLengthCodec packs vectors of relative relocations
-// into more compact forms, and unpacks them to reproduce the pre-packed data.
-class RelocationRunLengthCodec {
- public:
-  // Encode relative relocations into a more compact form.
-  // |relocations| is a vector of relative relocation structs.
-  // |packed| is the vector of packed words into which relocations are packed.
-  static void Encode(const std::vector<ELF::Rel>& relocations,
-                     std::vector<ELF::Xword>* packed);
-
-  // Decode relative relocations from their more compact form.
-  // |packed| is the vector of packed relocations.
-  // |relocations| is a vector of unpacked relative relocation structs.
-  static void Decode(const std::vector<ELF::Xword>& packed,
-                     std::vector<ELF::Rel>* relocations);
-};
-
-}  // namespace relocation_packer
-
-#endif  // TOOLS_RELOCATION_PACKER_SRC_RUN_LENGTH_ENCODER_H_
diff --git a/tools/relocation_packer/src/sleb128.h b/tools/relocation_packer/src/sleb128.h
index fa0a246..3a63f66 100644
--- a/tools/relocation_packer/src/sleb128.h
+++ b/tools/relocation_packer/src/sleb128.h
@@ -4,9 +4,8 @@
 
 // SLEB128 encoder and decoder for packed relative relocations.
 //
-// Delta encoded relative relocations consist of a large number
-// of pairs signed integer values, many with small values.  Encoding these
-// as signed LEB128 saves space.
+// Packed relocations consist of a large number of relatively small
+// integer values.  Encoding these as signed LEB128 saves space.
 //
 // For more on LEB128 see http://en.wikipedia.org/wiki/LEB128.
 
diff --git a/tools/relocation_packer/test_data/generate_elf_file_unittest_relocs.py b/tools/relocation_packer/test_data/generate_elf_file_unittest_relocs.py
deleted file mode 100755
index e71b5cb..0000000
--- a/tools/relocation_packer/test_data/generate_elf_file_unittest_relocs.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Build relocation packer unit test data.
-
-Uses a built relocation packer to generate 'golden' reference test data
-files for elf_file_unittests.cc.
-"""
-
-import optparse
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-
-def PackArmLibraryRelocations(android_pack_relocations,
-                              android_objcopy,
-                              added_section,
-                              input_path,
-                              output_path):
-  # Copy and add a 'NULL' .android.rel.dyn section for the packing tool.
-  with tempfile.NamedTemporaryFile() as stream:
-    stream.write('NULL')
-    stream.flush()
-    objcopy_command = [android_objcopy,
-                       '--add-section', '%s=%s' % (added_section, stream.name),
-                       input_path, output_path]
-    subprocess.check_call(objcopy_command)
-
-  # Pack relocations.
-  pack_command = [android_pack_relocations, output_path]
-  subprocess.check_call(pack_command)
-
-
-def UnpackArmLibraryRelocations(android_pack_relocations,
-                                input_path,
-                                output_path):
-  shutil.copy(input_path, output_path)
-
-  # Unpack relocations.  We leave the .android.rel.dyn or .android.rela.dyn
-  # in place.
-  unpack_command = [android_pack_relocations, '-u', output_path]
-  subprocess.check_call(unpack_command)
-
-
-def main():
-  parser = optparse.OptionParser()
-
-  parser.add_option('--android-pack-relocations',
-      help='Path to the ARM relocations packer binary')
-  parser.add_option('--android-objcopy',
-      help='Path to the toolchain\'s objcopy binary')
-  parser.add_option('--added-section',
-      choices=['.android.rel.dyn', '.android.rela.dyn'],
-      help='Section to add, one of ".android.rel.dyn" or ".android.rela.dyn"')
-  parser.add_option('--test-file',
-      help='Path to the input test file, an unpacked ARM .so')
-  parser.add_option('--unpacked-output',
-      help='Path to the output file for reference unpacked data')
-  parser.add_option('--packed-output',
-      help='Path to the output file for reference packed data')
-
-  options, _ = parser.parse_args()
-
-  for output in [options.unpacked_output, options.packed_output]:
-    directory = os.path.dirname(output)
-    if not os.path.exists(directory):
-      os.makedirs(directory)
-
-  PackArmLibraryRelocations(options.android_pack_relocations,
-                            options.android_objcopy,
-                            options.added_section,
-                            options.test_file,
-                            options.packed_output)
-
-  UnpackArmLibraryRelocations(options.android_pack_relocations,
-                              options.packed_output,
-                              options.unpacked_output)
-
-  return 0
-
-
-if __name__ == '__main__':
-  sys.exit(main())
diff --git a/tools/relocation_packer/test_data/generate_elf_file_unittest_relocs.sh b/tools/relocation_packer/test_data/generate_elf_file_unittest_relocs.sh
deleted file mode 100755
index f90a2f6..0000000
--- a/tools/relocation_packer/test_data/generate_elf_file_unittest_relocs.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# Generates elf_file_unittest_relocs_arm{32,64}{,_packed}.so test data files
-# from elf_file_unittest_relocs.cc.  Run once to create these test data
-# files; the files are checked into the source tree.
-#
-# To use:
-#   ./generate_elf_file_unittest_relocs.sh
-#   git add elf_file_unittest_relocs_arm{32,64}{,_packed}.so
-
-function main() {
-  local '-r' test_data_directory="$(pwd)"
-  cd '../../..'
-
-  source tools/cr/cr-bash-helpers.sh
-  local arch
-  for arch in 'arm32' 'arm64'; do
-    cr 'init' '--platform=android' '--type=Debug' '--architecture='"${arch}"
-    cr 'build' 'relocation_packer_unittests_test_data'
-  done
-
-  local '-r' packer='out_android/Debug/obj/tools/relocation_packer'
-  local '-r' gen="${packer}/relocation_packer_unittests_test_data.gen"
-
-  cp "${gen}/elf_file_unittest_relocs_arm"{32,64}{,_packed}'.so' \
-     "${test_data_directory}"
-
-  return 0
-}
-
-main