Merge "Move getusershell/endusershell/setusershell to ndk_cruft.cpp."
diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk
index c3eb6d3..ae0541f 100644
--- a/benchmarks/Android.mk
+++ b/benchmarks/Android.mk
@@ -20,12 +20,29 @@
 # Benchmarks library, usable by projects outside this directory.
 # -----------------------------------------------------------------------------
 
+benchmark_cflags := \
+    -O2 \
+    -fno-builtin \
+    -Wall \
+    -Wextra \
+    -Werror \
+    -Wunused \
+
+benchmark_cppflags := \
+    -std=gnu++11 \
+
+benchmarklib_src_files := \
+    Benchmark.cpp \
+    utils.cpp \
+    main.cpp \
+
 include $(CLEAR_VARS)
 LOCAL_MODULE := libbenchmark
-LOCAL_CFLAGS += -O2 -Wall -Wextra -Werror
-LOCAL_SRC_FILES := benchmark_main.cpp
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/include
-LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include
+LOCAL_CFLAGS := $(benchmark_cflags)
+LOCAL_CPPFLAGS := $(benchmark_cppflags)
+LOCAL_SRC_FILES := $(benchmarklib_src_files)
+LOCAL_C_INCLUDES := $(benchmark_c_includes)
+LOCAL_STATIC_LIBRARIES := libutils
 include $(BUILD_STATIC_LIBRARY)
 
 # Only supported on linux systems.
@@ -33,11 +50,12 @@
 
 include $(CLEAR_VARS)
 LOCAL_MODULE := libbenchmark
-LOCAL_CFLAGS += -O2 -Wall -Wextra -Werror
-LOCAL_SRC_FILES := benchmark_main.cpp
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/include
-LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include
+LOCAL_CFLAGS := $(benchmark_cflags)
+LOCAL_CPPFLAGS := $(benchmark_cppflags)
+LOCAL_SRC_FILES := $(benchmarklib_src_files)
+LOCAL_C_INCLUDES := $(benchmark_c_includes)
 LOCAL_MULTILIB := both
+LOCAL_STATIC_LIBRARIES := libutils
 include $(BUILD_HOST_STATIC_LIBRARY)
 
 endif
@@ -45,16 +63,9 @@
 # -----------------------------------------------------------------------------
 # Benchmarks.
 # -----------------------------------------------------------------------------
-
-benchmark_c_flags = \
-    -O2 \
-    -Wall -Wextra -Wunused \
-    -Werror \
-    -fno-builtin \
-    -std=gnu++11 \
-
-benchmark_src_files = \
+benchmark_src_files := \
     math_benchmark.cpp \
+    property_benchmark.cpp \
     pthread_benchmark.cpp \
     semaphore_benchmark.cpp \
     stdio_benchmark.cpp \
@@ -70,9 +81,10 @@
 LOCAL_MODULE_STEM_32 := bionic-benchmarks32
 LOCAL_MODULE_STEM_64 := bionic-benchmarks64
 LOCAL_MULTILIB := both
-LOCAL_CFLAGS += $(benchmark_c_flags)
-LOCAL_SRC_FILES := $(benchmark_src_files) property_benchmark.cpp
-LOCAL_STATIC_LIBRARIES += libbenchmark
+LOCAL_CFLAGS := $(benchmark_cflags)
+LOCAL_CPPFLAGS := $(benchmark_cppflags)
+LOCAL_SRC_FILES := $(benchmark_src_files)
+LOCAL_STATIC_LIBRARIES := libbenchmark libutils
 include $(BUILD_EXECUTABLE)
 
 # We don't build a static benchmark executable because it's not usually
@@ -90,10 +102,11 @@
 LOCAL_MODULE_STEM_32 := bionic-benchmarks-glibc32
 LOCAL_MODULE_STEM_64 := bionic-benchmarks-glibc64
 LOCAL_MULTILIB := both
-LOCAL_CFLAGS += $(benchmark_c_flags)
-LOCAL_LDFLAGS += -lrt
+LOCAL_CFLAGS := $(benchmark_cflags)
+LOCAL_CPPFLAGS := $(benchmark_cppflags)
+LOCAL_LDFLAGS := -lrt
 LOCAL_SRC_FILES := $(benchmark_src_files)
-LOCAL_STATIC_LIBRARIES += libbenchmark
+LOCAL_STATIC_LIBRARIES := libbenchmark libutils
 include $(BUILD_HOST_EXECUTABLE)
 
 endif
diff --git a/benchmarks/Benchmark.cpp b/benchmarks/Benchmark.cpp
new file mode 100644
index 0000000..eea304f
--- /dev/null
+++ b/benchmarks/Benchmark.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <string>
+#include <vector>
+
+#include <utils/stringprintf.h>
+
+#include <benchmark/Benchmark.h>
+
+#include "utils.h"
+
+namespace testing {
+
+static uint64_t NanoTime() {  // Current CLOCK_MONOTONIC reading, in nanoseconds.
+  struct timespec t;
+  t.tv_sec = t.tv_nsec = 0;  // Pre-zeroed because clock_gettime()'s result is not checked.
+  clock_gettime(CLOCK_MONOTONIC, &t);
+  return static_cast<uint64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;  // s -> ns, plus ns part.
+}
+
+std::vector<Benchmark*>& Benchmark::List() {
+  static std::vector<Benchmark*> list;
+  return list;
+}
+
+int Benchmark::MaxNameColumnWidth() {  // Widest NameColumnWidth() of any benchmark, at least 20.
+  int max = 20;
+  for (auto& benchmark : List()) {
+    max = std::max(max, benchmark->NameColumnWidth());
+  }
+  return max;
+}
+
+bool Benchmark::RunAll(std::vector<regex_t*>& regs) {  // Returns false if no benchmark ran.
+  bool ran_benchmark = false;
+  for (auto& benchmark : List()) {
+    if (benchmark->ShouldRun(regs)) {
+      if (!ran_benchmark) {  // Print the column header once, just before the first run.
+        printf("%-*s %10s %10s\n", MaxNameColumnWidth(), "", "iterations", "ns/op");
+        ran_benchmark = true;
+      }
+      benchmark->RunAll();  // The per-benchmark virtual RunAll(), not this static overload.
+    }
+  }
+  return ran_benchmark;
+}
+
+bool Benchmark::ShouldRun(std::vector<regex_t*>& regs) {  // True if this benchmark is selected.
+  if (regs.empty()) {
+    return true;  // No filters were given, so every benchmark runs.
+  }
+
+  for (const auto& re : regs) {
+    if (regexec(re, Name().c_str(), 0, NULL, 0) != REG_NOMATCH) {  // Anything but "no match" selects.
+      return true;
+    }
+  }
+  return false;
+}
+
+void Benchmark::StopBenchmarkTiming() {  // Stops the timer, adding the elapsed interval to the total.
+  if (start_time_ns_ != 0) {  // 0 means the timer is not currently running.
+    total_time_ns_ += NanoTime() - start_time_ns_;
+  }
+  start_time_ns_ = 0;
+}
+
+void Benchmark::StartBenchmarkTiming() {  // Starts the timer; does nothing if it is already running.
+  if (start_time_ns_ == 0) {
+    start_time_ns_ = NanoTime();
+  }
+}
+
+std::string BenchmarkWithoutArg::GetNameStr(void*) {
+  return Name();
+}
+
+template <>
+std::string BenchmarkWithArg<int>::GetNameStr(int arg) {
+  return Name() + "/" + PrettyInt(arg, 2);
+}
+
+template <>
+std::string BenchmarkWithArg<double>::GetNameStr(double arg) {
+  return Name() + "/" + android::StringPrintf("%0.6f", arg);
+}
+
+// Runs the benchmark for `arg` with growing iteration counts until one attempt accumulates
+// 1s of timed work or the next count would reach 1e8, then prints the last attempt's result.
+template<typename T>
+void BenchmarkT<T>::RunWithArg(T arg) {
+  int new_iterations = 1;
+  int iterations = 1;
+  while (new_iterations < 1e8) {
+    bytes_processed_ = total_time_ns_ = start_time_ns_ = 0;  // Counters are per-attempt.
+
+    iterations = new_iterations;
+    RunIterations(iterations, arg);
+    if (total_time_ns_ >= 1e9) {
+      break;
+    }
+
+    // Estimate the iterations needed to fill one second (1e9 if an op takes under 1ns).
+    const uint64_t ns_per_op = total_time_ns_/iterations;
+    const int64_t estimate = (ns_per_op == 0) ? 1e9 : 1e9/ns_per_op;
+    // Overshoot by 50%, growing by at least 1 and at most 100x. Computed in 64 bits because
+    // 100*iterations overflows int once iterations reaches 5e7.
+    new_iterations = static_cast<int>(std::max<int64_t>(
+        iterations + 1, std::min<int64_t>(estimate + estimate/2, 100LL*iterations)));
+
+    new_iterations = Round(new_iterations);
+  }
+
+  printf("%-*s %10s %10" PRIu64, MaxNameColumnWidth(), GetNameStr(arg).c_str(),
+         PrettyInt(iterations, 10).c_str(), total_time_ns_/iterations);  // Unsigned, so PRIu64.
+
+  if (total_time_ns_ > 0 && bytes_processed_ > 0) {
+    double gib_processed = static_cast<double>(bytes_processed_)/1e9;
+    double seconds = static_cast<double>(total_time_ns_)/1e9;
+    printf(" %8.3f GiB/s", gib_processed/seconds);
+  }
+  printf("\n");
+  fflush(stdout);
+}
+
+template class BenchmarkT<int>;
+template class BenchmarkT<double>;
+template class BenchmarkT<void*>;
+
+template class BenchmarkWithArg<int>;
+template class BenchmarkWithArg<double>;
+
+}  // namespace testing
diff --git a/benchmarks/benchmark/Benchmark.h b/benchmarks/benchmark/Benchmark.h
new file mode 100644
index 0000000..16ae5fa
--- /dev/null
+++ b/benchmarks/benchmark/Benchmark.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BENCHMARKS_BENCHMARK_H_
+#define BENCHMARKS_BENCHMARK_H_
+
+#include <regex.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+namespace testing {
+
+// Base class of every benchmark; constructing one registers it in List().
+class Benchmark {
+public:
+  // The counters start at zero so the timing calls are well-defined before the first run.
+  Benchmark() : bytes_processed_(0), total_time_ns_(0), start_time_ns_(0) {
+    List().push_back(this);
+  }
+  virtual ~Benchmark() {}
+  virtual std::string Name() = 0;
+  virtual void RunAll() = 0;
+
+  bool ShouldRun(std::vector<regex_t*>&);
+
+  // Adds to (rather than replaces) the running byte count.
+  void SetBenchmarkBytesProcessed(uint64_t bytes) { bytes_processed_ += bytes; }
+  void StopBenchmarkTiming();
+  void StartBenchmarkTiming();
+
+  // Run all of the benchmarks that have registered.
+  static bool RunAll(std::vector<regex_t*>&);
+
+  static std::vector<Benchmark*>& List();
+  static int MaxNameColumnWidth();
+
+protected:
+  virtual int NameColumnWidth() = 0;
+
+  uint64_t bytes_processed_;
+  uint64_t total_time_ns_;
+  uint64_t start_time_ns_;
+};
+
+template <typename T>
+class BenchmarkT : public Benchmark {  // Benchmark whose runs take one argument of type T.
+public:
+  BenchmarkT() {}
+  virtual ~BenchmarkT() {}
+
+protected:
+  void RunWithArg(T arg);  // Not defined in this header.
+  virtual void RunIterations(int, T) = 0;
+  virtual std::string GetNameStr(T) = 0;
+};
+
+class BenchmarkWithoutArg : public BenchmarkT<void*> {  // Argument-less benchmark; void* is a placeholder.
+public:
+  BenchmarkWithoutArg() {}
+  virtual ~BenchmarkWithoutArg() {}
+
+protected:
+  virtual void RunAll() override {
+    RunWithArg(nullptr);  // The placeholder argument is discarded by RunIterations() below.
+  }
+
+  virtual void RunIterations(int iters, void*) override {
+    Run(iters);
+  }
+
+  virtual void Run(int) = 0;  // The benchmark body; receives the iteration count.
+
+  virtual int NameColumnWidth() override {
+    return (int)Name().size();
+  }
+
+  virtual std::string GetNameStr(void *) override;
+};
+
+template<typename T>
+class BenchmarkWithArg : public BenchmarkT<T> {  // Benchmark run once per queued argument.
+public:
+  BenchmarkWithArg() {}
+  virtual ~BenchmarkWithArg() {}
+
+  BenchmarkWithArg* Arg(T arg) {  // Queues an argument; returns this so calls can be chained.
+    args_.push_back(arg);
+    return this;
+  }
+
+protected:
+  virtual int NameColumnWidth() override {  // Widest GetNameStr() over all queued arguments.
+    int max = 0;
+    for (const auto arg : args_) {
+      max = std::max(max, (int)GetNameStr(arg).size());
+    }
+    return max;
+  }
+
+  std::string GetNameStr(T arg) override;  // Not defined in this header.
+
+  virtual void RunAll() override {
+    for (T arg : args_) {
+      BenchmarkT<T>::RunWithArg(arg);
+    }
+  }
+
+  virtual void RunIterations(int iters, T arg) override {
+    Run(iters, arg);
+  }
+
+  virtual void Run(int iters, T arg) = 0;  // The benchmark body.
+
+private:
+  std::vector<T> args_;
+};
+
+}  // namespace testing
+
+#define BENCHMARK_START(f, super_class) \
+  class f : public super_class { \
+  public: \
+    f() {} \
+    virtual ~f() {} \
+    virtual std::string Name() override { return #f; } \
+
+#define BENCHMARK_NO_ARG(f) \
+  BENCHMARK_START(f, ::testing::BenchmarkWithoutArg) \
+    virtual void Run(int) override; \
+  }; \
+  static ::testing::Benchmark* __benchmark_##f = new f()
+
+#define BENCHMARK_WITH_ARG(f, arg_type) \
+  BENCHMARK_START(f, ::testing::BenchmarkWithArg<arg_type>) \
+    virtual void Run(int, arg_type) override; \
+  }; \
+  static ::testing::BenchmarkWithArg<arg_type>* __benchmark_##f = (new f())
+
+#endif  // BENCHMARKS_BENCHMARK_H_
diff --git a/benchmarks/benchmark_main.cpp b/benchmarks/benchmark_main.cpp
deleted file mode 100644
index fae09be..0000000
--- a/benchmarks/benchmark_main.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <benchmark.h>
-
-#include <regex.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include <string>
-#include <vector>
-
-#include <inttypes.h>
-
-static int64_t g_bytes_processed;
-static int64_t g_benchmark_total_time_ns;
-static int64_t g_benchmark_start_time_ns;
-static int g_name_column_width = 20;
-
-typedef std::vector<::testing::Benchmark*> BenchmarkList;
-
-static BenchmarkList& Benchmarks() {
-  static BenchmarkList benchmarks;
-  return benchmarks;
-}
-
-// Similar to the code in art, but supporting both binary and decimal prefixes.
-static std::string PrettyInt(uint64_t count, size_t base) {
-  if (base != 2 && base != 10) abort();
-
-  // The byte thresholds at which we display amounts. A count is displayed
-  // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
-  static const uint64_t kUnitThresholds2[] = {
-    1024*1024*1024 /* Gi */, 2*1024*1024 /* Mi */, 3*1024 /* Ki */, 0,
-  };
-  static const uint64_t kUnitThresholds10[] = {
-    1000*1000*1000 /* G */, 2*1000*1000 /* M */, 3*1000 /* k */, 0,
-  };
-  static const uint64_t kAmountPerUnit2[] = { 1024*1024*1024, 1024*1024, 1024, 1 };
-  static const uint64_t kAmountPerUnit10[] = { 1000*1000*1000, 1000*1000, 1000, 1 };
-  static const char* const kUnitStrings2[] = { "Gi", "Mi", "Ki", "" };
-  static const char* const kUnitStrings10[] = { "G", "M", "k", "" };
-
-  // Which set are we using?
-  const uint64_t* kUnitThresholds = ((base == 2) ? kUnitThresholds2 : kUnitThresholds10);
-  const uint64_t* kAmountPerUnit = ((base == 2) ? kAmountPerUnit2 : kAmountPerUnit10);
-  const char* const* kUnitStrings = ((base == 2) ? kUnitStrings2 : kUnitStrings10);
-
-  size_t i = 0;
-  for (; kUnitThresholds[i] != 0; ++i) {
-    if (count >= kUnitThresholds[i]) {
-      break;
-    }
-  }
-  char* s = NULL;
-  asprintf(&s, "%" PRId64 "%s", count / kAmountPerUnit[i], kUnitStrings[i]);
-  std::string result(s);
-  free(s);
-  return result;
-}
-
-static int Round(int n) {
-  int base = 1;
-  while (base*10 < n) {
-    base *= 10;
-  }
-  if (n < 2*base) {
-    return 2*base;
-  }
-  if (n < 5*base) {
-    return 5*base;
-  }
-  return 10*base;
-}
-
-static int64_t NanoTime() {
-  struct timespec t;
-  t.tv_sec = t.tv_nsec = 0;
-  clock_gettime(CLOCK_MONOTONIC, &t);
-  return static_cast<int64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;
-}
-
-namespace testing {
-
-Benchmark* Benchmark::Arg(int arg) {
-  args_.push_back(arg);
-  return this;
-}
-
-const char* Benchmark::Name() {
-  return name_;
-}
-
-bool Benchmark::ShouldRun(int argc, char* argv[]) {
-  if (argc == 1) {
-    return true;  // With no arguments, we run all benchmarks.
-  }
-  // Otherwise, we interpret each argument as a regular expression and
-  // see if any of our benchmarks match.
-  for (int i = 1; i < argc; i++) {
-    regex_t re;
-    if (regcomp(&re, argv[i], 0) != 0) {
-      fprintf(stderr, "couldn't compile \"%s\" as a regular expression!\n", argv[i]);
-      exit(EXIT_FAILURE);
-    }
-    int match = regexec(&re, name_, 0, NULL, 0);
-    regfree(&re);
-    if (match != REG_NOMATCH) {
-      return true;
-    }
-  }
-  return false;
-}
-
-void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int, int)) {
-  name_ = name;
-  fn_ = fn;
-  fn_range_ = fn_range;
-
-  if (fn_ == NULL && fn_range_ == NULL) {
-    fprintf(stderr, "%s: missing function\n", name_);
-    exit(EXIT_FAILURE);
-  }
-
-  Benchmarks().push_back(this);
-}
-
-void Benchmark::Run() {
-  if (fn_ != NULL) {
-    RunWithArg(0);
-  } else {
-    if (args_.empty()) {
-      fprintf(stderr, "%s: no args!\n", name_);
-      exit(EXIT_FAILURE);
-    }
-    for (size_t i = 0; i < args_.size(); ++i) {
-      RunWithArg(args_[i]);
-    }
-  }
-}
-
-void Benchmark::RunRepeatedlyWithArg(int iterations, int arg) {
-  g_bytes_processed = 0;
-  g_benchmark_total_time_ns = 0;
-  g_benchmark_start_time_ns = NanoTime();
-  if (fn_ != NULL) {
-    fn_(iterations);
-  } else {
-    fn_range_(iterations, arg);
-  }
-  if (g_benchmark_start_time_ns != 0) {
-    g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
-  }
-}
-
-void Benchmark::RunWithArg(int arg) {
-  // Run once in case it's expensive.
-  int iterations = 1;
-  int64_t realStartTime = NanoTime();
-  RunRepeatedlyWithArg(iterations, arg);
-  int64_t realTotalTime = NanoTime() - realStartTime;
-  while (realTotalTime < 1e9 && iterations < 1e8) {
-    int last = iterations;
-    if (realTotalTime/iterations == 0) {
-      iterations = 1e9;
-    } else {
-      iterations = 1e9 / (realTotalTime/iterations);
-    }
-    iterations = std::max(last + 1, std::min(iterations + iterations/2, 100*last));
-    iterations = Round(iterations);
-    realStartTime = NanoTime();
-    RunRepeatedlyWithArg(iterations, arg);
-    realTotalTime = NanoTime() - realStartTime;
-  }
-
-  char throughput[100];
-  throughput[0] = '\0';
-
-  if (g_benchmark_total_time_ns > 0 && g_bytes_processed > 0) {
-    double gib_processed = static_cast<double>(g_bytes_processed)/1e9;
-    double seconds = static_cast<double>(g_benchmark_total_time_ns)/1e9;
-    snprintf(throughput, sizeof(throughput), " %8.3f GiB/s", gib_processed/seconds);
-  }
-
-  char full_name[100];
-  if (fn_range_ != NULL) {
-    snprintf(full_name, sizeof(full_name), "%s/%s", name_, PrettyInt(arg, 2).c_str());
-  } else {
-    snprintf(full_name, sizeof(full_name), "%s", name_);
-  }
-
-  printf("%-*s %10s %10" PRId64 "%s\n",
-         g_name_column_width, full_name,
-         PrettyInt(iterations, 10).c_str(),
-         g_benchmark_total_time_ns/iterations,
-         throughput);
-  fflush(stdout);
-}
-
-}  // namespace testing
-
-void SetBenchmarkBytesProcessed(int64_t x) {
-  g_bytes_processed = x;
-}
-
-void StopBenchmarkTiming() {
-  if (g_benchmark_start_time_ns != 0) {
-    g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
-  }
-  g_benchmark_start_time_ns = 0;
-}
-
-void StartBenchmarkTiming() {
-  if (g_benchmark_start_time_ns == 0) {
-    g_benchmark_start_time_ns = NanoTime();
-  }
-}
-
-int main(int argc, char* argv[]) {
-  if (Benchmarks().empty()) {
-    fprintf(stderr, "No benchmarks registered!\n");
-    exit(EXIT_FAILURE);
-  }
-
-  for (auto& b : Benchmarks()) {
-    int name_width = static_cast<int>(strlen(b->Name()));
-    g_name_column_width = std::max(g_name_column_width, name_width);
-  }
-
-  bool need_header = true;
-  for (auto& b : Benchmarks()) {
-    if (b->ShouldRun(argc, argv)) {
-      if (need_header) {
-        printf("%-*s %10s %10s\n", g_name_column_width, "", "iterations", "ns/op");
-        fflush(stdout);
-        need_header = false;
-      }
-      b->Run();
-    }
-  }
-
-  if (need_header) {
-    fprintf(stderr, "No matching benchmarks!\n");
-    fprintf(stderr, "Available benchmarks:\n");
-    for (auto& b : Benchmarks()) {
-      fprintf(stderr, "  %s\n", b->Name());
-    }
-    exit(EXIT_FAILURE);
-  }
-
-  return 0;
-}
diff --git a/benchmarks/include/benchmark.h b/benchmarks/include/benchmark.h
deleted file mode 100644
index 7e134a0..0000000
--- a/benchmarks/include/benchmark.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef BENCHMARKS_BENCHMARK_H_
-#define BENCHMARKS_BENCHMARK_H_
-
-#include <stdint.h>
-#include <vector>
-
-namespace testing {
-
-class Benchmark {
- public:
-  Benchmark(const char* name, void (*fn)(int)) {
-    Register(name, fn, NULL);
-  }
-
-  Benchmark(const char* name, void (*fn_range)(int, int)) {
-    Register(name, NULL, fn_range);
-  }
-
-  Benchmark* Arg(int x);
-
-  const char* Name();
-
-  bool ShouldRun(int argc, char* argv[]);
-  void Run();
-
- private:
-  const char* name_;
-
-  void (*fn_)(int);
-  void (*fn_range_)(int, int);
-
-  std::vector<int> args_;
-
-  void Register(const char* name, void (*fn)(int), void (*fn_range)(int, int));
-  void RunRepeatedlyWithArg(int iterations, int arg);
-  void RunWithArg(int arg);
-};
-
-}  // namespace testing
-
-void SetBenchmarkBytesProcessed(int64_t);
-void StopBenchmarkTiming();
-void StartBenchmarkTiming();
-
-#define BENCHMARK(f) \
-    static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \
-        (new ::testing::Benchmark(#f, f))
-
-#endif
diff --git a/benchmarks/main.cpp b/benchmarks/main.cpp
new file mode 100644
index 0000000..b6984fc
--- /dev/null
+++ b/benchmarks/main.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vector>
+
+#include <benchmark/Benchmark.h>
+
+int main(int argc, char* argv[]) {  // Treats each argument as a regex filter, then runs the matches.
+  if (::testing::Benchmark::List().empty()) {
+    fprintf(stderr, "No benchmarks registered!\n");
+    exit(EXIT_FAILURE);
+  }
+
+  std::vector<regex_t*> regs;  // Never freed here; the patterns live until the process exits.
+  for (int i = 1; i < argc; i++) {
+    regex_t* re = new regex_t;
+    int errcode = regcomp(re, argv[i], 0);  // cflags 0: POSIX basic regular expression syntax.
+    if (errcode != 0) {
+      size_t errbuf_size = regerror(errcode, re, NULL, 0);  // Size query only.
+      if (errbuf_size > 0) {
+        char* errbuf = new char[errbuf_size];  // Not deleted; exit() follows immediately.
+        regerror(errcode, re, errbuf, errbuf_size);
+        fprintf(stderr, "Couldn't compile \"%s\" as a regular expression: %s\n",
+                argv[i], errbuf);
+      } else {
+        fprintf(stderr, "Unknown compile error for \"%s\" as a regular expression!\n", argv[i]);
+      }
+      exit(EXIT_FAILURE);
+    }
+    regs.push_back(re);
+  }
+
+  if (!::testing::Benchmark::RunAll(regs)) {  // Nothing matched: list what is available.
+    fprintf(stderr, "No matching benchmarks!\n");
+    fprintf(stderr, "Available benchmarks:\n");
+    for (const auto& benchmark : ::testing::Benchmark::List()) {
+      fprintf(stderr, "  %s\n", benchmark->Name().c_str());
+    }
+    exit(EXIT_FAILURE);
+  }
+
+  return 0;
+}
diff --git a/benchmarks/math_benchmark.cpp b/benchmarks/math_benchmark.cpp
index 8d6dd10..4de28d1 100644
--- a/benchmarks/math_benchmark.cpp
+++ b/benchmarks/math_benchmark.cpp
@@ -14,16 +14,20 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
-
 #include <fenv.h>
 #include <math.h>
 
+#include <benchmark/Benchmark.h>
+
+#define AT_COMMON_VALS \
+    Arg(1234.0)->Arg(nan(""))->Arg(HUGE_VAL)->Arg(0.0)
+
 // Avoid optimization.
 volatile double d;
 volatile double v;
 
-static void BM_math_sqrt(int iters) {
+BENCHMARK_NO_ARG(BM_math_sqrt);
+void BM_math_sqrt::Run(int iters) {
   StartBenchmarkTiming();
 
   d = 0.0;
@@ -34,9 +38,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_sqrt);
 
-static void BM_math_log10(int iters) {
+BENCHMARK_NO_ARG(BM_math_log10);
+void BM_math_log10::Run(int iters) {
   StartBenchmarkTiming();
 
   d = 0.0;
@@ -47,9 +51,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_log10);
 
-static void BM_math_logb(int iters) {
+BENCHMARK_NO_ARG(BM_math_logb);
+void BM_math_logb::Run(int iters) {
   StartBenchmarkTiming();
 
   d = 0.0;
@@ -60,61 +64,22 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_logb);
 
-static void BM_math_isinf_NORMAL(int iters) {
+BENCHMARK_WITH_ARG(BM_math_isinf, double)->AT_COMMON_VALS;
+void BM_math_isinf::Run(int iters, double value) {
   StartBenchmarkTiming();
 
   d = 0.0;
-  v = 1234.0; // FP_NORMAL
+  v = value;
   for (int i = 0; i < iters; ++i) {
     d += (isinf)(v);
   }
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_isinf_NORMAL);
 
-static void BM_math_isinf_NAN(int iters) {
-  StartBenchmarkTiming();
-
-  d = 0.0;
-  v = nan(""); // FP_NAN
-  for (int i = 0; i < iters; ++i) {
-    d += (isinf)(v);
-  }
-
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_math_isinf_NAN);
-
-static void BM_math_isinf_INFINITE(int iters) {
-  StartBenchmarkTiming();
-
-  d = 0.0;
-  v = HUGE_VAL; // FP_INFINITE
-  for (int i = 0; i < iters; ++i) {
-    d += (isinf)(v);
-  }
-
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_math_isinf_INFINITE);
-
-static void BM_math_isinf_ZERO(int iters) {
-  StartBenchmarkTiming();
-
-  d = 0.0;
-  v = 0.0; // FP_ZERO
-  for (int i = 0; i < iters; ++i) {
-    d += (isinf)(v);
-  }
-
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_math_isinf_ZERO);
-
-static void BM_math_sin_fast(int iters) {
+BENCHMARK_NO_ARG(BM_math_sin_fast);
+void BM_math_sin_fast::Run(int iters) {
   StartBenchmarkTiming();
 
   d = 1.0;
@@ -124,9 +89,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_sin_fast);
 
-static void BM_math_sin_feupdateenv(int iters) {
+BENCHMARK_NO_ARG(BM_math_sin_feupdateenv);
+void BM_math_sin_feupdateenv::Run(int iters) {
   StartBenchmarkTiming();
 
   d = 1.0;
@@ -140,9 +105,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_sin_feupdateenv);
 
-static void BM_math_sin_fesetenv(int iters) {
+BENCHMARK_NO_ARG(BM_math_sin_fesetenv);
+void BM_math_sin_fesetenv::Run(int iters) {
   StartBenchmarkTiming();
 
   d = 1.0;
@@ -156,56 +121,16 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_sin_fesetenv);
 
-static void BM_math_fpclassify_NORMAL(int iters) {
+BENCHMARK_WITH_ARG(BM_math_fpclassify, double)->AT_COMMON_VALS;
+void BM_math_fpclassify::Run(int iters, double value) {
   StartBenchmarkTiming();
 
   d = 0.0;
-  v = 1234.0; // FP_NORMAL
+  v = value;
   for (int i = 0; i < iters; ++i) {
     d += fpclassify(v);
   }
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_math_fpclassify_NORMAL);
-
-static void BM_math_fpclassify_NAN(int iters) {
-  StartBenchmarkTiming();
-
-  d = 0.0;
-  v = nan(""); // FP_NAN
-  for (int i = 0; i < iters; ++i) {
-    d += fpclassify(v);
-  }
-
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_math_fpclassify_NAN);
-
-static void BM_math_fpclassify_INFINITE(int iters) {
-  StartBenchmarkTiming();
-
-  d = 0.0;
-  v = HUGE_VAL; // FP_INFINITE
-  for (int i = 0; i < iters; ++i) {
-    d += fpclassify(v);
-  }
-
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_math_fpclassify_INFINITE);
-
-static void BM_math_fpclassify_ZERO(int iters) {
-  StartBenchmarkTiming();
-
-  d = 0.0;
-  v = 0.0; // FP_ZERO
-  for (int i = 0; i < iters; ++i) {
-    d += fpclassify(v);
-  }
-
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_math_fpclassify_ZERO);
diff --git a/benchmarks/property_benchmark.cpp b/benchmarks/property_benchmark.cpp
index 0802b4c..944cd68 100644
--- a/benchmarks/property_benchmark.cpp
+++ b/benchmarks/property_benchmark.cpp
@@ -14,19 +14,21 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 
+#include <string>
+
+#if defined(__BIONIC__)
+
 #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
 #include <sys/_system_properties.h>
 
-#include <vector>
-#include <string>
+#include <benchmark/Benchmark.h>
 
-extern void *__system_property_area__;
+extern void* __system_property_area__;
 
 // Do not exceed 512, that is about the largest number of properties
 // that can be created with the current property area size.
@@ -34,200 +36,198 @@
     Arg(1)->Arg(4)->Arg(16)->Arg(64)->Arg(128)->Arg(256)->Arg(512)
 
 struct LocalPropertyTestState {
-    LocalPropertyTestState(int nprops) : nprops(nprops), valid(false) {
-        static const char prop_name_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.";
+  LocalPropertyTestState(int nprops) : nprops(nprops), valid(false) {
+    static const char prop_name_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_.";
 
-        const char* android_data = getenv("ANDROID_DATA");
-        if (android_data == NULL) {
-          printf("ANDROID_DATA environment variable not set\n");
-          return;
-        }
-        char dir_template[PATH_MAX];
-        snprintf(dir_template, sizeof(dir_template), "%s/local/tmp/prop-XXXXXX", android_data);
-        char *dirname = mkdtemp(dir_template);
-        if (!dirname) {
-            printf("making temp file for test state failed (is %s/local/tmp writable?): %s\n",
-                   android_data, strerror(errno));
-            return;
-        }
-
-        old_pa = __system_property_area__;
-        __system_property_area__ = NULL;
-
-        pa_dirname = dirname;
-        pa_filename = pa_dirname + "/__properties__";
-
-        __system_property_set_filename(pa_filename.c_str());
-        __system_property_area_init();
-
-        names = new char* [nprops];
-        name_lens = new int[nprops];
-        values = new char* [nprops];
-        value_lens = new int[nprops];
-
-        srandom(nprops);
-
-        for (int i = 0; i < nprops; i++) {
-            // Make sure the name has at least 10 characters to make
-            // it very unlikely to generate the same random name.
-            name_lens[i] = (random() % (PROP_NAME_MAX - 10)) + 10;
-            names[i] = new char[PROP_NAME_MAX + 1];
-            size_t prop_name_len = sizeof(prop_name_chars) - 1;
-            for (int j = 0; j < name_lens[i]; j++) {
-                if (j == 0 || names[i][j-1] == '.' || j == name_lens[i] - 1) {
-                    // Certain values are not allowed:
-                    // - Don't start name with '.'
-                    // - Don't allow '.' to appear twice in a row
-                    // - Don't allow the name to end with '.'
-                    // This assumes that '.' is the last character in the
-                    // array so that decrementing the length by one removes
-                    // the value from the possible values.
-                    prop_name_len--;
-                }
-                names[i][j] = prop_name_chars[random() % prop_name_len];
-            }
-            names[i][name_lens[i]] = 0;
-
-            // Make sure the value contains at least 1 character.
-            value_lens[i] = (random() % (PROP_VALUE_MAX - 1)) + 1;
-            values[i] = new char[PROP_VALUE_MAX];
-            for (int j = 0; j < value_lens[i]; j++) {
-                values[i][j] = prop_name_chars[random() % (sizeof(prop_name_chars) - 1)];
-            }
-
-            if (__system_property_add(names[i], name_lens[i], values[i], value_lens[i]) < 0) {
-                printf("Failed to add a property, terminating...\n");
-                printf("%s = %.*s\n", names[i], value_lens[i], values[i]);
-                exit(1);
-            }
-        }
-
-        valid = true;
+    const char* android_data = getenv("ANDROID_DATA");
+    if (android_data == NULL) {
+      printf("ANDROID_DATA environment variable not set\n");
+      return;
+    }
+    char dir_template[PATH_MAX];
+    snprintf(dir_template, sizeof(dir_template), "%s/local/tmp/prop-XXXXXX", android_data);
+    char* dirname = mkdtemp(dir_template);
+    if (!dirname) {
+      printf("making temp file for test state failed (is %s/local/tmp writable?): %s\n",
+             android_data, strerror(errno));
+      return;
     }
 
-    ~LocalPropertyTestState() {
-        if (!valid)
-            return;
+    old_pa = __system_property_area__;
+    __system_property_area__ = NULL;
 
-        __system_property_area__ = old_pa;
+    pa_dirname = dirname;
+    pa_filename = pa_dirname + "/__properties__";
 
-        __system_property_set_filename(PROP_FILENAME);
-        unlink(pa_filename.c_str());
-        rmdir(pa_dirname.c_str());
+    __system_property_set_filename(pa_filename.c_str());
+    __system_property_area_init();
 
-        for (int i = 0; i < nprops; i++) {
-            delete names[i];
-            delete values[i];
+    names = new char* [nprops];
+    name_lens = new int[nprops];
+    values = new char* [nprops];
+    value_lens = new int[nprops];
+
+    srandom(nprops);
+
+    for (int i = 0; i < nprops; i++) {
+      // Make sure the name has at least 10 characters to make
+      // it very unlikely to generate the same random name.
+      name_lens[i] = (random() % (PROP_NAME_MAX - 10)) + 10;
+      names[i] = new char[PROP_NAME_MAX + 1];
+      size_t prop_name_len = sizeof(prop_name_chars) - 1;
+      for (int j = 0; j < name_lens[i]; j++) {
+        if (j == 0 || names[i][j-1] == '.' || j == name_lens[i] - 1) {
+          // Certain values are not allowed:
+          // - Don't start name with '.'
+          // - Don't allow '.' to appear twice in a row
+          // - Don't allow the name to end with '.'
+          // This assumes that '.' is the last character in the
+          // array so that decrementing the length by one removes
+          // the value from the possible values.
+          prop_name_len--;
         }
-        delete[] names;
-        delete[] name_lens;
-        delete[] values;
-        delete[] value_lens;
+        names[i][j] = prop_name_chars[random() % prop_name_len];
+      }
+      names[i][name_lens[i]] = 0;
+
+      // Make sure the value contains at least 1 character.
+      value_lens[i] = (random() % (PROP_VALUE_MAX - 1)) + 1;
+      values[i] = new char[PROP_VALUE_MAX];
+      for (int j = 0; j < value_lens[i]; j++) {
+        values[i][j] = prop_name_chars[random() % (sizeof(prop_name_chars) - 1)];
+      }
+
+      if (__system_property_add(names[i], name_lens[i], values[i], value_lens[i]) < 0) {
+        printf("Failed to add a property, terminating...\n");
+        printf("%s = %.*s\n", names[i], value_lens[i], values[i]);
+        exit(1);
+      }
     }
+
+    valid = true;
+  }
+
+  ~LocalPropertyTestState() {  // Undoes the constructor: restores the property area, removes temp files, frees arrays.
+    if (!valid)
+      return;  // Setup bailed out before swapping the area or allocating anything.
+    // Put back the property area pointer and backing filename that the constructor replaced.
+    __system_property_area__ = old_pa;
+
+    __system_property_set_filename(PROP_FILENAME);
+    unlink(pa_filename.c_str());
+    rmdir(pa_dirname.c_str());
+    // Each names[i]/values[i] was allocated with new char[...], so it must be released with delete[].
+    for (int i = 0; i < nprops; i++) {
+      delete[] names[i];
+      delete[] values[i];
+    }
+    delete[] names;
+    delete[] name_lens;
+    delete[] values;
+    delete[] value_lens;
+  }
 public:
-    const int nprops;
-    char **names;
-    int *name_lens;
-    char **values;
-    int *value_lens;
-    bool valid;
+  const int nprops;
+  char** names;
+  int* name_lens;
+  char** values;
+  int* value_lens;
+  bool valid;
 
 private:
-    std::string pa_dirname;
-    std::string pa_filename;
-    void *old_pa;
+  std::string pa_dirname;
+  std::string pa_filename;
+  void* old_pa;
 };
 
-static void BM_property_get(int iters, int nprops)
-{
-    StopBenchmarkTiming();
+BENCHMARK_WITH_ARG(BM_property_get, int)->TEST_NUM_PROPS;
+void BM_property_get::Run(int iters, int nprops) {
+  StopBenchmarkTiming();
 
-    LocalPropertyTestState pa(nprops);
-    char value[PROP_VALUE_MAX];
+  LocalPropertyTestState pa(nprops);
+  char value[PROP_VALUE_MAX];
 
-    if (!pa.valid)
-        return;
+  if (!pa.valid)
+    return;
 
-    srandom(iters * nprops);
+  srandom(iters * nprops);
 
-    StartBenchmarkTiming();
+  StartBenchmarkTiming();
 
-    for (int i = 0; i < iters; i++) {
-        __system_property_get(pa.names[random() % nprops], value);
-    }
-    StopBenchmarkTiming();
+  for (int i = 0; i < iters; i++) {
+    __system_property_get(pa.names[random() % nprops], value);
+  }
+  StopBenchmarkTiming();
 }
-BENCHMARK(BM_property_get)->TEST_NUM_PROPS;
 
-static void BM_property_find(int iters, int nprops)
-{
-    StopBenchmarkTiming();
+BENCHMARK_WITH_ARG(BM_property_find, int)->TEST_NUM_PROPS;
+void BM_property_find::Run(int iters, int nprops) {
+  StopBenchmarkTiming();
 
-    LocalPropertyTestState pa(nprops);
+  LocalPropertyTestState pa(nprops);
 
-    if (!pa.valid)
-        return;
+  if (!pa.valid)
+    return;
 
-    srandom(iters * nprops);
+  srandom(iters * nprops);
 
-    StartBenchmarkTiming();
+  StartBenchmarkTiming();
 
-    for (int i = 0; i < iters; i++) {
-        __system_property_find(pa.names[random() % nprops]);
-    }
-    StopBenchmarkTiming();
+  for (int i = 0; i < iters; i++) {
+    __system_property_find(pa.names[random() % nprops]);
+  }
+  StopBenchmarkTiming();
 }
-BENCHMARK(BM_property_find)->TEST_NUM_PROPS;
 
-static void BM_property_read(int iters, int nprops)
-{
-    StopBenchmarkTiming();
+BENCHMARK_WITH_ARG(BM_property_read, int)->TEST_NUM_PROPS;
+void BM_property_read::Run(int iters, int nprops) {
+  StopBenchmarkTiming();
 
-    LocalPropertyTestState pa(nprops);
+  LocalPropertyTestState pa(nprops);
 
-    if (!pa.valid)
-        return;
+  if (!pa.valid)
+    return;
 
-    srandom(iters * nprops);
-    const prop_info** pinfo = new const prop_info*[iters];
-    char propvalue[PROP_VALUE_MAX];
+  srandom(iters * nprops);
+  const prop_info** pinfo = new const prop_info*[iters];
+  char propvalue[PROP_VALUE_MAX];
 
-    for (int i = 0; i < iters; i++) {
-        pinfo[i] = __system_property_find(pa.names[random() % nprops]);
-    }
+  for (int i = 0; i < iters; i++) {
+    pinfo[i] = __system_property_find(pa.names[random() % nprops]);
+  }
 
-    StartBenchmarkTiming();
-    for (int i = 0; i < iters; i++) {
-        __system_property_read(pinfo[i], 0, propvalue);
-    }
-    StopBenchmarkTiming();
+  StartBenchmarkTiming();
+  for (int i = 0; i < iters; i++) {
+    __system_property_read(pinfo[i], 0, propvalue);
+  }
+  StopBenchmarkTiming();
 
-    delete[] pinfo;
+  delete[] pinfo;
 }
-BENCHMARK(BM_property_read)->TEST_NUM_PROPS;
 
-static void BM_property_serial(int iters, int nprops)
-{
-    StopBenchmarkTiming();
+BENCHMARK_WITH_ARG(BM_property_serial, int)->TEST_NUM_PROPS;
+void BM_property_serial::Run(int iters, int nprops) {
+  StopBenchmarkTiming();
 
-    LocalPropertyTestState pa(nprops);
+  LocalPropertyTestState pa(nprops);
 
-    if (!pa.valid)
-        return;
+  if (!pa.valid)
+    return;
 
-    srandom(iters * nprops);
-    const prop_info** pinfo = new const prop_info*[iters];
+  srandom(iters * nprops);
+  const prop_info** pinfo = new const prop_info*[iters];
 
-    for (int i = 0; i < iters; i++) {
-        pinfo[i] = __system_property_find(pa.names[random() % nprops]);
-    }
+  for (int i = 0; i < iters; i++) {
+    pinfo[i] = __system_property_find(pa.names[random() % nprops]);
+  }
 
-    StartBenchmarkTiming();
-    for (int i = 0; i < iters; i++) {
-        __system_property_serial(pinfo[i]);
-    }
-    StopBenchmarkTiming();
+  StartBenchmarkTiming();
+  for (int i = 0; i < iters; i++) {
+    __system_property_serial(pinfo[i]);
+  }
+  StopBenchmarkTiming();
 
-    delete[] pinfo;
+  delete[] pinfo;
 }
-BENCHMARK(BM_property_serial)->TEST_NUM_PROPS;
+
+#endif  // __BIONIC__
diff --git a/benchmarks/pthread_benchmark.cpp b/benchmarks/pthread_benchmark.cpp
index 42023e0..2f6572d 100644
--- a/benchmarks/pthread_benchmark.cpp
+++ b/benchmarks/pthread_benchmark.cpp
@@ -14,14 +14,15 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
-
 #include <pthread.h>
 
+#include <benchmark/Benchmark.h>
+
 // Stop GCC optimizing out our pure function.
 /* Must not be static! */ pthread_t (*pthread_self_fp)() = pthread_self;
 
-static void BM_pthread_self(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_self);
+void BM_pthread_self::Run(int iters) {
   StartBenchmarkTiming();
 
   for (int i = 0; i < iters; ++i) {
@@ -30,9 +31,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_pthread_self);
 
-static void BM_pthread_getspecific(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_getspecific);
+void BM_pthread_getspecific::Run(int iters) {
   StopBenchmarkTiming();
   pthread_key_t key;
   pthread_key_create(&key, NULL);
@@ -45,9 +46,9 @@
   StopBenchmarkTiming();
   pthread_key_delete(key);
 }
-BENCHMARK(BM_pthread_getspecific);
 
-static void BM_pthread_setspecific(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_setspecific);
+void BM_pthread_setspecific::Run(int iters) {
   StopBenchmarkTiming();
   pthread_key_t key;
   pthread_key_create(&key, NULL);
@@ -60,12 +61,12 @@
   StopBenchmarkTiming();
   pthread_key_delete(key);
 }
-BENCHMARK(BM_pthread_setspecific);
 
 static void DummyPthreadOnceInitFunction() {
 }
 
-static void BM_pthread_once(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_once);
+void BM_pthread_once::Run(int iters) {
   StopBenchmarkTiming();
   pthread_once_t once = PTHREAD_ONCE_INIT;
   pthread_once(&once, DummyPthreadOnceInitFunction);
@@ -77,9 +78,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_pthread_once);
 
-static void BM_pthread_mutex_lock(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_mutex_lock);
+void BM_pthread_mutex_lock::Run(int iters) {
   StopBenchmarkTiming();
   pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
   StartBenchmarkTiming();
@@ -91,9 +92,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_pthread_mutex_lock);
 
-static void BM_pthread_mutex_lock_ERRORCHECK(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_mutex_lock_ERRORCHECK);
+void BM_pthread_mutex_lock_ERRORCHECK::Run(int iters) {
   StopBenchmarkTiming();
   pthread_mutex_t mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
   StartBenchmarkTiming();
@@ -105,9 +106,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_pthread_mutex_lock_ERRORCHECK);
 
-static void BM_pthread_mutex_lock_RECURSIVE(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_mutex_lock_RECURSIVE);
+void BM_pthread_mutex_lock_RECURSIVE::Run(int iters) {
   StopBenchmarkTiming();
   pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
   StartBenchmarkTiming();
@@ -119,9 +120,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_pthread_mutex_lock_RECURSIVE);
 
-static void BM_pthread_rw_lock_read(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_rw_lock_read);
+void BM_pthread_rw_lock_read::Run(int iters) {
   StopBenchmarkTiming();
   pthread_rwlock_t lock;
   pthread_rwlock_init(&lock, NULL);
@@ -135,9 +136,9 @@
   StopBenchmarkTiming();
   pthread_rwlock_destroy(&lock);
 }
-BENCHMARK(BM_pthread_rw_lock_read);
 
-static void BM_pthread_rw_lock_write(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_rw_lock_write);
+void BM_pthread_rw_lock_write::Run(int iters) {
   StopBenchmarkTiming();
   pthread_rwlock_t lock;
   pthread_rwlock_init(&lock, NULL);
@@ -151,13 +152,13 @@
   StopBenchmarkTiming();
   pthread_rwlock_destroy(&lock);
 }
-BENCHMARK(BM_pthread_rw_lock_write);
 
 static void* IdleThread(void*) {
   return NULL;
 }
 
-static void BM_pthread_create(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_create);
+void BM_pthread_create::Run(int iters) {
   StopBenchmarkTiming();
   pthread_t thread;
 
@@ -168,43 +169,45 @@
     pthread_join(thread, NULL);
   }
 }
-BENCHMARK(BM_pthread_create);
 
-static void* RunThread(void*) {
-  StopBenchmarkTiming();
+static void* RunThread(void* arg) {  // Thread entry point; arg is the Benchmark handed to pthread_create.
+  ::testing::Benchmark* benchmark = static_cast<::testing::Benchmark*>(arg);
+  benchmark->StopBenchmarkTiming();  // Stop the clock from inside the newly started thread.
   return NULL;
 }
 
-static void BM_pthread_create_and_run(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_create_and_run);
+void BM_pthread_create_and_run::Run(int iters) {
   StopBenchmarkTiming();
   pthread_t thread;
 
   for (int i = 0; i < iters; ++i) {
     StartBenchmarkTiming();
-    pthread_create(&thread, NULL, RunThread, NULL);
+    pthread_create(&thread, NULL, RunThread, this);
     pthread_join(thread, NULL);
   }
 }
-BENCHMARK(BM_pthread_create_and_run);
 
-static void* ExitThread(void*) {
-  StartBenchmarkTiming();
+static void* ExitThread(void* arg) {  // Thread entry point; arg is the Benchmark handed to pthread_create.
+  ::testing::Benchmark* benchmark = static_cast<::testing::Benchmark*>(arg);
+  benchmark->StartBenchmarkTiming();  // Start the clock just before this thread exits.
   pthread_exit(NULL);
 }
 
-static void BM_pthread_exit_and_join(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_exit_and_join);
+void BM_pthread_exit_and_join::Run(int iters) {
   StopBenchmarkTiming();
   pthread_t thread;
 
   for (int i = 0; i < iters; ++i) {
-    pthread_create(&thread, NULL, ExitThread, NULL);
+    pthread_create(&thread, NULL, ExitThread, this);
     pthread_join(thread, NULL);
     StopBenchmarkTiming();
   }
 }
-BENCHMARK(BM_pthread_exit_and_join);
 
-static void BM_pthread_key_create(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_key_create);
+void BM_pthread_key_create::Run(int iters) {
   StopBenchmarkTiming();
   pthread_key_t key;
 
@@ -215,9 +218,9 @@
     pthread_key_delete(key);
   }
 }
-BENCHMARK(BM_pthread_key_create);
 
-static void BM_pthread_key_delete(int iters) {
+BENCHMARK_NO_ARG(BM_pthread_key_delete);
+void BM_pthread_key_delete::Run(int iters) {
   StopBenchmarkTiming();
   pthread_key_t key;
 
@@ -228,4 +231,3 @@
     StopBenchmarkTiming();
   }
 }
-BENCHMARK(BM_pthread_key_delete);
diff --git a/benchmarks/semaphore_benchmark.cpp b/benchmarks/semaphore_benchmark.cpp
index 974b046..8dd5684 100644
--- a/benchmarks/semaphore_benchmark.cpp
+++ b/benchmarks/semaphore_benchmark.cpp
@@ -14,14 +14,15 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
-
 #include <pthread.h>
 #include <semaphore.h>
 #include <stdatomic.h>
 #include <stdio.h>
 
-static void BM_semaphore_sem_getvalue(int iters) {
+#include <benchmark/Benchmark.h>
+
+BENCHMARK_NO_ARG(BM_semaphore_sem_getvalue);
+void BM_semaphore_sem_getvalue::Run(int iters) {
   StopBenchmarkTiming();
   sem_t semaphore;
   sem_init(&semaphore, 1, 1);
@@ -34,9 +35,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_semaphore_sem_getvalue);
 
-static void BM_semaphore_sem_wait_sem_post(int iters) {
+BENCHMARK_NO_ARG(BM_semaphore_sem_wait_sem_post);
+void BM_semaphore_sem_wait_sem_post::Run(int iters) {
   StopBenchmarkTiming();
   sem_t semaphore;
   sem_init(&semaphore, 1, 1);
@@ -49,7 +50,6 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_semaphore_sem_wait_sem_post);
 
 /*
  *    This test reports the overhead of the underlying futex wake syscall on
@@ -87,7 +87,8 @@
     return NULL;
 }
 
-static void BM_semaphore_sem_post(int iters) {
+BENCHMARK_NO_ARG(BM_semaphore_sem_post);
+void BM_semaphore_sem_post::Run(int iters) {
   StopBenchmarkTiming();
 
   sem_t semaphore;
@@ -100,9 +101,6 @@
   pthread_attr_setschedparam(&attr, &param);
   pthread_attr_setschedpolicy(&attr, SCHED_OTHER);
   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-#ifdef PTHREAD_SET_INHERIT_SCHED
-  pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
-#endif
   pthread_t pthread;
   pthread_create(&pthread, &attr, BM_semaphore_sem_post_start_thread, &semaphore);
   pthread_attr_destroy(&attr);
@@ -143,99 +141,3 @@
     sched_yield();
   } while (!BM_semaphore_sem_post_running);
 }
-BENCHMARK(BM_semaphore_sem_post);
-
-/*
- *    This test reports the overhead of sem_post to sem_wake. A circle of
- * num_semaphore - 1 threads are run on a set of semaphores to measure the
- * activity. One can calculate the sem_wake overhead alone by:
- *
- * BM_semaphore_sem_post_sem_wait - BM_semaphore_sem_post - BM_time_clock_gettime
- *
- * Differences will result if there are more threads than active processors,
- * there will be delay induced when scheduling the processes. This cost is
- * measured by trying different values of num_semaphore. The governor selected
- * will have a major impact on the results for a large number of threads.
- *
- *     To reduce the chances for threads racing ahead and not triggering the
- * futex, for example the background threads finish their job before the
- * sem_wait is hit in the main thread, the background threads will run at
- * batch priority and the main thread at fifo priority. This should generally
- * guarantee the main thread completes its task of priming itself with the
- * sem_wait before the other threads can start. In practice without the
- * sched mechanics here, this works on Android configured kernels, this is
- * insurance for wacky(tm) sched configurations.
- */
-static void *BM_semaphore_sem_post_sem_wait_start_thread(void *obj) {
-  sem_t *semaphore = reinterpret_cast<sem_t *>(obj);
-
-  while ((BM_semaphore_sem_post_running > 0) && !sem_wait(semaphore)) {
-    sem_post(semaphore + 1);
-  }
-  --BM_semaphore_sem_post_running;
-  return NULL;
-}
-
-static void BM_semaphore_sem_post_sem_wait_num(int iters, int num_semaphore) {
-  StopBenchmarkTiming();
-
-  sem_t semaphore[num_semaphore];
-
-  for (int i = 0; i < num_semaphore; ++i) {
-    sem_init(semaphore + i, 0, 0);
-  }
-
-  pthread_attr_t attr;
-  pthread_attr_init(&attr);
-  BM_semaphore_sem_post_running = 1;
-  struct sched_param param = { 0, };
-  pthread_attr_setschedparam(&attr, &param);
-  pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
-  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-#ifdef PTHREAD_SET_INHERIT_SCHED
-  pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
-#endif
-  for (int i = 0; i < (num_semaphore - 1); ++i) {
-    pthread_t pthread;
-    pthread_create(&pthread, &attr, BM_semaphore_sem_post_sem_wait_start_thread, semaphore + i);
-  }
-  pthread_attr_destroy(&attr);
-  sched_yield();
-
-  param.sched_priority = 1;
-  sched_setscheduler((pid_t)0, SCHED_FIFO, &param);
-
-  StartBenchmarkTiming();
-
-  for (int i = 0; i < iters; i += num_semaphore) {
-    sem_post(semaphore);
-    sem_wait(semaphore + num_semaphore - 1);
-  }
-
-  StopBenchmarkTiming();
-
-  param.sched_priority = 0;
-  sched_setscheduler((pid_t)0, SCHED_OTHER, &param);
-
-  if (BM_semaphore_sem_post_running > 0) {
-    BM_semaphore_sem_post_running = 0;
-  }
-  for (int i = 0;
-       (i < (10 * num_semaphore)) && (BM_semaphore_sem_post_running > (1 - num_semaphore));
-       ++i) {
-    for (int j = 0; j < (num_semaphore - 1); ++j) {
-      sem_post(semaphore + j);
-    }
-    sched_yield();
-  }
-}
-
-static void BM_semaphore_sem_post_sem_wait_low(int iters) {
-    BM_semaphore_sem_post_sem_wait_num(iters, 2);
-}
-BENCHMARK(BM_semaphore_sem_post_sem_wait_low);
-
-static void BM_semaphore_sem_post_sem_wait_high(int iters) {
-    BM_semaphore_sem_post_sem_wait_num(iters, 100);
-}
-BENCHMARK(BM_semaphore_sem_post_sem_wait_high);
diff --git a/benchmarks/stdio_benchmark.cpp b/benchmarks/stdio_benchmark.cpp
index 5658a50..342e561 100644
--- a/benchmarks/stdio_benchmark.cpp
+++ b/benchmarks/stdio_benchmark.cpp
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
-
 #include <stdio.h>
 #include <stdio_ext.h>
 
+#include <benchmark/Benchmark.h>
+
 #define KB 1024
 #define MB 1024*KB
 
@@ -27,12 +27,12 @@
     Arg(1*KB)->Arg(4*KB)->Arg(8*KB)->Arg(16*KB)->Arg(64*KB)
 
 template <typename Fn>
-static void ReadWriteTest(int iters, int chunk_size, Fn f, bool buffered) {
-  StopBenchmarkTiming();
+void ReadWriteTest(::testing::Benchmark* benchmark, int iters, int chunk_size, Fn f, bool buffered) {
+  benchmark->StopBenchmarkTiming();
   FILE* fp = fopen("/dev/zero", "rw");
   __fsetlocking(fp, FSETLOCKING_BYCALLER);
   char* buf = new char[chunk_size];
-  StartBenchmarkTiming();
+  benchmark->StartBenchmarkTiming();
 
   if (!buffered) {
     setvbuf(fp, 0, _IONBF, 0);
@@ -42,31 +42,31 @@
     f(buf, chunk_size, 1, fp);
   }
 
-  StopBenchmarkTiming();
-  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(chunk_size));
+  benchmark->StopBenchmarkTiming();
+  benchmark->SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(chunk_size));
   delete[] buf;
   fclose(fp);
 }
 
-static void BM_stdio_fread(int iters, int chunk_size) {
-  ReadWriteTest(iters, chunk_size, fread, true);
+BENCHMARK_WITH_ARG(BM_stdio_fread, int)->AT_COMMON_SIZES;
+void BM_stdio_fread::Run(int iters, int chunk_size) {
+  ReadWriteTest(this, iters, chunk_size, fread, true);
 }
-BENCHMARK(BM_stdio_fread)->AT_COMMON_SIZES;
 
-static void BM_stdio_fwrite(int iters, int chunk_size) {
-  ReadWriteTest(iters, chunk_size, fwrite, true);
+BENCHMARK_WITH_ARG(BM_stdio_fwrite, int)->AT_COMMON_SIZES;
+void BM_stdio_fwrite::Run(int iters, int chunk_size) {
+  ReadWriteTest(this, iters, chunk_size, fwrite, true);
 }
-BENCHMARK(BM_stdio_fwrite)->AT_COMMON_SIZES;
 
-static void BM_stdio_fread_unbuffered(int iters, int chunk_size) {
-  ReadWriteTest(iters, chunk_size, fread, false);
+BENCHMARK_WITH_ARG(BM_stdio_fread_unbuffered, int)->AT_COMMON_SIZES;
+void BM_stdio_fread_unbuffered::Run(int iters, int chunk_size) {
+  ReadWriteTest(this, iters, chunk_size, fread, false);
 }
-BENCHMARK(BM_stdio_fread_unbuffered)->AT_COMMON_SIZES;
 
-static void BM_stdio_fwrite_unbuffered(int iters, int chunk_size) {
-  ReadWriteTest(iters, chunk_size, fwrite, false);
+BENCHMARK_WITH_ARG(BM_stdio_fwrite_unbuffered, int)->AT_COMMON_SIZES;
+void BM_stdio_fwrite_unbuffered::Run(int iters, int chunk_size) {
+  ReadWriteTest(this, iters, chunk_size, fwrite, false);
 }
-BENCHMARK(BM_stdio_fwrite_unbuffered)->AT_COMMON_SIZES;
 
 static void FopenFgetsFclose(int iters, bool no_locking) {
   char buf[1024];
@@ -78,12 +78,16 @@
   }
 }
 
-static void BM_stdio_fopen_fgets_fclose_locking(int iters) {
+BENCHMARK_NO_ARG(BM_stdio_fopen_fgets_fclose_locking);
+void BM_stdio_fopen_fgets_fclose_locking::Run(int iters) {
+  StartBenchmarkTiming();
   FopenFgetsFclose(iters, false);
+  StopBenchmarkTiming();
 }
-BENCHMARK(BM_stdio_fopen_fgets_fclose_locking);
 
-static void BM_stdio_fopen_fgets_fclose_no_locking(int iters) {
+BENCHMARK_NO_ARG(BM_stdio_fopen_fgets_fclose_no_locking);
+void BM_stdio_fopen_fgets_fclose_no_locking::Run(int iters) {
+  StartBenchmarkTiming();
   FopenFgetsFclose(iters, true);
+  StopBenchmarkTiming();
 }
-BENCHMARK(BM_stdio_fopen_fgets_fclose_no_locking);
diff --git a/benchmarks/string_benchmark.cpp b/benchmarks/string_benchmark.cpp
index 536e253..866aa00 100644
--- a/benchmarks/string_benchmark.cpp
+++ b/benchmarks/string_benchmark.cpp
@@ -14,10 +14,11 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
-
+#include <stdint.h>
 #include <string.h>
 
+#include <benchmark/Benchmark.h>
+
 #define KB 1024
 #define MB 1024*KB
 
@@ -26,7 +27,8 @@
 
 // TODO: test unaligned operation too? (currently everything will be 8-byte aligned by malloc.)
 
-static void BM_string_memcmp(int iters, int nbytes) {
+BENCHMARK_WITH_ARG(BM_string_memcmp, int)->AT_COMMON_SIZES;
+void BM_string_memcmp::Run(int iters, int nbytes) {
   StopBenchmarkTiming();
   char* src = new char[nbytes]; char* dst = new char[nbytes];
   memset(src, 'x', nbytes);
@@ -39,13 +41,13 @@
   }
 
   StopBenchmarkTiming();
-  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));
+  SetBenchmarkBytesProcessed(uint64_t(iters) * uint64_t(nbytes));
   delete[] src;
   delete[] dst;
 }
-BENCHMARK(BM_string_memcmp)->AT_COMMON_SIZES;
 
-static void BM_string_memcpy(int iters, int nbytes) {
+BENCHMARK_WITH_ARG(BM_string_memcpy, int)->AT_COMMON_SIZES;
+void BM_string_memcpy::Run(int iters, int nbytes) {
   StopBenchmarkTiming();
   char* src = new char[nbytes]; char* dst = new char[nbytes];
   memset(src, 'x', nbytes);
@@ -56,13 +58,13 @@
   }
 
   StopBenchmarkTiming();
-  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));
+  SetBenchmarkBytesProcessed(uint64_t(iters) * uint64_t(nbytes));
   delete[] src;
   delete[] dst;
 }
-BENCHMARK(BM_string_memcpy)->AT_COMMON_SIZES;
 
-static void BM_string_memmove(int iters, int nbytes) {
+BENCHMARK_WITH_ARG(BM_string_memmove, int)->AT_COMMON_SIZES;
+void BM_string_memmove::Run(int iters, int nbytes) {
   StopBenchmarkTiming();
   char* buf = new char[nbytes + 64];
   memset(buf, 'x', nbytes + 64);
@@ -73,12 +75,12 @@
   }
 
   StopBenchmarkTiming();
-  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));
+  SetBenchmarkBytesProcessed(uint64_t(iters) * uint64_t(nbytes));
   delete[] buf;
 }
-BENCHMARK(BM_string_memmove)->AT_COMMON_SIZES;
 
-static void BM_string_memset(int iters, int nbytes) {
+BENCHMARK_WITH_ARG(BM_string_memset, int)->AT_COMMON_SIZES;
+void BM_string_memset::Run(int iters, int nbytes) {
   StopBenchmarkTiming();
   char* dst = new char[nbytes];
   StartBenchmarkTiming();
@@ -88,12 +90,12 @@
   }
 
   StopBenchmarkTiming();
-  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));
+  SetBenchmarkBytesProcessed(uint64_t(iters) * uint64_t(nbytes));
   delete[] dst;
 }
-BENCHMARK(BM_string_memset)->AT_COMMON_SIZES;
 
-static void BM_string_strlen(int iters, int nbytes) {
+BENCHMARK_WITH_ARG(BM_string_strlen, int)->AT_COMMON_SIZES;
+void BM_string_strlen::Run(int iters, int nbytes) {
   StopBenchmarkTiming();
   char* s = new char[nbytes];
   memset(s, 'x', nbytes);
@@ -106,7 +108,6 @@
   }
 
   StopBenchmarkTiming();
-  SetBenchmarkBytesProcessed(int64_t(iters) * int64_t(nbytes));
+  SetBenchmarkBytesProcessed(uint64_t(iters) * uint64_t(nbytes));
   delete[] s;
 }
-BENCHMARK(BM_string_strlen)->AT_COMMON_SIZES;
diff --git a/benchmarks/time_benchmark.cpp b/benchmarks/time_benchmark.cpp
index f093ec1..6688bbc 100644
--- a/benchmarks/time_benchmark.cpp
+++ b/benchmarks/time_benchmark.cpp
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
-
-#include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <time.h>
 
-static void BM_time_clock_gettime(int iters) {
+#include <benchmark/Benchmark.h>
+
+BENCHMARK_NO_ARG(BM_time_clock_gettime);
+void BM_time_clock_gettime::Run(int iters) {
   StartBenchmarkTiming();
 
   timespec t;
@@ -31,9 +31,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_time_clock_gettime);
 
-static void BM_time_clock_gettime_syscall(int iters) {
+BENCHMARK_NO_ARG(BM_time_clock_gettime_syscall);
+void BM_time_clock_gettime_syscall::Run(int iters) {
   StartBenchmarkTiming();
 
   timespec t;
@@ -43,9 +43,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_time_clock_gettime_syscall);
 
-static void BM_time_gettimeofday(int iters) {
+BENCHMARK_NO_ARG(BM_time_gettimeofday);
+void BM_time_gettimeofday::Run(int iters) {
   StartBenchmarkTiming();
 
   timeval tv;
@@ -55,9 +55,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_time_gettimeofday);
 
-static void BM_time_gettimeofday_syscall(int iters) {
+BENCHMARK_NO_ARG(BM_time_gettimeofday_syscall);
+void BM_time_gettimeofday_syscall::Run(int iters) {
   StartBenchmarkTiming();
 
   timeval tv;
@@ -67,9 +67,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_time_gettimeofday_syscall);
 
-static void BM_time_time(int iters) {
+BENCHMARK_NO_ARG(BM_time_time);
+void BM_time_time::Run(int iters) {
   StartBenchmarkTiming();
 
   for (int i = 0; i < iters; ++i) {
@@ -78,4 +78,3 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_time_time);
diff --git a/benchmarks/unistd_benchmark.cpp b/benchmarks/unistd_benchmark.cpp
index 94be1dd..09ca0e6 100644
--- a/benchmarks/unistd_benchmark.cpp
+++ b/benchmarks/unistd_benchmark.cpp
@@ -14,12 +14,13 @@
  * limitations under the License.
  */
 
-#include "benchmark.h"
-
 #include <sys/syscall.h>
 #include <unistd.h>
 
-static void BM_unistd_getpid(int iters) {
+#include <benchmark/Benchmark.h>
+
+BENCHMARK_NO_ARG(BM_unistd_getpid);
+void BM_unistd_getpid::Run(int iters) {
   StartBenchmarkTiming();
 
   for (int i = 0; i < iters; ++i) {
@@ -28,9 +29,9 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_unistd_getpid);
 
-static void BM_unistd_getpid_syscall(int iters) {
+BENCHMARK_NO_ARG(BM_unistd_getpid_syscall);
+void BM_unistd_getpid_syscall::Run(int iters) {
   StartBenchmarkTiming();
 
   for (int i = 0; i < iters; ++i) {
@@ -39,14 +40,14 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_unistd_getpid_syscall);
 
 #if defined(__BIONIC__)
 
 // Stop GCC optimizing out our pure function.
 /* Must not be static! */ pid_t (*gettid_fp)() = gettid;
 
-static void BM_unistd_gettid(int iters) {
+BENCHMARK_NO_ARG(BM_unistd_gettid);
+void BM_unistd_gettid::Run(int iters) {
   StartBenchmarkTiming();
 
   for (int i = 0; i < iters; ++i) {
@@ -55,11 +56,11 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_unistd_gettid);
 
 #endif
 
-static void BM_unistd_gettid_syscall(int iters) {
+BENCHMARK_NO_ARG(BM_unistd_gettid_syscall);
+void BM_unistd_gettid_syscall::Run(int iters) {
   StartBenchmarkTiming();
 
   for (int i = 0; i < iters; ++i) {
@@ -68,4 +69,3 @@
 
   StopBenchmarkTiming();
 }
-BENCHMARK(BM_unistd_gettid_syscall);
diff --git a/benchmarks/utils.cpp b/benchmarks/utils.cpp
new file mode 100644
index 0000000..863b9db
--- /dev/null
+++ b/benchmarks/utils.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <string>
+
+#include "utils.h"
+
+// Rounds n up to the next "nice" count: 2, 5 or 10 times a power of ten.
+// A power of ten (>= 10) maps to itself; every other n maps to a larger
+// value, and anything below 2 maps to 2.
+int Round(int n) {
+  // Grow scale to the largest power of ten that, times ten, is still < n.
+  int scale = 1;
+  for (; scale * 10 < n; scale *= 10) {
+  }
+  int multiplier = 10;
+  if (n < 2 * scale) multiplier = 2;
+  else if (n < 5 * scale) multiplier = 5;
+  return multiplier * scale;
+}
+
+// Formats value with a binary (base 2: Ki/Mi/Gi) or decimal (base 10: k/M/G)
+// unit suffix, truncating toward zero. Aborts on any other base or if the
+// output string cannot be allocated. Similar to the code in art.
+std::string PrettyInt(long value, size_t base) {
+  if (base != 2 && base != 10) abort();
+
+  // Take the magnitude in unsigned arithmetic: negating LONG_MIN as a signed
+  // long would overflow, whereas unsigned negation is well defined.
+  const bool negative_number = (value < 0);
+  uint64_t count = static_cast<uint64_t>(value);
+  if (negative_number) count = 0 - count;
+
+  // The thresholds at which we display amounts. A count is displayed in unit
+  // U when kUnitThresholds[U] <= count < kUnitThresholds[U-1]. Note that the
+  // M/Mi and k/Ki units only kick in at 2x and 3x their size respectively.
+  static const uint64_t kUnitThresholds2[] = {
+    1024*1024*1024 /* Gi */, 2*1024*1024 /* Mi */, 3*1024 /* Ki */, 0,
+  };
+  static const uint64_t kUnitThresholds10[] = {
+    1000*1000*1000 /* G */, 2*1000*1000 /* M */, 3*1000 /* k */, 0,
+  };
+  static const uint64_t kAmountPerUnit2[] = { 1024*1024*1024, 1024*1024, 1024, 1 };
+  static const uint64_t kAmountPerUnit10[] = { 1000*1000*1000, 1000*1000, 1000, 1 };
+  static const char* const kUnitStrings2[] = { "Gi", "Mi", "Ki", "" };
+  static const char* const kUnitStrings10[] = { "G", "M", "k", "" };
+
+  // Which set are we using?
+  const uint64_t* kUnitThresholds = ((base == 2) ? kUnitThresholds2 : kUnitThresholds10);
+  const uint64_t* kAmountPerUnit = ((base == 2) ? kAmountPerUnit2 : kAmountPerUnit10);
+  const char* const* kUnitStrings = ((base == 2) ? kUnitStrings2 : kUnitStrings10);
+
+  // Use the largest unit whose threshold count reaches; 0 ends the table.
+  size_t i = 0;
+  while (kUnitThresholds[i] != 0 && count < kUnitThresholds[i]) ++i;
+  // The argument is a uint64_t, so the matching conversion is PRIu64.
+  char* s = NULL;
+  if (asprintf(&s, "%s%" PRIu64 "%s", (negative_number ? "-" : ""),
+               count / kAmountPerUnit[i], kUnitStrings[i]) == -1) abort();
+  std::string result(s);
+  free(s);
+  return result;
+}
diff --git a/benchmarks/utils.h b/benchmarks/utils.h
new file mode 100644
index 0000000..c3c64ba
--- /dev/null
+++ b/benchmarks/utils.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BENCHMARKS_UTILS_H
+#define BENCHMARKS_UTILS_H
+
+#include <stddef.h>
+#include <string>
+
+int Round(int n);
+std::string PrettyInt(long value, size_t base);
+
+#endif  // BENCHMARKS_UTILS_H
diff --git a/libc/Android.mk b/libc/Android.mk
index b5e4899..cb1d8c0 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -116,6 +116,7 @@
     bionic/error.cpp \
     bionic/eventfd_read.cpp \
     bionic/eventfd_write.cpp \
+    bionic/faccessat.cpp \
     bionic/fchmod.cpp \
     bionic/fchmodat.cpp \
     bionic/ffs.cpp \
@@ -143,6 +144,7 @@
     bionic/mbrtoc16.cpp \
     bionic/mbrtoc32.cpp \
     bionic/mbstate.cpp \
+    bionic/mempcpy.cpp \
     bionic/mkdir.cpp \
     bionic/mkfifo.cpp \
     bionic/mknod.cpp \
@@ -214,6 +216,7 @@
     bionic/wait.cpp \
     bionic/wchar.cpp \
     bionic/wctype.cpp \
+    bionic/wmempcpy.cpp \
 
 libc_bionic_src_files :=
 
@@ -1123,7 +1126,7 @@
 include $(CLEAR_VARS)
 
 LOCAL_MODULE := libc_ndk
-LOCAL_CLANG := true
+LOCAL_CLANG := $(use_clang)
 LOCAL_ASFLAGS := $(LOCAL_CFLAGS)
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CFLAGS := $(libc_common_cflags) -fvisibility=hidden -O0
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index aae7de7..150dd14 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -130,7 +130,7 @@
 int __getdents64:getdents64(unsigned int, struct dirent*, unsigned int)   arm,arm64,mips,mips64,x86,x86_64
 
 int __openat:openat(int, const char*, int, mode_t) all
-int faccessat(int, const char*, int, int)  all
+int ___faccessat:faccessat(int, const char*, int)  all
 int ___fchmodat:fchmodat(int, const char*, mode_t)  all
 int fchownat(int, const char*, uid_t, gid_t, int)  all
 int fstatat64|fstatat:fstatat64(int, const char*, struct stat*, int)   arm,mips,x86
diff --git a/libc/arch-arm/syscalls/faccessat.S b/libc/arch-arm/syscalls/___faccessat.S
similarity index 81%
rename from libc/arch-arm/syscalls/faccessat.S
rename to libc/arch-arm/syscalls/___faccessat.S
index a1df5c0..1d09cf7 100644
--- a/libc/arch-arm/syscalls/faccessat.S
+++ b/libc/arch-arm/syscalls/___faccessat.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(faccessat)
+ENTRY(___faccessat)
     mov     ip, r7
     ldr     r7, =__NR_faccessat
     swi     #0
@@ -11,4 +11,5 @@
     bxls    lr
     neg     r0, r0
     b       __set_errno_internal
-END(faccessat)
+END(___faccessat)
+.hidden ___faccessat
diff --git a/libc/arch-arm64/syscalls/faccessat.S b/libc/arch-arm64/syscalls/___faccessat.S
similarity index 79%
rename from libc/arch-arm64/syscalls/faccessat.S
rename to libc/arch-arm64/syscalls/___faccessat.S
index 4c96cfa..6a41b69 100644
--- a/libc/arch-arm64/syscalls/faccessat.S
+++ b/libc/arch-arm64/syscalls/___faccessat.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(faccessat)
+ENTRY(___faccessat)
     mov     x8, __NR_faccessat
     svc     #0
 
@@ -11,4 +11,5 @@
     b.hi    __set_errno_internal
 
     ret
-END(faccessat)
+END(___faccessat)
+.hidden ___faccessat
diff --git a/libc/arch-mips/bionic/setjmp.S b/libc/arch-mips/bionic/setjmp.S
index 05d0e25..1c26553 100644
--- a/libc/arch-mips/bionic/setjmp.S
+++ b/libc/arch-mips/bionic/setjmp.S
@@ -1,4 +1,31 @@
 /*
+ * Copyright (C) 2014-2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
  * Copyright (c) 2001-2002 Opsycon AB  (www.opsycon.se / www.opsycon.com)
  *
  * Redistribution and use in source and binary forms, with or without
@@ -94,23 +121,31 @@
 #include <private/bionic_asm.h>
 #include <machine/setjmp.h>
 
-/* On Mips32, jmpbuf begins with optional 4-byte filler so that
- *  all saved FP regs are aligned on 8-byte boundary, despite this whole
- *  struct being mis-declared to users as an array of (4-byte) longs.
- *  All the following offsets are then from the rounded-up base addr
+/* jmpbuf is declared to users as an array of longs, which is only
+ * 4-byte aligned in 32-bit builds.  The Mips jmpbuf begins with a
+ * dynamically-sized 0- or 4-byte unused filler so that double-prec FP regs
+ * are saved to 8-byte-aligned mem cells.
+ * All the following jmpbuf offsets are from the rounded-DOWN base addr.
  */
 
 /* Fields of same size on all MIPS abis: */
-#define	SC_MAGIC        (0*4)		/* 4 bytes, identify jmpbuf */
-#define	SC_MASK		(1*4)		/* 4 bytes, saved signal mask */
-#define	SC_FPSR		(2*4)		/* 4 bytes, floating point control/status reg */
-/*     	filler2		(3*4)		   4 bytes, pad to 8-byte boundary */
+/*     	field:		byte offset:	size:						*/
+/*     	dynam filler	(0*4)		   0-4 bytes of rounddown filler, DON'T TOUCH!!
+						often overlays user storage!!		*/
+#define	SC_MAGIC_OFFSET	(1*4)		/* 4 bytes, identify jmpbuf, first actual field */
+#define	SC_FLAG_OFFSET	(2*4)		/* 4 bytes, savesigs flag */
+#define	SC_FPSR_OFFSET	(3*4)		/* 4 bytes, floating point control/status reg */
+/* following fields are 8-byte aligned */
+#define	SC_MASK_OFFSET	(4*4)		/* 16 bytes, mips32/mips64 version of sigset_t */
+#define	SC_SPARE_OFFSET	(8*4)		/* 8 bytes, reserved for future uses */
 
 /* Registers that are 4-byte on mips32 o32, and 8-byte on mips64 n64 abi */
-#define	SC_REGS_SAVED	12		/* ra,gp,sp,s0-s8 */
-#define	SC_REGS		(4*4)		/* SC_REGS_SAVED*REGSZ bytes */
+#define	SC_REGS_OFFSET	(10*4)		/* SC_REGS_BYTES */
+#define	SC_REGS_SAVED	12 /*regs*/	/* ra,s0-s8,gp,sp */
+#define	SC_REGS_BYTES   (SC_REGS_SAVED*REGSZ)
+#define	SC_REGS		SC_REGS_OFFSET
 
-/* Floating pt registers are 8-bytes on all abis,
+/* Double floating pt registers are 8-bytes on all abis,
  * but the number of saved fp regs varies for o32/n32 versus n64 abis:
  */
 
@@ -120,22 +155,20 @@
 #define	SC_FPREGS_SAVED	6  /* even fp regs f20,f22,f24,f26,f28,f30 */
 #endif
 
-#define	SC_FPREGS	(SC_REGS + SC_REGS_SAVED*REGSZ)  /* SC_FPREGS_SAVED*REGSZ_FP bytes */
+#define	SC_FPREGS_OFFSET (SC_REGS_OFFSET + SC_REGS_BYTES)  /* SC_FPREGS_BYTES */
+#define	SC_FPREGS_BYTES (SC_FPREGS_SAVED*REGSZ_FP)
+#define	SC_FPREGS	SC_FPREGS_OFFSET
 
-#define	SC_BYTES	(SC_FPREGS + SC_FPREGS_SAVED*REGSZ_FP)
-#define	SC_LONGS	(SC_BYTES/REGSZ)
+#define	SC_TOTAL_BYTES	(SC_FPREGS_OFFSET + SC_FPREGS_BYTES)
+#define	SC_TOTAL_LONGS	(SC_TOTAL_BYTES/REGSZ)
 
-#ifdef __LP64__
-/* SC_LONGS is 22, so _JBLEN should be 22 or larger */
-#else
-/* SC_LONGS is 28, but must also allocate dynamic-roundup filler.
-   so _JBLEN should be 29 or larger */
+#if SC_TOTAL_LONGS > _JBLEN
+#error _JBLEN is too small
 #endif
 
 /*
- * _setjmp, _longjmp (restoring signal state)
  *
- *  GPOFF and FRAMESIZE must be the same for both _setjmp and _longjmp!
+ *  GPOFF and FRAMESIZE must be the same for all setjmp/longjmp routines
  *
  */
 
@@ -145,30 +178,33 @@
 GPOFF= FRAMESZ-2*REGSZ
 RAOFF= FRAMESZ-1*REGSZ
 
-NON_LEAF(setjmp, FRAMESZ, ra)
+NON_LEAF(sigsetjmp, FRAMESZ, ra)
 	.mask	0x80000000, RAOFF
 	PTR_SUBU sp, FRAMESZ			# allocate stack frame
-	SETUP_GP64(GPOFF, setjmp)
+	SETUP_GP64(GPOFF, sigsetjmp)
 	SAVE_GP(GPOFF)
 	.set	reorder
 
+setjmp_common:
 #ifndef __LP64__
-	addiu   a0, 7				# roundup jmpbuf addr to 8-byte boundary
-	li      t0, ~7
-	and     a0, t0
+	li	t0, ~7
+	and	a0, t0				# round jmpbuf addr DOWN to 8-byte boundary
 #endif
+	sw	a1, SC_FLAG_OFFSET(a0)		# save savesigs flag
+	beqz	a1, 1f				# do saving of signal mask?
 
-	REG_S	ra, RAOFF(sp)			# save state
+	REG_S	ra, RAOFF(sp)			# spill state
 	REG_S	a0, A0OFF(sp)
-	move	a0, zero			# get current signal mask
-	jal	sigblock
+	# call sigprocmask(int how ignored, sigset_t* null, sigset_t* SC_MASK(a0)):
+	LA	a2, SC_MASK_OFFSET(a0)		# gets current signal mask
+	li	a0, 0				# how; ignored when new mask is null
+	li	a1, 0				# null new mask
+	jal	sigprocmask			# get current signal mask
 	REG_L	a0, A0OFF(sp)
 	REG_L	ra, RAOFF(sp)
-
-	REG_S	v0, SC_MASK(a0)			# save sc_mask = sigblock(0)
-
+1:
 	li	v0, 0xACEDBADE			# sigcontext magic number
-	sw	v0, SC_MAGIC(a0)
+	sw	v0, SC_MAGIC_OFFSET(a0)
 	# callee-saved long-sized regs:
 	REG_S	ra, SC_REGS+0*REGSZ(a0)
 	REG_S	s0, SC_REGS+1*REGSZ(a0)
@@ -181,9 +217,9 @@
 	REG_S	s7, SC_REGS+8*REGSZ(a0)
 	REG_S	s8, SC_REGS+9*REGSZ(a0)
 	REG_L	v0, GPOFF(sp)
-	REG_S	v0, SC_REGS+10*REGSZ(a0)
+	REG_S	v0, SC_REGS+10*REGSZ(a0)	# save gp
 	PTR_ADDU v0, sp, FRAMESZ
-	REG_S	v0, SC_REGS+11*REGSZ(a0)
+	REG_S	v0, SC_REGS+11*REGSZ(a0)	# save orig sp
 
 	cfc1	v0, $31
 
@@ -199,7 +235,7 @@
 	s.d	$f31, SC_FPREGS+7*REGSZ_FP(a0)
 #else
 	# callee-saved fp regs on mips o32 ABI are
-	#   the even-numbered fp regs $f20,$f22,...$f30
+	#   the even-numbered double fp regs $f20,$f22,...$f30
 	s.d	$f20, SC_FPREGS+0*REGSZ_FP(a0)
 	s.d	$f22, SC_FPREGS+1*REGSZ_FP(a0)
 	s.d	$f24, SC_FPREGS+2*REGSZ_FP(a0)
@@ -207,37 +243,68 @@
 	s.d	$f28, SC_FPREGS+4*REGSZ_FP(a0)
 	s.d	$f30, SC_FPREGS+5*REGSZ_FP(a0)
 #endif
-	sw	v0, SC_FPSR(a0)
+	sw	v0, SC_FPSR_OFFSET(a0)
 	move	v0, zero
 	RESTORE_GP64
 	PTR_ADDU sp, FRAMESZ
 	j	ra
-END(setjmp)
+END(sigsetjmp)
 
-NON_LEAF(longjmp, FRAMESZ, ra)
+
+# Alternate entry points:
+/* setjmp: sigsetjmp with savesigs forced to 1, so the signal mask is saved. */
+NON_LEAF(setjmp, FRAMESZ, ra)
 	.mask	0x80000000, RAOFF
 	PTR_SUBU sp, FRAMESZ
-	SETUP_GP64(GPOFF, longjmp)
+	SETUP_GP64(GPOFF, setjmp)		# can't share sigsetjmp's gp code
+	SAVE_GP(GPOFF)
+	.set	reorder
+
+	li	a1, 1				# savesigs = 1: save/restore signals state
+	b	setjmp_common			# tail call into sigsetjmp's shared body
+END(setjmp)
+
+/* _setjmp: like setjmp, but the signal mask is neither saved nor restored. */
+NON_LEAF(_setjmp, FRAMESZ, ra)
+	.mask	0x80000000, RAOFF
+	PTR_SUBU sp, FRAMESZ
+	SETUP_GP64(GPOFF, _setjmp)		# can't share sigsetjmp's gp code
+	SAVE_GP(GPOFF)
+	.set	reorder
+
+	li	a1, 0				# savesigs = 0: don't save/restore signals
+	b	setjmp_common			# tail call into sigsetjmp's shared body
+END(_setjmp)
+
+
+NON_LEAF(siglongjmp, FRAMESZ, ra)
+	.mask	0x80000000, RAOFF
+	PTR_SUBU sp, FRAMESZ
+	SETUP_GP64(GPOFF, siglongjmp)
 	SAVE_GP(GPOFF)
 	.set	reorder
 
 #ifndef __LP64__
-	addiu	a0, 7				# roundup jmpbuf addr to 8-byte boundary
-	li      t0, ~7
-	and	a0, t0
+	li	t0, ~7
+	and	a0, t0				# round jmpbuf addr DOWN to 8-byte boundary
 #endif
+	lw	v0, SC_MAGIC_OFFSET(a0)
+	li	t0, 0xACEDBADE
+	bne	v0, t0, longjmp_botch		# jump if error
 
-	REG_S	a1, A1OFF(sp)
+	lw	t0, SC_FLAG_OFFSET(a0)		# get savesigs flag
+	beqz	t0, 1f				# restore signal mask?
+
+	REG_S	a1, A1OFF(sp)			# temp spill
 	REG_S	a0, A0OFF(sp)
-	lw	a0, SC_MASK(a0)
-	jal	sigsetmask
+	# call sigprocmask(int how SIG_SETMASK, sigset_t* SC_MASK(a0), sigset_t* null):
+	LA	a1, SC_MASK_OFFSET(a0)		# signals being restored
+	li	a0, 3				# mips SIG_SETMASK
+	li	a2, 0				# null
+	jal	sigprocmask			# restore signal mask
 	REG_L	a0, A0OFF(sp)
 	REG_L	a1, A1OFF(sp)
-
-	lw	v0, SC_MAGIC(a0)
-	li	t0, 0xACEDBADE
-	bne	v0, t0, longjmp_botch			# jump if error
-
+1:
 	# callee-saved long-sized regs:
 	REG_L	ra, SC_REGS+0*REGSZ(a0)
 	REG_L	s0, SC_REGS+1*REGSZ(a0)
@@ -252,8 +319,8 @@
 	REG_L	gp, SC_REGS+10*REGSZ(a0)
 	REG_L	sp, SC_REGS+11*REGSZ(a0)
 
-	lw	v0, SC_FPSR(a0)
-	ctc1	v0, $31
+	lw	v0, SC_FPSR_OFFSET(a0)
+	ctc1	v0, $31			# restore old fr mode before fp values
 #ifdef __LP64__
 	# callee-saved fp regs on mips n64 ABI are $f24..$f31
 	l.d	$f24, SC_FPREGS+0*REGSZ_FP(a0)
@@ -266,7 +333,7 @@
 	l.d	$f31, SC_FPREGS+7*REGSZ_FP(a0)
 #else
 	# callee-saved fp regs on mips o32 ABI are
-	#   the even-numbered fp regs $f20,$f22,...$f30
+	#   the even-numbered double fp regs $f20,$f22,...$f30
 	l.d	$f20, SC_FPREGS+0*REGSZ_FP(a0)
 	l.d	$f22, SC_FPREGS+1*REGSZ_FP(a0)
 	l.d	$f24, SC_FPREGS+2*REGSZ_FP(a0)
@@ -278,192 +345,19 @@
 	li	a1, 1			# never return 0!
 1:
 	move	v0, a1
-	j	ra
+	j	ra			# return to setjmp call site
 
 longjmp_botch:
 	jal	longjmperror
 	jal	abort
-	RESTORE_GP64
-	PTR_ADDU sp, FRAMESZ
-END(longjmp)
-
-
-/*
- * _setjmp, _longjmp (not restoring signal state)
- *
- *  GPOFF and FRAMESIZE must be the same for both _setjmp and _longjmp!
- *
- */
-
-FRAMESZ= MKFSIZ(0,4)
-GPOFF= FRAMESZ-2*REGSZ
-
-LEAF(_setjmp, FRAMESZ)
-	PTR_SUBU sp, FRAMESZ
-	SETUP_GP64(GPOFF, _setjmp)
-	SAVE_GP(GPOFF)
-	.set	reorder
-
-#ifndef __LP64__
-	addiu   a0, 7				# roundup jmpbuf addr to 8-byte boundary
-	li      t0, ~7
-	and     a0, t0
-#endif
-
-	# SC_MASK is unused here
-
-	li	v0, 0xACEDBADE			# sigcontext magic number
-	sw	v0, SC_MAGIC(a0)
-	# callee-saved long-sized regs:
-	REG_S	ra, SC_REGS+0*REGSZ(a0)
-	REG_S	s0, SC_REGS+1*REGSZ(a0)
-	REG_S	s1, SC_REGS+2*REGSZ(a0)
-	REG_S	s2, SC_REGS+3*REGSZ(a0)
-	REG_S	s3, SC_REGS+4*REGSZ(a0)
-	REG_S	s4, SC_REGS+5*REGSZ(a0)
-	REG_S	s5, SC_REGS+6*REGSZ(a0)
-	REG_S	s6, SC_REGS+7*REGSZ(a0)
-	REG_S	s7, SC_REGS+8*REGSZ(a0)
-	REG_S	s8, SC_REGS+9*REGSZ(a0)
-	REG_L	v0, GPOFF(sp)
-	REG_S	v0, SC_REGS+10*REGSZ(a0)
-	PTR_ADDU v0, sp, FRAMESZ
-	REG_S	v0, SC_REGS+11*REGSZ(a0)
-
-	cfc1	v0, $31
-
-#ifdef __LP64__
-	# callee-saved fp regs on mips n64 ABI are $f24..$f31
-	s.d	$f24, SC_FPREGS+0*REGSZ_FP(a0)
-	s.d	$f25, SC_FPREGS+1*REGSZ_FP(a0)
-	s.d	$f26, SC_FPREGS+2*REGSZ_FP(a0)
-	s.d	$f27, SC_FPREGS+3*REGSZ_FP(a0)
-	s.d	$f28, SC_FPREGS+4*REGSZ_FP(a0)
-	s.d	$f29, SC_FPREGS+5*REGSZ_FP(a0)
-	s.d	$f30, SC_FPREGS+6*REGSZ_FP(a0)
-	s.d	$f31, SC_FPREGS+7*REGSZ_FP(a0)
-#else
-	# callee-saved fp regs on mips o32 ABI are
-	#   the even-numbered fp regs $f20,$f22,...$f30
-	s.d	$f20, SC_FPREGS+0*REGSZ_FP(a0)
-	s.d	$f22, SC_FPREGS+1*REGSZ_FP(a0)
-	s.d	$f24, SC_FPREGS+2*REGSZ_FP(a0)
-	s.d	$f26, SC_FPREGS+3*REGSZ_FP(a0)
-	s.d	$f28, SC_FPREGS+4*REGSZ_FP(a0)
-	s.d	$f30, SC_FPREGS+5*REGSZ_FP(a0)
-#endif
-	sw	v0, SC_FPSR(a0)
-	move	v0, zero
-	RESTORE_GP64
-	PTR_ADDU sp, FRAMESZ
-	j	ra
-END(_setjmp)
-
-
-LEAF(_longjmp, FRAMESZ)
-	PTR_SUBU sp, FRAMESZ
-	SETUP_GP64(GPOFF, _longjmp)
-	SAVE_GP(GPOFF)
-	.set	reorder
-
-#ifndef __LP64__
-	addiu	a0, 7				# roundup jmpbuf addr to 8-byte boundary
-	li      t0, ~7
-	and	a0, t0
-#endif
-
-	# SC_MASK is unused here
-
-	lw	v0, SC_MAGIC(a0)
-	li	t0, 0xACEDBADE
-	bne	v0, t0, _longjmp_botch			# jump if error
-
-	# callee-saved long-sized regs:
-	REG_L	ra, SC_REGS+0*REGSZ(a0)
-	REG_L	s0, SC_REGS+1*REGSZ(a0)
-	REG_L	s1, SC_REGS+2*REGSZ(a0)
-	REG_L	s2, SC_REGS+3*REGSZ(a0)
-	REG_L	s3, SC_REGS+4*REGSZ(a0)
-	REG_L	s4, SC_REGS+5*REGSZ(a0)
-	REG_L	s5, SC_REGS+6*REGSZ(a0)
-	REG_L	s6, SC_REGS+7*REGSZ(a0)
-	REG_L	s7, SC_REGS+8*REGSZ(a0)
-	REG_L	s8, SC_REGS+9*REGSZ(a0)
-	REG_L	gp, SC_REGS+10*REGSZ(a0)
-	REG_L	sp, SC_REGS+11*REGSZ(a0)
-
-	lw	v0, SC_FPSR(a0)
-	ctc1	v0, $31
-#ifdef __LP64__
-	# callee-saved fp regs on mips n64 ABI are $f24..$f31
-	l.d	$f24, SC_FPREGS+0*REGSZ_FP(a0)
-	l.d	$f25, SC_FPREGS+1*REGSZ_FP(a0)
-	l.d	$f26, SC_FPREGS+2*REGSZ_FP(a0)
-	l.d	$f27, SC_FPREGS+3*REGSZ_FP(a0)
-	l.d	$f28, SC_FPREGS+4*REGSZ_FP(a0)
-	l.d	$f29, SC_FPREGS+5*REGSZ_FP(a0)
-	l.d	$f30, SC_FPREGS+6*REGSZ_FP(a0)
-	l.d	$f31, SC_FPREGS+7*REGSZ_FP(a0)
-#else
-	# callee-saved fp regs on mips o32 ABI are
-	#   the even-numbered fp regs $f20,$f22,...$f30
-	l.d	$f20, SC_FPREGS+0*REGSZ_FP(a0)
-	l.d	$f22, SC_FPREGS+1*REGSZ_FP(a0)
-	l.d	$f24, SC_FPREGS+2*REGSZ_FP(a0)
-	l.d	$f26, SC_FPREGS+3*REGSZ_FP(a0)
-	l.d	$f28, SC_FPREGS+4*REGSZ_FP(a0)
-	l.d	$f30, SC_FPREGS+5*REGSZ_FP(a0)
-#endif
-	bne	a1, zero, 1f
-	li	a1, 1			# never return 0!
-1:
-	move	v0, a1
-	j	ra
-
-_longjmp_botch:
-	jal	longjmperror
-	jal	abort
-	RESTORE_GP64
-	PTR_ADDU sp, FRAMESZ
-END(_longjmp)
-
-/*
- * trampolines for sigsetjmp and  siglongjmp save and restore mask.
- *
- */
-FRAMESZ= MKFSIZ(1,1)
-GPOFF= FRAMESZ-2*REGSZ
-
-LEAF(sigsetjmp, FRAMESZ)
-	PTR_SUBU sp, FRAMESZ
-	SETUP_GP64(GPOFF, sigsetjmp)
-	.set	reorder
-	sw	a1, _JBLEN*REGSZ(a0)		# save "savemask"
-	bne	a1, 0x0, 1f			# do saving of signal mask?
-	LA	t9, _setjmp
-	RESTORE_GP64
-	PTR_ADDU sp, FRAMESZ
-	jr t9
-
-1:	LA	t9, setjmp
-	RESTORE_GP64
-	PTR_ADDU sp, FRAMESZ
-	jr t9
-END(sigsetjmp)
-
-LEAF(siglongjmp, FRAMESZ)
-	PTR_SUBU sp, FRAMESZ
-	SETUP_GP64(GPOFF, siglongjmp)
-	.set	reorder
-	lw	t0, _JBLEN*REGSZ(a0)		# get "savemask"
-	bne	t0, 0x0, 1f			# restore signal mask?
-	LA	t9, _longjmp
-	RESTORE_GP64
-	PTR_ADDU sp, FRAMESZ
-	jr	t9
-1:
-	LA	t9, longjmp
-	RESTORE_GP64
-	PTR_ADDU sp, FRAMESZ
-	jr	t9
 END(siglongjmp)
+
+/* siglongjmp reads the savesigs flag kept in the jmpbuf, so one body serves all three names. */
+	.globl	longjmp
+	.type	longjmp, @function
+	.equ	longjmp, siglongjmp	# alias for siglongjmp
+
+
+	.globl	_longjmp
+	.type	_longjmp, @function
+	.equ	_longjmp, siglongjmp	# alias for siglongjmp
diff --git a/libc/arch-mips/include/machine/setjmp.h b/libc/arch-mips/include/machine/setjmp.h
index a9707dc..4067d51 100644
--- a/libc/arch-mips/include/machine/setjmp.h
+++ b/libc/arch-mips/include/machine/setjmp.h
@@ -6,9 +6,10 @@
 #define _MIPS_SETJMP_H_
 
 #ifdef __LP64__
-#define	_JBLEN	22		/* size, in 8-byte longs, of a mips64 jmp_buf */
+#define	_JBLEN	25	/* size, in 8-byte longs, of a mips64 jmp_buf/sigjmp_buf */
 #else
-#define	_JBLEN	29		/* size, in 4-byte longs, of a mips32 jmp_buf */
+#define	_JBLEN	157	/* historical size, in 4-byte longs, of a mips32 jmp_buf */
+			/* actual used size is 34 */
 #endif
 
 #endif /* !_MIPS_SETJMP_H_ */
diff --git a/libc/arch-mips/syscalls/faccessat.S b/libc/arch-mips/syscalls/___faccessat.S
similarity index 82%
rename from libc/arch-mips/syscalls/faccessat.S
rename to libc/arch-mips/syscalls/___faccessat.S
index e616106..4e11bae 100644
--- a/libc/arch-mips/syscalls/faccessat.S
+++ b/libc/arch-mips/syscalls/___faccessat.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(faccessat)
+ENTRY(___faccessat)
     .set noreorder
     .cpload t9
     li v0, __NR_faccessat
@@ -16,4 +16,5 @@
     j t9
     nop
     .set reorder
-END(faccessat)
+END(___faccessat)
+.hidden ___faccessat
diff --git a/libc/arch-mips64/bionic/stat.cpp b/libc/arch-mips64/bionic/stat.cpp
index df63906..2767fbd 100644
--- a/libc/arch-mips64/bionic/stat.cpp
+++ b/libc/arch-mips64/bionic/stat.cpp
@@ -29,7 +29,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/syscall.h>
-#include <asm/unistd.h>
+#include <unistd.h>
 
 struct kernel_stat {
  unsigned int st_dev;
diff --git a/libc/arch-mips64/syscalls/faccessat.S b/libc/arch-mips64/syscalls/___faccessat.S
similarity index 85%
rename from libc/arch-mips64/syscalls/faccessat.S
rename to libc/arch-mips64/syscalls/___faccessat.S
index 18bb800..240625f 100644
--- a/libc/arch-mips64/syscalls/faccessat.S
+++ b/libc/arch-mips64/syscalls/___faccessat.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(faccessat)
+ENTRY(___faccessat)
     .set push
     .set noreorder
     li v0, __NR_faccessat
@@ -22,4 +22,5 @@
     j t9
     move ra, t0
     .set pop
-END(faccessat)
+END(___faccessat)
+.hidden ___faccessat
diff --git a/libc/arch-x86/syscalls/faccessat.S b/libc/arch-x86/syscalls/___faccessat.S
similarity index 70%
rename from libc/arch-x86/syscalls/faccessat.S
rename to libc/arch-x86/syscalls/___faccessat.S
index 9d52231..361a6ea 100644
--- a/libc/arch-x86/syscalls/faccessat.S
+++ b/libc/arch-x86/syscalls/___faccessat.S
@@ -2,7 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(faccessat)
+ENTRY(___faccessat)
     pushl   %ebx
     .cfi_def_cfa_offset 8
     .cfi_rel_offset ebx, 0
@@ -12,13 +12,9 @@
     pushl   %edx
     .cfi_adjust_cfa_offset 4
     .cfi_rel_offset edx, 0
-    pushl   %esi
-    .cfi_adjust_cfa_offset 4
-    .cfi_rel_offset esi, 0
-    mov     20(%esp), %ebx
-    mov     24(%esp), %ecx
-    mov     28(%esp), %edx
-    mov     32(%esp), %esi
+    mov     16(%esp), %ebx
+    mov     20(%esp), %ecx
+    mov     24(%esp), %edx
     movl    $__NR_faccessat, %eax
     int     $0x80
     cmpl    $-MAX_ERRNO, %eax
@@ -28,9 +24,9 @@
     call    __set_errno_internal
     addl    $4, %esp
 1:
-    popl    %esi
     popl    %edx
     popl    %ecx
     popl    %ebx
     ret
-END(faccessat)
+END(___faccessat)
+.hidden ___faccessat
diff --git a/libc/arch-x86_64/syscalls/faccessat.S b/libc/arch-x86_64/syscalls/___faccessat.S
similarity index 81%
rename from libc/arch-x86_64/syscalls/faccessat.S
rename to libc/arch-x86_64/syscalls/___faccessat.S
index 05a6e78..e8fd3f5 100644
--- a/libc/arch-x86_64/syscalls/faccessat.S
+++ b/libc/arch-x86_64/syscalls/___faccessat.S
@@ -2,8 +2,7 @@
 
 #include <private/bionic_asm.h>
 
-ENTRY(faccessat)
-    movq    %rcx, %r10
+ENTRY(___faccessat)
     movl    $__NR_faccessat, %eax
     syscall
     cmpq    $-MAX_ERRNO, %rax
@@ -13,4 +12,5 @@
     call    __set_errno_internal
 1:
     ret
-END(faccessat)
+END(___faccessat)
+.hidden ___faccessat
diff --git a/libc/bionic/dup2.cpp b/libc/bionic/dup2.cpp
index 0b8632b..98c5646 100644
--- a/libc/bionic/dup2.cpp
+++ b/libc/bionic/dup2.cpp
@@ -26,8 +26,19 @@
  * SUCH DAMAGE.
  */
 
+#include <fcntl.h>
 #include <unistd.h>
 
 int dup2(int old_fd, int new_fd) {
+  // dup2 with old_fd == new_fd: when the descriptor is valid it must be
+  // handed back untouched (nothing gets closed). dup3 does not behave
+  // that way, so this case is answered here instead of being forwarded.
+  if (old_fd == new_fd) {
+    // The F_GETFD query serves as the validity probe; if it fails, its
+    // -1 result is passed straight back to the caller.
+    return (fcntl(old_fd, F_GETFD) == -1) ? -1 : old_fd;
+  }
+
+
   return dup3(old_fd, new_fd, 0);
 }
diff --git a/libc/bionic/faccessat.cpp b/libc/bionic/faccessat.cpp
new file mode 100644
index 0000000..5f375e0
--- /dev/null
+++ b/libc/bionic/faccessat.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+extern "C" int ___faccessat(int, const char*, int);
+
+// Checks accessibility of pathname (relative to dirfd) via the raw
+// ___faccessat syscall stub. Fails with -1/EINVAL for an invalid mode or for
+// any non-zero flags; otherwise returns the stub's result.
+int faccessat(int dirfd, const char* pathname, int mode, int flags) {
+  // "The mode specifies the accessibility check(s) to be performed,
+  // and is either the value F_OK, or a mask consisting of the
+  // bitwise OR of one or more of R_OK, W_OK, and X_OK."
+  // So any bit outside R_OK|W_OK|X_OK is invalid, even when valid bits are
+  // set alongside it (F_OK is 0 on Linux, so it always passes this test).
+  // We deliberately don't support AT_SYMLINK_NOFOLLOW, a glibc
+  // only feature which is error prone and dangerous.
+  //
+  // AT_EACCESS isn't supported either. Android doesn't have setuid
+  // programs, and never runs code with euid!=uid. It could be
+  // implemented in an expensive way, following the model at
+  // https://gitlab.com/bminor/musl/commit/0a05eace163cee9b08571d2ff9d90f5e82d9c228
+  // but not worth it.
+  const bool bad_mode = (mode & ~(R_OK | W_OK | X_OK)) != 0;
+  if (bad_mode || flags != 0) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  return ___faccessat(dirfd, pathname, mode);
+}
diff --git a/libc/bionic/mempcpy.cpp b/libc/bionic/mempcpy.cpp
new file mode 100644
index 0000000..b7b72f7
--- /dev/null
+++ b/libc/bionic/mempcpy.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+void* mempcpy(void* dst, const void* src, size_t n) {  // memcpy, but returns dst + n
+  return static_cast<char*>(memcpy(dst, src, n)) + n;
+}
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index 5d1cbb0..ceff6a1 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -30,6 +30,7 @@
 
 #include <ctype.h>
 #include <dirent.h>
+#include <errno.h>
 #include <inttypes.h>
 #include <pthread.h>
 #include <signal.h>
diff --git a/libc/bionic/wmempcpy.cpp b/libc/bionic/wmempcpy.cpp
new file mode 100644
index 0000000..54ebf86
--- /dev/null
+++ b/libc/bionic/wmempcpy.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <wchar.h>
+
+wchar_t* wmempcpy(wchar_t* dst, const wchar_t* src, size_t n) {  // wmemcpy, but returns dst + n
+  return &wmemcpy(dst, src, n)[n];
+}
diff --git a/libc/include/string.h b/libc/include/string.h
index d67928c..fffe136 100644
--- a/libc/include/string.h
+++ b/libc/include/string.h
@@ -44,6 +44,9 @@
 extern void*  memrchr(const void *, int, size_t) __purefunc;
 extern int    memcmp(const void *, const void *, size_t) __purefunc;
 extern void*  memcpy(void* __restrict, const void* __restrict, size_t);
+#if defined(__USE_GNU)
+extern void*  mempcpy(void* __restrict, const void* __restrict, size_t);
+#endif
 extern void*  memmove(void *, const void *, size_t);
 extern void*  memset(void *, int, size_t);
 extern void*  memmem(const void *, size_t, const void *, size_t) __purefunc;
diff --git a/libc/include/sys/resource.h b/libc/include/sys/resource.h
index a91fa53..3f8dd45 100644
--- a/libc/include/sys/resource.h
+++ b/libc/include/sys/resource.h
@@ -36,6 +36,10 @@
 
 __BEGIN_DECLS
 
+/* The kernel header doesn't have these, but POSIX does. */
+#define RLIM_SAVED_CUR RLIM_INFINITY
+#define RLIM_SAVED_MAX RLIM_INFINITY
+
 typedef unsigned long rlim_t;
 
 extern int getrlimit(int, struct rlimit*);
diff --git a/libc/include/sys/syscall.h b/libc/include/sys/syscall.h
index 34a29df..21eaf33 100644
--- a/libc/include/sys/syscall.h
+++ b/libc/include/sys/syscall.h
@@ -25,20 +25,13 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+
 #ifndef _SYS_SYSCALL_H_
 #define _SYS_SYSCALL_H_
 
-#include <errno.h>
-#include <sys/cdefs.h>
-#include <sys/types.h>
-#include <asm/unistd.h>
+#include <asm/unistd.h> /* Linux kernel __NR_* names. */
+#include <sys/glibc-syscalls.h> /* glibc-compatible SYS_* aliases. */
 
-#include <sys/glibc-syscalls.h> /* glibc-compatible SYS_* aliases for our __NR_* names. */
-
-__BEGIN_DECLS
-
-long syscall(long number, ...);
-
-__END_DECLS
+/* The syscall function itself is declared in <unistd.h>, not here. */
 
 #endif /* _SYS_SYSCALL_H_ */
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index 9fcb5db..92d3abe 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -204,6 +204,8 @@
 
 long sysconf(int);
 
+long syscall(long number, ...);
+
 extern int daemon(int, int);
 
 #if defined(__arm__) || (defined(__mips__) && !defined(__LP64__))
diff --git a/libc/include/wchar.h b/libc/include/wchar.h
index cfd2299..ea6aca0 100644
--- a/libc/include/wchar.h
+++ b/libc/include/wchar.h
@@ -151,6 +151,9 @@
 extern wchar_t          *wmemchr(const wchar_t *, wchar_t, size_t);
 extern int               wmemcmp(const wchar_t *, const wchar_t *, size_t);
 extern wchar_t          *wmemcpy(wchar_t *, const wchar_t *, size_t);
+#if defined(__USE_GNU)
+extern wchar_t          *wmempcpy(wchar_t *, const wchar_t *, size_t);
+#endif
 extern wchar_t          *wmemmove(wchar_t *, const wchar_t *, size_t);
 extern wchar_t          *wmemset(wchar_t *, wchar_t, size_t);
 extern int               wprintf(const wchar_t *, ...);
diff --git a/libc/private/bionic_futex.h b/libc/private/bionic_futex.h
index bd2bd36..401577a 100644
--- a/libc/private/bionic_futex.h
+++ b/libc/private/bionic_futex.h
@@ -34,6 +34,7 @@
 #include <stddef.h>
 #include <sys/cdefs.h>
 #include <sys/syscall.h>
+#include <unistd.h>
 
 __BEGIN_DECLS
 
diff --git a/tests/gtest_main.cpp b/tests/gtest_main.cpp
index 86d6466..664e4a1 100644
--- a/tests/gtest_main.cpp
+++ b/tests/gtest_main.cpp
@@ -124,15 +124,15 @@
 
   int64_t GetTestTime() const { return elapsed_time_ns_; }
 
-  void AppendFailureMessage(const std::string& s) { failure_message_ += s; }
+  void AppendTestOutput(const std::string& s) { output_ += s; }
 
-  const std::string& GetFailureMessage() const { return failure_message_; }
+  const std::string& GetTestOutput() const { return output_; }
 
  private:
   const std::string name_;
   TestResult result_;
   int64_t elapsed_time_ns_;
-  std::string failure_message_;
+  std::string output_;
 };
 
 class TestCase {
@@ -196,10 +196,6 @@
   std::vector<Test> test_list_;
 };
 
-// This is the file descriptor used by the child process to write failure message.
-// The parent process will collect the information and dump to stdout / xml file.
-static int child_output_fd;
-
 class TestResultPrinter : public testing::EmptyTestEventListener {
  public:
   TestResultPrinter() : pinfo_(NULL) {}
@@ -219,25 +215,9 @@
     return;
 
   // Print failure message from the assertion (e.g. expected this and got that).
-  char buf[1024];
-  snprintf(buf, sizeof(buf), "%s:(%d) Failure in test %s.%s\n%s\n", result.file_name(),
-                                                                    result.line_number(),
-                                                                    pinfo_->test_case_name(),
-                                                                    pinfo_->name(),
-                                                                    result.message());
-
-  int towrite = strlen(buf);
-  char* p = buf;
-  while (towrite > 0) {
-    ssize_t bytes_written = TEMP_FAILURE_RETRY(write(child_output_fd, p, towrite));
-    if (bytes_written == -1) {
-      fprintf(stderr, "failed to write child_output_fd: %s\n", strerror(errno));
-      exit(1);
-    } else {
-      towrite -= bytes_written;
-      p += bytes_written;
-    }
-  }
+  printf("%s:(%d) Failure in test %s.%s\n%s\n", result.file_name(), result.line_number(),
+         pinfo_->test_case_name(), pinfo_->name(), result.message());
+  fflush(stdout);
 }
 
 static int64_t NanoTime() {
@@ -332,8 +312,8 @@
     printf("\n");
   }
 
-  const std::string& failure_message = testcase.GetTest(test_id).GetFailureMessage();
-  printf("%s", failure_message.c_str());
+  const std::string& test_output = testcase.GetTest(test_id).GetTestOutput();
+  printf("%s", test_output.c_str());
   fflush(stdout);
 }
 
@@ -481,8 +461,8 @@
         fputs(" />\n", fp);
       } else {
         fputs(">\n", fp);
-        const std::string& failure_message = testcase.GetTest(j).GetFailureMessage();
-        fprintf(fp, "      <failure message=\"%s\" type=\"\">\n", failure_message.c_str());
+        const std::string& test_output = testcase.GetTest(j).GetTestOutput();
+        fprintf(fp, "      <failure message=\"%s\" type=\"\">\n", test_output.c_str());
         fputs("      </failure>\n", fp);
         fputs("    </testcase>\n", fp);
       }
@@ -538,7 +518,10 @@
   } else if (pid == 0) {
     // In child process, run a single test.
     close(pipefd[0]);
-    child_output_fd = pipefd[1];
+    close(STDOUT_FILENO);
+    close(STDERR_FILENO);
+    dup2(pipefd[1], STDOUT_FILENO);
+    dup2(pipefd[1], STDERR_FILENO);
 
     if (sigprocmask(SIG_SETMASK, &sigmask, NULL) == -1) {
       perror("sigprocmask SIG_SETMASK");
@@ -692,7 +675,7 @@
     ssize_t bytes_read = TEMP_FAILURE_RETRY(read(child_proc.child_read_fd, buf, sizeof(buf) - 1));
     if (bytes_read > 0) {
       buf[bytes_read] = '\0';
-      testcase.GetTest(test_id).AppendFailureMessage(buf);
+      testcase.GetTest(test_id).AppendTestOutput(buf);
     } else if (bytes_read == 0) {
       break; // Read end.
     } else {
@@ -713,7 +696,7 @@
     char buf[1024];
     snprintf(buf, sizeof(buf), "%s killed because of timeout at %" PRId64 " ms.\n",
              testcase.GetTestName(test_id).c_str(), testcase.GetTestTime(test_id) / 1000000);
-    testcase.GetTest(test_id).AppendFailureMessage(buf);
+    testcase.GetTest(test_id).AppendTestOutput(buf);
 
   } else if (WIFSIGNALED(child_proc.exit_status)) {
     // Record signal terminated test as failed.
@@ -721,7 +704,7 @@
     char buf[1024];
     snprintf(buf, sizeof(buf), "%s terminated by signal: %s.\n",
              testcase.GetTestName(test_id).c_str(), strsignal(WTERMSIG(child_proc.exit_status)));
-    testcase.GetTest(test_id).AppendFailureMessage(buf);
+    testcase.GetTest(test_id).AppendTestOutput(buf);
 
   } else {
     testcase.SetTestResult(test_id, WEXITSTATUS(child_proc.exit_status) == 0 ?
diff --git a/tests/string_test.cpp b/tests/string_test.cpp
index 66cf848..1d63c76 100644
--- a/tests/string_test.cpp
+++ b/tests/string_test.cpp
@@ -1395,3 +1395,8 @@
   EXPECT_EQ(0U, strnlen(heap_src, 1024*1024*1024));
   delete[] heap_src;
 }
+
+TEST(string, mempcpy) {  // mempcpy should return the address just past the last byte copied.
+  char dst[6];  // Only the returned pointer is checked here, not the bytes written to dst.
+  ASSERT_EQ(&dst[4], reinterpret_cast<char*>(mempcpy(dst, "hello", 4)));
+}
diff --git a/tests/sys_stat_test.cpp b/tests/sys_stat_test.cpp
index 7bbb7c6..28c7c52 100644
--- a/tests/sys_stat_test.cpp
+++ b/tests/sys_stat_test.cpp
@@ -219,3 +219,43 @@
   ASSERT_EQ(ENOTSUP, errno);
   unlink(linkname);
 }
+
+TEST(sys_stat, faccessat_EINVAL) {  // Invalid flags or mode arguments are expected to fail with EINVAL.
+  ASSERT_EQ(-1, faccessat(AT_FDCWD, "/dev/null", F_OK, ~AT_SYMLINK_NOFOLLOW));
+  ASSERT_EQ(EINVAL, errno);  // The flags above had every bit except AT_SYMLINK_NOFOLLOW set.
+#if defined(__BIONIC__)  // bionic: a mode with bits outside R_OK|W_OK|X_OK is expected to be rejected.
+  ASSERT_EQ(-1, faccessat(AT_FDCWD, "/dev/null", ~(R_OK | W_OK | X_OK), 0));
+  ASSERT_EQ(EINVAL, errno);
+#else  // Other libcs: the same mode is expected to be rejected only when flags is 0.
+  ASSERT_EQ(0, faccessat(AT_FDCWD, "/dev/null", ~(R_OK | W_OK | X_OK), AT_SYMLINK_NOFOLLOW));
+  ASSERT_EQ(-1, faccessat(AT_FDCWD, "/dev/null", ~(R_OK | W_OK | X_OK), 0));
+  ASSERT_EQ(EINVAL, errno);
+#endif  // defined(__BIONIC__)
+}
+
+TEST(sys_stat, faccessat_AT_SYMLINK_NOFOLLOW_EINVAL) {
+#if defined(__BIONIC__)
+  // Android doesn't support AT_SYMLINK_NOFOLLOW, so the call is expected to fail with EINVAL.
+  ASSERT_EQ(-1, faccessat(AT_FDCWD, "/dev/null", F_OK, AT_SYMLINK_NOFOLLOW));
+  ASSERT_EQ(EINVAL, errno);
+#else  // Other libcs are expected to accept the flag.
+  ASSERT_EQ(0, faccessat(AT_FDCWD, "/dev/null", F_OK, AT_SYMLINK_NOFOLLOW));
+#endif  // defined(__BIONIC__)
+}
+
+TEST(sys_stat, faccessat_dev_null) {  // Expects /dev/null to exist and be readable and writable.
+  ASSERT_EQ(0, faccessat(AT_FDCWD, "/dev/null", F_OK, 0));  // Existence only.
+  ASSERT_EQ(0, faccessat(AT_FDCWD, "/dev/null", R_OK, 0));
+  ASSERT_EQ(0, faccessat(AT_FDCWD, "/dev/null", W_OK, 0));
+  ASSERT_EQ(0, faccessat(AT_FDCWD, "/dev/null", R_OK|W_OK, 0));  // Combined mode bits.
+}
+
+TEST(sys_stat, faccessat_nonexistant) {  // Assumes "/blah" does not exist on the test system.
+  ASSERT_EQ(-1, faccessat(AT_FDCWD, "/blah", F_OK, AT_SYMLINK_NOFOLLOW));
+#if defined(__BIONIC__)
+  // Android doesn't support AT_SYMLINK_NOFOLLOW, so the expected error is EINVAL rather than ENOENT.
+  ASSERT_EQ(EINVAL, errno);
+#else  // Other libcs are expected to report the missing path.
+  ASSERT_EQ(ENOENT, errno);
+#endif  // defined(__BIONIC__)
+}
diff --git a/tests/unistd_test.cpp b/tests/unistd_test.cpp
index f5c0524..f54a461 100644
--- a/tests/unistd_test.cpp
+++ b/tests/unistd_test.cpp
@@ -801,3 +801,22 @@
   VERIFY_SYSCONF_NOT_SUPPORT(_SC_XOPEN_UUCP);
 #endif // defined(__BIONIC__)
 }
+
+TEST(unistd, dup2_same) {  // Checks dup2 when both arguments are the same descriptor number.
+  // POSIX says of dup2:
+  // If fildes2 is already a valid open file descriptor ...
+  // [and] fildes is equal to fildes2 ... dup2() shall return
+  // fildes2 without closing it.
+  // This isn't true of dup3(2), so we need to manually implement that.
+
+  // Equal and valid.
+  int fd = open("/proc/version", O_RDONLY);
+  ASSERT_TRUE(fd != -1);
+  ASSERT_EQ(fd, dup2(fd, fd));
+  ASSERT_EQ(0, close(fd)); // close() succeeding shows that dup2 didn't close fd.
+
+  // Equal, but invalid: fd was closed just above.
+  errno = 0;  // Reset so the EBADF check below can't pass on a stale value.
+  ASSERT_EQ(-1, dup2(fd, fd));
+  ASSERT_EQ(EBADF, errno);
+}
diff --git a/tests/wchar_test.cpp b/tests/wchar_test.cpp
index a1d1501..e86d56d 100644
--- a/tests/wchar_test.cpp
+++ b/tests/wchar_test.cpp
@@ -667,3 +667,8 @@
   wcstoull_l(L"123", NULL, 37, LC_GLOBAL_LOCALE);
   ASSERT_EQ(EINVAL, errno);
 }
+
+TEST(wchar, wmempcpy) {  // wmempcpy should return the address just past the last wide char copied.
+  wchar_t dst[6];  // Only the returned pointer is checked here, not the contents of dst.
+  ASSERT_EQ(&dst[4], wmempcpy(dst, L"hello", 4));
+}