Merge "Use hidden visibility for internal-use-only functions"
diff --git a/libc/Android.mk b/libc/Android.mk
index 40191ee..eaeb6fc 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -53,7 +53,6 @@
 	stdlib/exit.c \
 	stdlib/getenv.c \
 	stdlib/putenv.c \
-	stdlib/qsort.c \
 	stdlib/setenv.c \
 	stdlib/strtod.c \
 	stdlib/strtoimax.c \
@@ -289,6 +288,7 @@
     upstream-freebsd/lib/libc/stdio/tempnam.c \
     upstream-freebsd/lib/libc/stdio/tmpnam.c \
     upstream-freebsd/lib/libc/stdio/wsetup.c \
+    upstream-freebsd/lib/libc/stdlib/qsort.c \
     upstream-freebsd/lib/libc/stdlib/realpath.c \
     upstream-freebsd/lib/libc/string/wcpcpy.c \
     upstream-freebsd/lib/libc/string/wcpncpy.c \
diff --git a/libc/arch-arm/cortex-a15/bionic/memset.S b/libc/arch-arm/cortex-a15/bionic/memset.S
index 7bb3297..2e1ad54 100644
--- a/libc/arch-arm/cortex-a15/bionic/memset.S
+++ b/libc/arch-arm/cortex-a15/bionic/memset.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2013 The Android Open Source Project
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -35,11 +35,12 @@
          * memset() returns its first argument.
 		 */
 
-    .fpu    neon
+        .fpu        neon
+        .syntax     unified
 
 ENTRY(bzero)
-        mov     r2, r1
-        mov     r1, #0
+        mov         r2, r1
+        mov         r1, #0
         // Fall through to memset...
 END(bzero)
 
@@ -47,60 +48,117 @@
         .save       {r0}
         stmfd       sp!, {r0}
 
-        vdup.8      q0, r1
-
-        /* do we have at least 16-bytes to write (needed for alignment below) */
+        // The new algorithm is slower for copies < 16 so use the old
+        // neon code in that case.
         cmp         r2, #16
-        blo         3f
+        blo         set_less_than_16_unknown_align
 
-        /* align destination to 16 bytes for the write-buffer */
-        rsb         r3, r0, #0
-        ands        r3, r3, #0xF
-        beq         2f
+        // Use strd which requires an even and odd register so move the
+        // values so that:
+        //   r0 and r1 contain the memset value
+        //   r2 is the number of bytes to set
+        //   r3 is the destination pointer
+        mov         r3, r0
 
-        /* write up to 15-bytes (count in r3) */
-        sub         r2, r2, r3
-        movs        ip, r3, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        movs        ip, r3, lsl #29
-        bge         1f
+        // Copy the byte value in every byte of r1.
+        mov         r1, r1, lsl #24
+        orr         r1, r1, r1, lsr #8
+        orr         r1, r1, r1, lsr #16
 
-        // writes 4 bytes, 32-bits aligned
-        vst1.32     {d0[0]}, [r0, :32]!
-1:      bcc         2f
+check_alignment:
+        // Align destination to a double word to avoid the strd crossing
+        // a cache line boundary.
+        ands        ip, r3, #7
+        bne         do_double_word_align
 
-        // writes 8 bytes, 64-bits aligned
-        vst1.8      {d0}, [r0, :64]!
-2:
-        /* make sure we have at least 32 bytes to write */
-        subs        r2, r2, #32
-        blo         2f
-        vmov        q1, q0
+double_word_aligned:
+        mov         r0, r1
 
-1:      /* The main loop writes 32 bytes at a time */
-        subs        r2, r2, #32
-        vst1.8      {d0 - d3}, [r0, :128]!
-        bhs         1b
+        subs        r2, #64
+        blo         set_less_than_64
 
-2:      /* less than 32 left */
-        add         r2, r2, #32
-        tst         r2, #0x10
-        beq         3f
+1:      // Main loop sets 64 bytes at a time.
+        .irp        offset, #0, #8, #16, #24, #32, #40, #48, #56
+        strd        r0, r1, [r3, \offset]
+        .endr
 
-        // writes 16 bytes, 128-bits aligned
-        vst1.8      {d0, d1}, [r0, :128]!
-3:      /* write up to 15-bytes (count in r2) */
+        add         r3, #64
+        subs        r2, #64
+        bge         1b
+
+set_less_than_64:
+        // Restore r2 to the count of bytes left to set.
+        add         r2, #64
+        lsls        ip, r2, #27
+        bcc         set_less_than_32
+        // Set 32 bytes.
+        .irp        offset, #0, #8, #16, #24
+        strd        r0, r1, [r3, \offset]
+        .endr
+        add         r3, #32
+
+set_less_than_32:
+        bpl         set_less_than_16
+        // Set 16 bytes.
+        .irp        offset, #0, #8
+        strd        r0, r1, [r3, \offset]
+        .endr
+        add         r3, #16
+
+set_less_than_16:
+        // Less than 16 bytes to set.
+        lsls        ip, r2, #29
+        bcc         set_less_than_8
+
+        // Set 8 bytes.
+        strd        r0, r1, [r3], #8
+
+set_less_than_8:
+        bpl         set_less_than_4
+        // Set 4 bytes
+        str         r1, [r3], #4
+
+set_less_than_4:
+        lsls        ip, r2, #31
+        it          ne
+        strbne      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3]
+
+        ldmfd       sp!, {r0}
+        bx          lr
+
+do_double_word_align:
+        rsb         ip, ip, #8
+        sub         r2, r2, ip
+        movs        r0, ip, lsl #31
+        it          mi
+        strbmi      r1, [r3], #1
+        itt         cs
+        strbcs      r1, [r3], #1
+        strbcs      r1, [r3], #1
+
+        // Dst is at least word aligned by this point.
+        cmp         ip, #4
+        blo         double_word_aligned
+        str         r1, [r3], #4
+        b           double_word_aligned
+
+set_less_than_16_unknown_align:
+        // Set up to 15 bytes.
+        vdup.8      d0, r1
         movs        ip, r2, lsl #29
         bcc         1f
         vst1.8      {d0}, [r0]!
 1:      bge         2f
         vst1.32     {d0[0]}, [r0]!
 2:      movs        ip, r2, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
+        it          mi
+        strbmi      r1, [r0], #1
+        itt         cs
+        strbcs      r1, [r0], #1
+        strbcs      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
 END(memset)
diff --git a/libc/stdlib/qsort.c b/libc/stdlib/qsort.c
deleted file mode 100644
index f6fc8e1..0000000
--- a/libc/stdlib/qsort.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*	$OpenBSD: qsort.c,v 1.10 2005/08/08 08:05:37 espie Exp $ */
-/*-
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/types.h>
-#include <stdlib.h>
-
-static __inline char	*med3(char *, char *, char *, int (*)(const void *, const void *));
-static __inline void	 swapfunc(char *, char *, int, int);
-
-#define min(a, b)	(a) < (b) ? a : b
-
-/*
- * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
- */
-#define swapcode(TYPE, parmi, parmj, n) {		\
-	long i = (n) / sizeof (TYPE);			\
-	TYPE *pi = (TYPE *) (parmi);			\
-	TYPE *pj = (TYPE *) (parmj);			\
-	do {						\
-		TYPE	t = *pi;			\
-		*pi++ = *pj;				\
-		*pj++ = t;				\
-        } while (--i > 0);				\
-}
-
-#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
-	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
-
-static __inline void
-swapfunc(char *a, char *b, int n, int swaptype)
-{
-	if (swaptype <= 1)
-		swapcode(long, a, b, n)
-	else
-		swapcode(char, a, b, n)
-}
-
-#define swap(a, b)					\
-	if (swaptype == 0) {				\
-		long t = *(long *)(a);			\
-		*(long *)(a) = *(long *)(b);		\
-		*(long *)(b) = t;			\
-	} else						\
-		swapfunc(a, b, es, swaptype)
-
-#define vecswap(a, b, n)	if ((n) > 0) swapfunc(a, b, n, swaptype)
-
-static __inline char *
-med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
-{
-	return cmp(a, b) < 0 ?
-	       (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
-              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
-}
-
-void
-qsort(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
-{
-	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
-	int d, r, swaptype, swap_cnt;
-	char *a = aa;
-
-loop:	SWAPINIT(a, es);
-	swap_cnt = 0;
-	if (n < 7) {
-		for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-	pm = (char *)a + (n / 2) * es;
-	if (n > 7) {
-		pl = (char *)a;
-		pn = (char *)a + (n - 1) * es;
-		if (n > 40) {
-			d = (n / 8) * es;
-			pl = med3(pl, pl + d, pl + 2 * d, cmp);
-			pm = med3(pm - d, pm, pm + d, cmp);
-			pn = med3(pn - 2 * d, pn - d, pn, cmp);
-		}
-		pm = med3(pl, pm, pn, cmp);
-	}
-	swap(a, pm);
-	pa = pb = (char *)a + es;
-
-	pc = pd = (char *)a + (n - 1) * es;
-	for (;;) {
-		while (pb <= pc && (r = cmp(pb, a)) <= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pa, pb);
-				pa += es;
-			}
-			pb += es;
-		}
-		while (pb <= pc && (r = cmp(pc, a)) >= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pc, pd);
-				pd -= es;
-			}
-			pc -= es;
-		}
-		if (pb > pc)
-			break;
-		swap(pb, pc);
-		swap_cnt = 1;
-		pb += es;
-		pc -= es;
-	}
-	if (swap_cnt == 0) {  /* Switch to insertion sort */
-		for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-
-	pn = (char *)a + n * es;
-	r = min(pa - (char *)a, pb - pa);
-	vecswap(a, pb - r, r);
-	r = min(pd - pc, pn - pd - (int)es);
-	vecswap(pb, pn - r, r);
-	if ((r = pb - pa) > (int)es)
-		qsort(a, r / es, es, cmp);
-	if ((r = pd - pc) > (int)es) {
-		/* Iterate rather than recurse to save stack space */
-		a = pn - r;
-		n = r / es;
-		goto loop;
-	}
-	/* qsort(pn - r, r / es, es, cmp); */
-}
diff --git a/libc/upstream-freebsd/lib/libc/stdlib/qsort.c b/libc/upstream-freebsd/lib/libc/stdlib/qsort.c
new file mode 100644
index 0000000..3687b05
--- /dev/null
+++ b/libc/upstream-freebsd/lib/libc/stdlib/qsort.c
@@ -0,0 +1,195 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)qsort.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdlib.h>
+
+#ifdef I_AM_QSORT_R
+typedef int		 cmp_t(void *, const void *, const void *);
+#else
+typedef int		 cmp_t(const void *, const void *);
+#endif
+static inline char	*med3(char *, char *, char *, cmp_t *, void *);
+static inline void	 swapfunc(char *, char *, int, int);
+
+#define min(a, b)	(a) < (b) ? a : b
+
+/*
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
+ */
+#define swapcode(TYPE, parmi, parmj, n) { 		\
+	long i = (n) / sizeof (TYPE); 			\
+	TYPE *pi = (TYPE *) (parmi); 		\
+	TYPE *pj = (TYPE *) (parmj); 		\
+	do { 						\
+		TYPE	t = *pi;		\
+		*pi++ = *pj;				\
+		*pj++ = t;				\
+        } while (--i > 0);				\
+}
+
+#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
+	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
+
+static inline void
+swapfunc(a, b, n, swaptype)
+	char *a, *b;
+	int n, swaptype;
+{
+	if(swaptype <= 1)
+		swapcode(long, a, b, n)
+	else
+		swapcode(char, a, b, n)
+}
+
+#define swap(a, b)					\
+	if (swaptype == 0) {				\
+		long t = *(long *)(a);			\
+		*(long *)(a) = *(long *)(b);		\
+		*(long *)(b) = t;			\
+	} else						\
+		swapfunc(a, b, es, swaptype)
+
+#define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)
+
+#ifdef I_AM_QSORT_R
+#define	CMP(t, x, y) (cmp((t), (x), (y)))
+#else
+#define	CMP(t, x, y) (cmp((x), (y)))
+#endif
+
+static inline char *
+med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk
+#ifndef I_AM_QSORT_R
+__unused
+#endif
+)
+{
+	return CMP(thunk, a, b) < 0 ?
+	       (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
+              :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
+}
+
+#ifdef I_AM_QSORT_R
+void
+qsort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
+#else
+#define thunk NULL
+void
+qsort(void *a, size_t n, size_t es, cmp_t *cmp)
+#endif
+{
+	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+	size_t d, r;
+	int cmp_result;
+	int swaptype, swap_cnt;
+
+loop:	SWAPINIT(a, es);
+	swap_cnt = 0;
+	if (n < 7) {
+		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+			for (pl = pm; 
+			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+			     pl -= es)
+				swap(pl, pl - es);
+		return;
+	}
+	pm = (char *)a + (n / 2) * es;
+	if (n > 7) {
+		pl = a;
+		pn = (char *)a + (n - 1) * es;
+		if (n > 40) {
+			d = (n / 8) * es;
+			pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
+			pm = med3(pm - d, pm, pm + d, cmp, thunk);
+			pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
+		}
+		pm = med3(pl, pm, pn, cmp, thunk);
+	}
+	swap(a, pm);
+	pa = pb = (char *)a + es;
+
+	pc = pd = (char *)a + (n - 1) * es;
+	for (;;) {
+		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
+			if (cmp_result == 0) {
+				swap_cnt = 1;
+				swap(pa, pb);
+				pa += es;
+			}
+			pb += es;
+		}
+		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
+			if (cmp_result == 0) {
+				swap_cnt = 1;
+				swap(pc, pd);
+				pd -= es;
+			}
+			pc -= es;
+		}
+		if (pb > pc)
+			break;
+		swap(pb, pc);
+		swap_cnt = 1;
+		pb += es;
+		pc -= es;
+	}
+	if (swap_cnt == 0) {  /* Switch to insertion sort */
+		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+			for (pl = pm; 
+			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+			     pl -= es)
+				swap(pl, pl - es);
+		return;
+	}
+
+	pn = (char *)a + n * es;
+	r = min(pa - (char *)a, pb - pa);
+	vecswap(a, pb - r, r);
+	r = min(pd - pc, pn - pd - es);
+	vecswap(pb, pn - r, r);
+	if ((r = pb - pa) > es)
+#ifdef I_AM_QSORT_R
+		qsort_r(a, r / es, es, thunk, cmp);
+#else
+		qsort(a, r / es, es, cmp);
+#endif
+	if ((r = pd - pc) > es) {
+		/* Iterate rather than recurse to save stack space */
+		a = pn - r;
+		n = r / es;
+		goto loop;
+	}
+/*		qsort(pn - r, r / es, es, cmp);*/
+}
diff --git a/tests/stdlib_test.cpp b/tests/stdlib_test.cpp
index fed39f8..e5d7812 100644
--- a/tests/stdlib_test.cpp
+++ b/tests/stdlib_test.cpp
@@ -109,3 +109,26 @@
   ASSERT_STREQ(executable_path, p);
   free(p);
 }
+
+TEST(stdlib, qsort) {
+  struct s {
+    char name[16];
+    static int comparator(const void* lhs, const void* rhs) {
+      return strcmp(reinterpret_cast<const s*>(lhs)->name, reinterpret_cast<const s*>(rhs)->name);
+    }
+  };
+  s entries[3];
+  strcpy(entries[0].name, "charlie");
+  strcpy(entries[1].name, "bravo");
+  strcpy(entries[2].name, "alpha");
+
+  qsort(entries, 3, sizeof(s), s::comparator);
+  ASSERT_STREQ("alpha", entries[0].name);
+  ASSERT_STREQ("bravo", entries[1].name);
+  ASSERT_STREQ("charlie", entries[2].name);
+
+  qsort(entries, 3, sizeof(s), s::comparator);
+  ASSERT_STREQ("alpha", entries[0].name);
+  ASSERT_STREQ("bravo", entries[1].name);
+  ASSERT_STREQ("charlie", entries[2].name);
+}