Add optimized AArch64 versions of bcopy and wmemmove based on memmove

Add optimized versions of bcopy and wmemmove for AArch64, built on top
of the existing memmove assembly: bcopy swaps its source and destination
arguments before falling into the memmove code, and wmemmove scales its
element count by sizeof(wchar_t) (4 bytes on AArch64) before doing the
same.
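
For reference, the new entry points are semantically equivalent to the
following C wrappers (an illustrative sketch only; the patch implements
them directly in the memmove assembly via the BCOPY and WMEMMOVE
defines):

    #include <string.h>
    #include <wchar.h>

    /* bcopy takes (src, dst, n); memmove takes (dst, src, n). */
    void bcopy(const void *src, void *dst, size_t n) {
        memmove(dst, src, n);
    }

    /* wmemmove counts wchar_t elements (4 bytes each on AArch64),
     * so the byte count passed to memmove is n * sizeof(wchar_t). */
    wchar_t *wmemmove(wchar_t *dst, const wchar_t *src, size_t n) {
        return (wchar_t *)memmove(dst, src, n * sizeof(wchar_t));
    }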

Change-Id: I82fbe8a7221ce224c567ffcfed7a94a53640fca8
Signed-off-by: Bernhard Rosenkraenzer <Bernhard.Rosenkranzer@linaro.org>
diff --git a/libc/arch-arm64/generic/bionic/memmove.S b/libc/arch-arm64/generic/bionic/memmove.S
index d6ecb86..8b366a3 100644
--- a/libc/arch-arm64/generic/bionic/memmove.S
+++ b/libc/arch-arm64/generic/bionic/memmove.S
@@ -29,11 +29,16 @@
  *
  * ARMv8-a, AArch64
  * Unaligned accesses
+ * wchar_t is 4 bytes
  */
 
 #include <private/bionic_asm.h>
 
 /* Parameters and result.  */
+#ifdef BCOPY
+#define origdstin	x1	/* bcopy's dst is its second argument. */
+#define origsrc	x0	/* bcopy's src is its first argument. */
+#endif
 #define dstin	x0
 #define src	x1
 #define count	x2
@@ -54,7 +59,18 @@
 #define D_l	x13
 #define D_h	x14
 
+#ifdef BCOPY
+ENTRY(bcopy)
+	/* Swap src and dst so that a branch to memcpy doesn't cause issues. */
+	mov	tmp1, origsrc
+	mov	origsrc, origdstin
+	mov	origdstin, tmp1
+#elif defined(WMEMMOVE)
+ENTRY(wmemmove)
+	lsl	count, count, #2	/* Convert wchar_t count to bytes (wchar_t is 4 bytes). */
+#else
 ENTRY(memmove)
+#endif
 	cmp	dstin, src
 	b.lo	.Ldownwards
 	add	tmp1, src, count
@@ -316,4 +332,10 @@
 	tst	count, #0x3f
 	b.ne	.Ltail63down
 	ret
+#ifdef BCOPY
+END(bcopy)
+#elif defined(WMEMMOVE)
+END(wmemmove)
+#else
 END(memmove)
+#endif