Add stack unwinding directives to assembly leaf functions.

This allows the real culprit of a native crash to surface in the stack trace.
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index 97331d3..4ea2c6d 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -40,6 +40,7 @@
 #define NEON_MAX_PREFETCH_DISTANCE 320
 
 memcpy:
+        .fnstart
 		mov	ip, r0
 		cmp	r2, #16
 		blt     4f	@ Have less than 16 bytes to copy
@@ -128,6 +129,7 @@
 		ldrmib	r3, [r1], #1
 		strmib	r3, [ip], #1
 		bx	lr
+        .fnend
 
 #else	/* __ARM_ARCH__ < 7 */