Add function marks and size indications

Add an END macro to annotate function end, complementing ENTRY, and use
the pair to mark the start and end of each function. This allows
valgrind (and presumably other debugging tools) to use the debug
symbols to trace the functions.

Change-Id: I5f09cef8e22fb356eb6f5cee952b031e567599b6
diff --git a/libc/arch-arm/bionic/_exit_with_stack_teardown.S b/libc/arch-arm/bionic/_exit_with_stack_teardown.S
index 89f6c90..c2d7758 100644
--- a/libc/arch-arm/bionic/_exit_with_stack_teardown.S
+++ b/libc/arch-arm/bionic/_exit_with_stack_teardown.S
@@ -26,15 +26,10 @@
  * SUCH DAMAGE.
  */
 #include <asm/unistd.h>
-
-.text
-.type _exit_with_stack_teardown, #function
-.globl _exit_with_stack_teardown
-.align 4
+#include <machine/asm.h>
 
 @ void _exit_with_stack_teardown(void * stackBase, int stackSize, int retCode)
-
-_exit_with_stack_teardown:
+ENTRY(_exit_with_stack_teardown)
 
 #if __ARM_EABI__
     mov     lr, r2
@@ -53,3 +48,4 @@
     @ exit() should never return, cause a crash if it does
     mov		r0, #0
     ldr		r0, [r0]
+END(_exit_with_stack_teardown)
diff --git a/libc/arch-arm/bionic/_setjmp.S b/libc/arch-arm/bionic/_setjmp.S
index 5626219..6b8aa50 100644
--- a/libc/arch-arm/bionic/_setjmp.S
+++ b/libc/arch-arm/bionic/_setjmp.S
@@ -70,6 +70,7 @@
 
         mov	r0, #0x00000000
         bx      lr
+END(_setjmp)
 
 .L_setjmp_magic:
 	.word	_JB_MAGIC__SETJMP
@@ -109,3 +110,4 @@
 	bl	PIC_SYM(_C_LABEL(longjmperror), PLT)
 	bl	PIC_SYM(_C_LABEL(abort), PLT)
 	b	. - 8		/* Cannot get here */
+END(_longjmp)
diff --git a/libc/arch-arm/bionic/atomics_arm.S b/libc/arch-arm/bionic/atomics_arm.S
index d94f6b1..4d9cbcf 100644
--- a/libc/arch-arm/bionic/atomics_arm.S
+++ b/libc/arch-arm/bionic/atomics_arm.S
@@ -26,17 +26,9 @@
  * SUCH DAMAGE.
  */
 #include <sys/linux-syscalls.h>
+#include <machine/asm.h>
 #include <machine/cpu-features.h>
 
-.global __atomic_cmpxchg
-.type __atomic_cmpxchg, %function
-.global __atomic_swap
-.type __atomic_swap, %function
-.global __atomic_dec
-.type __atomic_dec, %function
-.global __atomic_inc
-.type __atomic_inc, %function
-
 #define FUTEX_WAIT 0
 #define FUTEX_WAKE 1
 
@@ -48,8 +40,7 @@
  */
 
 /* r0(addr) -> r0(old) */
-__atomic_dec:
-    .fnstart
+ENTRY(__atomic_dec)
     mov     r1, r0                      @ copy addr so we don't clobber it
 1:  ldrex   r0, [r1]                    @ load current value into r0
     sub     r2, r0, #1                  @ generate new value into r2
@@ -57,11 +48,10 @@
     cmp     r3, #0                      @ success?
     bxeq    lr                          @ yes, return
     b       1b                          @ no, retry
-    .fnend
+END(__atomic_dec)
 
 /* r0(addr) -> r0(old) */
-__atomic_inc:
-    .fnstart
+ENTRY(__atomic_inc)
     mov     r1, r0
 1:  ldrex   r0, [r1]
     add     r2, r0, #1
@@ -69,11 +59,10 @@
     cmp     r3, #0
     bxeq    lr
     b       1b
-    .fnend
+END(__atomic_inc)
 
 /* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
-__atomic_cmpxchg:
-    .fnstart
+ENTRY(__atomic_cmpxchg)
 1:  mov     ip, #2                      @ ip=2 means "new != old"
     ldrex   r3, [r2]                    @ load current value into r3
     teq     r0, r3                      @ new == old?
@@ -82,18 +71,17 @@
     beq     1b                          @ yes, retry
     mov     r0, ip                      @ return 0 on success, 2 on failure
     bx      lr
-    .fnend
+END(__atomic_cmpxchg)
 
 /* r0(new) r1(addr) -> r0(old) */
-__atomic_swap:
-    .fnstart
+ENTRY(__atomic_swap)
 1:  ldrex   r2, [r1]
     strex   r3, r0, [r1]
     teq     r3, #0
     bne     1b
     mov     r0, r2
     bx      lr
-    .fnend
+END(__atomic_swap)
 
 #else /*not defined __ARM_HAVE_LDREX_STREX*/
 /*
@@ -107,8 +95,7 @@
     .equ    kernel_atomic_base, 0xFFFF0FFF
 
 /* r0(addr) -> r0(old) */
-__atomic_dec:
-    .fnstart
+ENTRY(__atomic_dec)
     .save {r4, lr}
     stmdb   sp!, {r4, lr}
     mov     r2, r0
@@ -122,11 +109,10 @@
     add     r0, r1, #1
     ldmia   sp!, {r4, lr}
     bx      lr
-    .fnend
+END(__atomic_dec)
 
 /* r0(addr) -> r0(old) */
-__atomic_inc:
-    .fnstart
+ENTRY(__atomic_inc)
     .save {r4, lr}
     stmdb   sp!, {r4, lr}
     mov     r2, r0
@@ -140,11 +126,10 @@
     sub     r0, r1, #1
     ldmia   sp!, {r4, lr}
     bx      lr
-    .fnend
+END(__atomic_inc)
 
 /* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
-__atomic_cmpxchg:
-    .fnstart
+ENTRY(__atomic_cmpxchg)
     .save {r4, lr}
     stmdb   sp!, {r4, lr}
     mov     r4, r0          /* r4 = save oldvalue */
@@ -160,14 +145,13 @@
 2: @ atomic_cmpxchg
     ldmia   sp!, {r4, lr}
     bx      lr
-    .fnend
+END(__atomic_cmpxchg)
 
 /* r0(new) r1(addr) -> r0(old) */
-__atomic_swap:
-    .fnstart
+ENTRY(__atomic_swap)
     swp     r0, r0, [r1]
     bx      lr
-    .fnend
+END(__atomic_swap)
 
 #endif /*not defined __ARM_HAVE_LDREX_STREX*/
 
@@ -191,18 +175,16 @@
 
 #if __ARM_EABI__
 
-__futex_syscall3:
-    .fnstart
+ENTRY(__futex_syscall3)
     stmdb   sp!, {r4, r7}
     .save   {r4, r7}
     ldr     r7, =__NR_futex
     swi     #0
     ldmia   sp!, {r4, r7}
     bx      lr
-    .fnend
+END(__futex_syscall3)
 
-__futex_wait:
-    .fnstart
+ENTRY(__futex_wait)
     stmdb   sp!, {r4, r7}
     .save   {r4, r7}
     mov     r3, r2
@@ -212,10 +194,9 @@
     swi     #0
     ldmia   sp!, {r4, r7}
     bx      lr
-    .fnend
+END(__futex_wait)
 
-__futex_wake:
-    .fnstart
+ENTRY(__futex_wake)
     .save   {r4, r7}
     stmdb   sp!, {r4, r7}
     mov     r2, r1
@@ -224,28 +205,32 @@
     swi     #0
     ldmia   sp!, {r4, r7}
     bx      lr
-    .fnend
+END(__futex_wake)
 
 #else
 
-__futex_syscall3:
+ENTRY(__futex_syscall3)
     swi     #__NR_futex
     bx      lr
+END(__futex_syscall3)
 
-__futex_wait:
+ENTRY(__futex_wait)
     mov     r3, r2
     mov     r2, r1
     mov     r1, #FUTEX_WAIT
     swi     #__NR_futex
     bx      lr
+END(__futex_wait)
 
-__futex_wake:
+ENTRY(__futex_wake)
     mov     r2, r1
     mov     r1, #FUTEX_WAKE
     swi     #__NR_futex
     bx      lr
+END(__futex_wake)
 
 #endif
 
-__futex_syscall4:
+ENTRY(__futex_syscall4)
     b __futex_syscall3
+END(__futex_syscall4)
diff --git a/libc/arch-arm/bionic/clone.S b/libc/arch-arm/bionic/clone.S
index 9c25053..a95d2d6 100644
--- a/libc/arch-arm/bionic/clone.S
+++ b/libc/arch-arm/bionic/clone.S
@@ -26,14 +26,9 @@
  * SUCH DAMAGE.
  */
 #include <sys/linux-syscalls.h>
+#include <machine/asm.h>
 
-    .text
-    .type __pthread_clone, #function
-    .global __pthread_clone
-    .align 4
-    .fnstart
-
-__pthread_clone:
+ENTRY(__pthread_clone)
     @ insert the args onto the new stack
     str     r0, [r1, #-4]
     str     r3, [r1, #-8]
@@ -73,7 +68,7 @@
 __error:
     mov     r0, #-1
     bx      lr
-    .fnend
+END(__pthread_clone)
 
 
     #
@@ -88,12 +83,8 @@
     #       at the end of the parameter list makes the
     #       implementation much simpler.
     #
-    .type __bionic_clone, #function
-    .globl __bionic_clone
-    .align 4
-    .fnstart
 
-__bionic_clone:
+ENTRY(__bionic_clone)
     mov     ip, sp
     .save   {r4, r5, r6, r7}
 
@@ -124,5 +115,4 @@
     ldr    r0, [sp, #-4]
     ldr    r1, [sp, #-8]
     b      __bionic_clone_entry
-
-    .fnend
+END(__bionic_clone)
diff --git a/libc/arch-arm/bionic/ffs.S b/libc/arch-arm/bionic/ffs.S
index 13bd169..103ddd1 100644
--- a/libc/arch-arm/bionic/ffs.S
+++ b/libc/arch-arm/bionic/ffs.S
@@ -61,6 +61,7 @@
 	ldrneb  r0, [ r2, r0, lsr #26 ]
 
 	bx		lr
+END(ffs)
 
 .text;
 .type .L_ffs_table, _ASM_TYPE_OBJECT;
@@ -78,5 +79,6 @@
 	clzne	r0, r0
 	rsbne	r0, r0, #32
 	bx		lr
+END(ffs)
 #endif /* !defined(__ARM_HAVE_CLZ) */
 
diff --git a/libc/arch-arm/bionic/kill.S b/libc/arch-arm/bionic/kill.S
index 2954091..33dfc2b 100644
--- a/libc/arch-arm/bionic/kill.S
+++ b/libc/arch-arm/bionic/kill.S
@@ -33,17 +33,13 @@
    of a corrupted malloc heap).
 */
 #include <sys/linux-syscalls.h>
+#include <machine/asm.h>
 
 #ifndef __NR_kill
 #define __NR_kill   37
 #endif
 
-    .text
-    .type kill, #function
-    .globl kill
-    .align 4
-
-kill:
+ENTRY(kill)
     stmfd   sp!, {r4-r7, ip, lr}
     ldr     r7, =__NR_kill
     swi     #0
@@ -51,3 +47,4 @@
     movs    r0, r0
     bxpl    lr
     b       __set_syscall_errno
+END(kill)
diff --git a/libc/arch-arm/bionic/memcmp.S b/libc/arch-arm/bionic/memcmp.S
index 67dcddc..c872a51 100644
--- a/libc/arch-arm/bionic/memcmp.S
+++ b/libc/arch-arm/bionic/memcmp.S
@@ -27,12 +27,7 @@
  */
 
 #include <machine/cpu-features.h>
-
-    .text
-
-    .global memcmp
-    .type memcmp, %function
-    .align 4
+#include <machine/asm.h>
 
 /*
  * Optimized memcmp() for ARM9.
@@ -43,8 +38,7 @@
  * (2) The loads are scheduled in a way they won't stall
  */
 
-memcmp:
-        .fnstart
+ENTRY(memcmp)
         PLD         (r0, #0)
         PLD         (r1, #0)
 
@@ -176,7 +170,7 @@
 9:      /* restore registers and return */
         ldmfd       sp!, {r4, lr}
         bx          lr
-        .fnend
+END(memcmp)
 
 
 
diff --git a/libc/arch-arm/bionic/memcmp16.S b/libc/arch-arm/bionic/memcmp16.S
index f398588..99c9b88 100644
--- a/libc/arch-arm/bionic/memcmp16.S
+++ b/libc/arch-arm/bionic/memcmp16.S
@@ -27,12 +27,7 @@
  */
 
 #include <machine/cpu-features.h>
-
-    .text
-
-    .global __memcmp16
-    .type __memcmp16, %function
-    .align 4
+#include <machine/asm.h>
 
 /*
  * Optimized memcmp16() for ARM9.
@@ -43,8 +38,7 @@
  * (2) The loads are scheduled in a way they won't stall
  */
 
-__memcmp16:
-        .fnstart
+ENTRY(__memcmp16)
         PLD         (r0, #0)
         PLD         (r1, #0)
 
@@ -95,8 +89,6 @@
         /* restore registers and return */
         ldmnefd     sp!, {r4, lr}
         bxne        lr
-        .fnend
-
 
 
 0:      /* here the first pointer is aligned, and we have at least 3 words
@@ -237,3 +229,4 @@
 7:      /* fix up the 2 pointers and fallthrough... */
         sub         r1, r1, #2
         b           2b
+END(__memcmp16)
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index ba55996..04ba848 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -27,6 +27,7 @@
  */
 
 #include <machine/cpu-features.h>
+#include <machine/asm.h>
 
 #if defined(__ARM_NEON__)
 
@@ -143,18 +144,12 @@
 
         ldmfd       sp!, {r0, lr}
         bx          lr
-        .fnend
+END(memcpy)
 
 
 #else   /* __ARM_ARCH__ < 7 */
 
 
-	.text
-
-    .global memcpy
-    .type memcpy, %function
-    .align 4
-
 		/*
 		 * Optimized memcpy() for ARM.
          *
@@ -162,12 +157,11 @@
 		 * so we have to preserve R0.
 		 */
 
-memcpy:
+ENTRY(memcpy)
 		/* The stack must always be 64-bits aligned to be compliant with the
 		 * ARM ABI. Since we have to save R0, we might as well save R4
 		 * which we can use for better pipelining of the reads below
 		 */
-        .fnstart
         .save       {r0, r4, lr}
         stmfd       sp!, {r0, r4, lr}
         /* Making room for r5-r11 which will be spilled later */
@@ -504,7 +498,7 @@
         add         sp,  sp, #28
 		ldmfd		sp!, {r0, r4, lr}
 		bx			lr
-        .fnend
+END(memcpy)
 
 
 #endif    /* __ARM_ARCH__ < 7 */
diff --git a/libc/arch-arm/bionic/memset.S b/libc/arch-arm/bionic/memset.S
index 93abe15..273b9e3 100644
--- a/libc/arch-arm/bionic/memset.S
+++ b/libc/arch-arm/bionic/memset.S
@@ -25,15 +25,8 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-	.text
 
-    .global memset
-    .type memset, %function
-
-    .global bzero
-    .type bzero, %function
-
-    .align
+#include <machine/asm.h>
 	
 		/*
 		 * Optimized memset() for ARM.
@@ -41,15 +34,15 @@
          * memset() returns its first argument.
 		 */
 	
-bzero:
+ENTRY(bzero)
         mov     r2, r1
         mov     r1, #0
+END(bzero)
 
-memset:	
+ENTRY(memset)
 		/* compute the offset to align the destination
 		 * offset = (4-(src&3))&3 = -src & 3
 		 */
-        .fnstart
         .save       {r0, r4-r7, lr}
 		stmfd		sp!, {r0, r4-r7, lr}
 		rsb			r3, r0, #0
@@ -113,5 +106,4 @@
 		strcsb		r1, [r0]
         ldmfd		sp!, {r0, r4-r7, lr}
         bx          lr
-        .fnend
-    
+END(memset)
diff --git a/libc/arch-arm/bionic/setjmp.S b/libc/arch-arm/bionic/setjmp.S
index 59aff66..996e55e 100644
--- a/libc/arch-arm/bionic/setjmp.S
+++ b/libc/arch-arm/bionic/setjmp.S
@@ -79,6 +79,7 @@
 
 	mov	r0, #0x00000000
 	bx      lr
+END(setjmp)
 
 .Lsetjmp_magic:
 	.word	_JB_MAGIC_SETJMP
@@ -138,3 +139,4 @@
 	bl	PIC_SYM(_C_LABEL(longjmperror), PLT)
 	bl	PIC_SYM(_C_LABEL(abort), PLT)
 	b	. - 8		/* Cannot get here */
+END(longjmp)
diff --git a/libc/arch-arm/bionic/sigsetjmp.S b/libc/arch-arm/bionic/sigsetjmp.S
index 50e6429..12311e5 100644
--- a/libc/arch-arm/bionic/sigsetjmp.S
+++ b/libc/arch-arm/bionic/sigsetjmp.S
@@ -33,6 +33,8 @@
  * SUCH DAMAGE.
  */
 
+#define _ALIGN_TEXT .align 0
+
 #include <machine/asm.h>
 #include <machine/setjmp.h>
 
@@ -50,6 +52,7 @@
 	teq	r1, #0
 	beq	PIC_SYM(_C_LABEL(_setjmp), PLT)
 	b	PIC_SYM(_C_LABEL(setjmp), PLT)
+END(sigsetjmp)
 
 .L_setjmp_magic:
 	.word	_JB_MAGIC__SETJMP
@@ -60,3 +63,4 @@
 	teq	r2, r3
 	beq	PIC_SYM(_C_LABEL(_longjmp), PLT)
 	b	PIC_SYM(_C_LABEL(longjmp), PLT)
+END(siglongjmp)
diff --git a/libc/arch-arm/bionic/strcpy.S b/libc/arch-arm/bionic/strcpy.S
index 70c353f..21dafda 100644
--- a/libc/arch-arm/bionic/strcpy.S
+++ b/libc/arch-arm/bionic/strcpy.S
@@ -30,15 +30,9 @@
  */
 
 #include <machine/cpu-features.h>
+#include <machine/asm.h>
 
-	.text
-
-	.global strcpy
-	.type strcpy, %function
-	.align 4
-
-strcpy:
-	.fnstart
+ENTRY(strcpy)
 	PLD(r1, #0)
 	eor	r2, r0, r1
 	mov	ip, r0
@@ -136,3 +130,4 @@
 	cmp	r2, #0
 	bne	4b
 	bx	lr
+END(strcpy)
diff --git a/libc/arch-arm/bionic/tkill.S b/libc/arch-arm/bionic/tkill.S
index 7b3301a..fdc5ed4 100644
--- a/libc/arch-arm/bionic/tkill.S
+++ b/libc/arch-arm/bionic/tkill.S
@@ -32,18 +32,15 @@
    abort due to a fatal runtime error (e.g. detection
    of a corrupted malloc heap).
 */
+
 #include <sys/linux-syscalls.h>
+#include <machine/asm.h>
 
 #ifndef __NR_tkill
 #define __NR_tkill  238
 #endif
 
-    .text
-    .type tkill, #function
-    .globl tkill
-    .align 4
-
-tkill:
+ENTRY(tkill)
     stmfd   sp!, {r4-r7, ip, lr}
     ldr     r7, =__NR_tkill
     swi     #0
@@ -51,3 +48,4 @@
     movs    r0, r0
     bxpl    lr
     b       __set_syscall_errno
+END(tkill)
diff --git a/libc/arch-arm/include/machine/asm.h b/libc/arch-arm/include/machine/asm.h
index c7bd017..7b8f053 100644
--- a/libc/arch-arm/include/machine/asm.h
+++ b/libc/arch-arm/include/machine/asm.h
@@ -70,7 +70,13 @@
 #define _ASM_TYPE_FUNCTION	#function
 #define _ASM_TYPE_OBJECT	#object
 #define _ENTRY(x) \
-	.text; _ALIGN_TEXT; .globl x; .type x,_ASM_TYPE_FUNCTION; x:
+	.text; _ALIGN_TEXT; .globl x; .type x,_ASM_TYPE_FUNCTION; x: .fnstart
+
+#define _ASM_SIZE(x)	.size x, .-x;
+
+#define _END(x) \
+	.fnend; \
+	_ASM_SIZE(x)
 
 #ifdef GPROF
 # ifdef __ELF__
@@ -86,8 +92,10 @@
 
 #define	ENTRY(y)	_ENTRY(_C_LABEL(y)); _PROF_PROLOGUE
 #define	ENTRY_NP(y)	_ENTRY(_C_LABEL(y))
+#define	END(y)		_END(_C_LABEL(y))
 #define	ASENTRY(y)	_ENTRY(_ASM_LABEL(y)); _PROF_PROLOGUE
 #define	ASENTRY_NP(y)	_ENTRY(_ASM_LABEL(y))
+#define	ASEND(y)	_END(_ASM_LABEL(y))
 
 #define	ASMSTR		.asciz