Align arm64 stacks to 16 bytes in __bionic_clone.

Also ensure that arm64/x86-64/x86 assembler uses local labels.
(There are so many non-local labels in arm that fixing them
seems out of scope.)

Also synchronize the __bionic_clone.S comments.

Change-Id: I03b4f84780d996b54d6637a074638196bbb01cd4
diff --git a/libc/arch-arm64/bionic/__bionic_clone.S b/libc/arch-arm64/bionic/__bionic_clone.S
index af91320..74db790 100644
--- a/libc/arch-arm64/bionic/__bionic_clone.S
+++ b/libc/arch-arm64/bionic/__bionic_clone.S
@@ -35,29 +35,36 @@
     mov     x29,  sp
     str     x8,       [sp, #-16]!
 
-    /* store thread pointer & args in child stack */
+    # Align 'child_stack' to 16 bytes.
+    and     x1, x1, #~0xf
+
+    # Copy 'fn' and 'arg' onto the child stack.
     stp     x5, x6, [x1, #-16]
 
-    /* sys_clone */
+    # Zero out the top 32 bits of 'flags'. (Is this necessary?)
     uxtw    x0, w0
+
+    # Make the system call.
     mov     x8, __NR_clone
     svc     #0
 
-    /* check for child/parent */
-    cbz     x0,1f
+    # Are we the child?
+    cbz     x0, .L_bc_child
 
     ldr     x8,       [sp], #16
     ldp     x29, x30, [sp], #16
 
+    # Set errno if something went wrong.
     cmn     x0, #(MAX_ERRNO + 1)
     cneg    x0, x0, hi
     b.hi    __set_errno
 
     ret
 
-    /* thread initialization - set the end of the frame record chain */
-1:
+.L_bc_child:
+    # We're in the child now. Set the end of the frame record chain...
     mov     x29, xzr
+    # ...and call __bionic_clone_entry with the 'fn' and 'arg' we stored on the child stack.
     ldp     x0, x1, [sp, #-16]
     b       __bionic_clone_entry
 END(__bionic_clone)
diff --git a/libc/arch-arm64/bionic/_setjmp.S b/libc/arch-arm64/bionic/_setjmp.S
index dfa861b..3836899 100644
--- a/libc/arch-arm64/bionic/_setjmp.S
+++ b/libc/arch-arm64/bionic/_setjmp.S
@@ -73,7 +73,7 @@
     ldr     w9, .L_setjmp_magic
     ldr     w10, [x0, #(_JB_MAGIC * 4)]
     cmp     w9, w10
-    b.ne    botch
+    b.ne    .L_fail
 
     /* restore core registers */
     ldp     x30, x10, [x0, #(_JB_CORE_BASE * 4 + 16 * 0)]
@@ -93,10 +93,10 @@
 
     /* validate sp (sp mod 16 = 0) and lr (lr mod 4 = 0) */
     tst     x30, #3
-    b.ne    botch
+    b.ne    .L_fail
     mov     x10, sp
     tst     x10, #15
-    b.ne    botch
+    b.ne    .L_fail
 
     /* set return value */
     cmp     w1, wzr
@@ -104,7 +104,7 @@
     ret
 
     /* validation failed, die die die */
-botch:
+.L_fail:
     bl      PIC_SYM(longjmperror, PLT)
     bl      PIC_SYM(abort, PLT)
     b        . - 8       /* Cannot get here */
diff --git a/libc/arch-arm64/bionic/setjmp.S b/libc/arch-arm64/bionic/setjmp.S
index 9a68d86..f9d2266 100644
--- a/libc/arch-arm64/bionic/setjmp.S
+++ b/libc/arch-arm64/bionic/setjmp.S
@@ -85,7 +85,7 @@
     ldr     w9, .L_setjmp_magic
     ldr     w10, [x0, #(_JB_MAGIC * 4)]
     cmp     w9, w10
-    b.ne    botch
+    b.ne    .L_fail
 
     /* restore core registers */
     ldp     x30, x10, [x0, #(_JB_CORE_BASE * 4 + 16 * 0)]
@@ -105,10 +105,10 @@
 
     /* validate sp (sp mod 16 = 0) and lr (lr mod 4 = 0) */
     tst     x30, #3
-    b.ne    botch
+    b.ne    .L_fail
     mov     x10, sp
     tst     x10, #15
-    b.ne    botch
+    b.ne    .L_fail
 
     /* set return value */
     cmp     w1, wzr
@@ -116,7 +116,7 @@
     ret
 
     /* validation failed, die die die */
-botch:
+.L_fail:
     bl      PIC_SYM(longjmperror, PLT)
     bl      PIC_SYM(abort, PLT)
     b       . - 8       /* Cannot get here */
diff --git a/libc/arch-x86/bionic/__bionic_clone.S b/libc/arch-x86/bionic/__bionic_clone.S
index bc7f71d..cb0a363 100644
--- a/libc/arch-x86/bionic/__bionic_clone.S
+++ b/libc/arch-x86/bionic/__bionic_clone.S
@@ -6,7 +6,7 @@
         pushl   %esi
         pushl   %edi
 
-        # Align child stack.
+        # Align 'child_stack' to 16 bytes.
         movl    20(%esp), %ecx
         andl    $~15, %ecx
 
@@ -28,8 +28,8 @@
 
         # Check result.
         cmpl    $0, %eax
-        je      bc_child
-        jg      bc_parent
+        je      .L_bc_child
+        jg      .L_bc_parent
 
         # An error occurred, so set errno and return -1.
         negl    %eax
@@ -37,15 +37,15 @@
         call    __set_errno
         addl    $4, %esp
         orl     $-1, %eax
-        jmp     bc_return
+        jmp     .L_bc_return
 
-bc_child:
+.L_bc_child:
         call    __bionic_clone_entry
         hlt
 
-bc_parent:
+.L_bc_parent:
         # we're the parent; nothing to do.
-bc_return:
+.L_bc_return:
         popl    %edi
         popl    %esi
         popl    %ebx
diff --git a/libc/arch-x86_64/bionic/__bionic_clone.S b/libc/arch-x86_64/bionic/__bionic_clone.S
index c2ebebd..62c9666 100644
--- a/libc/arch-x86_64/bionic/__bionic_clone.S
+++ b/libc/arch-x86_64/bionic/__bionic_clone.S
@@ -30,7 +30,7 @@
 
 // pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
 ENTRY(__bionic_clone)
-        # Enforce 16-byte alignment for child stack.
+        # Align 'child_stack' to 16 bytes.
         andq    $~15, %rsi
 
         # Copy 'fn' and 'arg' onto the child stack.