Align arm64 stacks to 16 bytes in __bionic_clone.

Also ensure that arm64/x86-64/x86 assembler uses local labels.
(There are so many non-local labels in arm that fixing them
seems out of scope.)

Also synchronize the __bionic_clone.S comments.

Change-Id: I03b4f84780d996b54d6637a074638196bbb01cd4
diff --git a/libc/arch-x86/bionic/__bionic_clone.S b/libc/arch-x86/bionic/__bionic_clone.S
index bc7f71d..cb0a363 100644
--- a/libc/arch-x86/bionic/__bionic_clone.S
+++ b/libc/arch-x86/bionic/__bionic_clone.S
@@ -6,7 +6,7 @@
         pushl   %esi
         pushl   %edi
 
-        # Align child stack.
+        # Align 'child_stack' to 16 bytes.
         movl    20(%esp), %ecx
         andl    $~15, %ecx
 
@@ -28,8 +28,8 @@
 
         # Check result.
         cmpl    $0, %eax
-        je      bc_child
-        jg      bc_parent
+        je      .L_bc_child
+        jg      .L_bc_parent
 
         # An error occurred, so set errno and return -1.
         negl    %eax
@@ -37,15 +37,15 @@
         call    __set_errno
         addl    $4, %esp
         orl     $-1, %eax
-        jmp     bc_return
+        jmp     .L_bc_return
 
-bc_child:
+.L_bc_child:
         call    __bionic_clone_entry
         hlt
 
-bc_parent:
+.L_bc_parent:
         # we're the parent; nothing to do.
-bc_return:
+.L_bc_return:
         popl    %edi
         popl    %esi
         popl    %ebx