Fix arm64 clone stack handling.

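When pushing 'fn' and 'arg' onto the child stack, write the decremented
address back to the child stack pointer (pre-indexed stp) instead of
storing them below it, so a signal delivered in the child can't overwrite
them before __start_thread is called. The child now pops them with a
post-indexed ldp.

The 32-bit arm version already adjusted the stack pointer; it only gets
comment cleanups.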

Bug: 15195265
Change-Id: I5ab9469a82cb214c32f40a713268a1ab74a4c6fa
diff --git a/libc/arch-arm/bionic/__bionic_clone.S b/libc/arch-arm/bionic/__bionic_clone.S
index b02a709..a268f9d 100644
--- a/libc/arch-arm/bionic/__bionic_clone.S
+++ b/libc/arch-arm/bionic/__bionic_clone.S
@@ -42,12 +42,14 @@
     # load extra parameters
     ldmfd   ip, {r4, r5, r6}
 
-    # store 'fn' and 'arg' to the child stack
+    # Push 'fn' and 'arg' onto the child stack.
     stmdb   r1!, {r5, r6}
 
-    # System call
+    # Make the system call.
     ldr     r7, =__NR_clone
     swi     #0
+
+    # Are we the child?
     movs    r0, r0
     beq     1f
 
@@ -61,6 +63,7 @@
 1:  # The child.
     # Setting lr to 0 will make the unwinder stop at __start_thread
     mov    lr, #0
+    # Call __start_thread with the 'fn' and 'arg' we stored on the child stack.
     pop    {r0, r1}
     b      __start_thread
 END(__bionic_clone)
diff --git a/libc/arch-arm64/bionic/__bionic_clone.S b/libc/arch-arm64/bionic/__bionic_clone.S
index 56ac0f6..27e44e7 100644
--- a/libc/arch-arm64/bionic/__bionic_clone.S
+++ b/libc/arch-arm64/bionic/__bionic_clone.S
@@ -31,8 +31,8 @@
 // pid_t __bionic_clone(int flags, void* child_stack, pid_t* parent_tid, void* tls, pid_t* child_tid, int (*fn)(void*), void* arg);
 
 ENTRY(__bionic_clone)
-    # Copy 'fn' and 'arg' onto the child stack.
-    stp     x5, x6, [x1, #-16]
+    # Push 'fn' and 'arg' onto the child stack.
+    stp     x5, x6, [x1, #-16]!
 
     # Make the system call.
     mov     x8, __NR_clone
@@ -49,12 +49,12 @@
     ret
 
 .L_bc_child:
-    # We're in the child now. Set the end of the frame record chain...
-    mov     x29, xzr
-    # Setting x30 to 0 will make the unwinder stop at __start_thread
-    mov     x30, xzr
-    # ...and call __start_thread with the 'fn' and 'arg' we stored on the child stack.
-    ldp     x0, x1, [sp, #-16]
+    # We're in the child now. Set the end of the frame record chain.
+    mov     x29, #0
+    # Setting x30 to 0 will make the unwinder stop at __start_thread.
+    mov     x30, #0
+    # Call __start_thread with the 'fn' and 'arg' we stored on the child stack.
+    ldp     x0, x1, [sp], #16
     b       __start_thread
 END(__bionic_clone)
 .hidden __bionic_clone