EABI syscall cleanup.

A while back we cleaned up the auto-generated syscall stubs so that
they don't touch the stack unnecessarily when they take <= 4
arguments. This patch does the same for some of the hand-written
stubs: they now save r7 in ip instead of pushing it on the stack.

Also improve comments in clone.S.

Change-Id: I8850bf98f2b26829385315304472a760e6880ed8
diff --git a/libc/arch-arm/bionic/futex_arm.S b/libc/arch-arm/bionic/futex_arm.S
index e21a385..4131cdb 100644
--- a/libc/arch-arm/bionic/futex_arm.S
+++ b/libc/arch-arm/bionic/futex_arm.S
@@ -34,11 +34,10 @@
 
 // __futex_syscall3(*ftx, op, val)
 ENTRY(__futex_syscall3)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
+    mov     ip, r7
     ldr     r7, =__NR_futex
     swi     #0
-    ldmia   sp!, {r4, r7}
+    mov     r7, ip
     bx      lr
 END(__futex_syscall3)
 
@@ -49,25 +48,23 @@
 
 // __futex_wait(*ftx, val, *timespec)
 ENTRY(__futex_wait)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
+    mov     ip, r7
     mov     r3, r2
     mov     r2, r1
     mov     r1, #FUTEX_WAIT
     ldr     r7, =__NR_futex
     swi     #0
-    ldmia   sp!, {r4, r7}
+    mov     r7, ip
     bx      lr
 END(__futex_wait)
 
 // __futex_wake(*ftx, counter)
 ENTRY(__futex_wake)
-    .save   {r4, r7}
-    stmdb   sp!, {r4, r7}
+    mov     ip, r7
     mov     r2, r1
     mov     r1, #FUTEX_WAKE
     ldr     r7, =__NR_futex
     swi     #0
-    ldmia   sp!, {r4, r7}
+    mov     r7, ip
     bx      lr
 END(__futex_wake)