Merge "Return has_DT_SYMBOLIC flag."
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
index 36da2d9..a2e9c22 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
@@ -40,12 +40,10 @@
 ENTRY(__strcat_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
     push    {r4, r5}
-    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -195,9 +193,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcat_chk_failed)
-    .save   {r0, lr}
-    .save   {r4, r5}
-
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
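
Two CFA directives are in play in hunks like the one above, and the distinction matters when a frame is pushed in stages: `.cfi_def_cfa_offset` states the CFA's distance from sp as an absolute value, while `.cfi_adjust_cfa_offset` adds a delta to the current distance. A sketch of the staged-push pattern from `__strcat_chk`:

    push    {r0, lr}
    .cfi_def_cfa_offset 8       // CFA = sp + 8 (absolute statement)
    .cfi_rel_offset r0, 0       // register offsets are relative to the current sp
    .cfi_rel_offset lr, 4
    push    {r4, r5}
    .cfi_adjust_cfa_offset 8    // CFA = sp + 16 now (8 more than before)
    .cfi_rel_offset r4, 0       // offsets restart from the new, lower sp
    .cfi_rel_offset r5, 4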
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
index c3e3e14..db76686 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
@@ -39,7 +39,6 @@
 ENTRY(__strcpy_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -161,7 +160,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcpy_chk_failed)
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S
index da4f3dd..410b663 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S
@@ -72,7 +72,6 @@
 ENTRY(memcpy)
         pld     [r1, #64]
         push    {r0, lr}
-        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
@@ -85,7 +84,6 @@
 ENTRY_PRIVATE(__memcpy_chk_fail)
         // Preserve lr for backtrace.
         push    {lr}
-        .save   {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
index 6ba4931..2a73852 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
@@ -54,7 +54,6 @@
  */
 
 ENTRY_PRIVATE(MEMCPY_BASE)
-        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
@@ -172,7 +171,6 @@
 END(MEMCPY_BASE)
 
 ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
-        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
@@ -181,17 +179,14 @@
         // i.e., not keeping the stack looking like users expect
         // (highest numbered register at highest address).
         strd    r4, r5, [sp, #-8]!
-        .save   {r4, r5}
         .cfi_adjust_cfa_offset 8
         .cfi_rel_offset r4, 0
         .cfi_rel_offset r5, 4
         strd    r6, r7, [sp, #-8]!
-        .save   {r6, r7}
         .cfi_adjust_cfa_offset 8
         .cfi_rel_offset r6, 0
         .cfi_rel_offset r7, 4
         strd    r8, r9, [sp, #-8]!
-        .save   {r8, r9}
         .cfi_adjust_cfa_offset 8
         .cfi_rel_offset r8, 0
         .cfi_rel_offset r9, 4
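
The three `strd` pushes above are why the comment warns that the stack will not look the way users expect: pushed pair by pair, r8/r9 end up at the lowest addresses and r4/r5 at the highest, the reverse of a single `push {r4-r9}`. Within one pair, though, `strd` still places the lower-numbered register at the lower address, so each pair's CFI is offset 0 and 4 from the freshly decremented sp:

    strd    r6, r7, [sp, #-8]!  // pre-decrement sp by 8; r6 -> [sp], r7 -> [sp + 4]
    .cfi_adjust_cfa_offset 8    // frame grew by 8
    .cfi_rel_offset r6, 0       // r6 at sp + 0
    .cfi_rel_offset r7, 4       // r7 at sp + 4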
diff --git a/libc/arch-arm/cortex-a15/bionic/memset.S b/libc/arch-arm/cortex-a15/bionic/memset.S
index 12c68d6..e4a1ec8 100644
--- a/libc/arch-arm/cortex-a15/bionic/memset.S
+++ b/libc/arch-arm/cortex-a15/bionic/memset.S
@@ -44,7 +44,6 @@
         bls         .L_done
 
         // Preserve lr for backtrace.
-        .save       {lr}
         push        {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
@@ -68,7 +67,6 @@
 END(bzero)
 
 ENTRY(memset)
-        .save       {r0}
         stmfd       sp!, {r0}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset r0, 0
diff --git a/libc/arch-arm/cortex-a15/bionic/strcmp.S b/libc/arch-arm/cortex-a15/bionic/strcmp.S
index 12da115..acedf0e 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcmp.S
@@ -168,7 +168,6 @@
         bne     .L_do_align
 
         /* Fast path.  */
-        .save   {r4-r7}
         init
 
 .L_doubleword_aligned:
diff --git a/libc/arch-arm/cortex-a15/bionic/strcpy.S b/libc/arch-arm/cortex-a15/bionic/strcpy.S
index cb878c4..2cfdb19 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcpy.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcpy.S
@@ -62,6 +62,11 @@
 
     .macro m_push
     push    {r0, r4, r5, lr}
+    .cfi_def_cfa_offset 16
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r4, 4
+    .cfi_rel_offset r5, 8
+    .cfi_rel_offset lr, 12
     .endm // m_push
 
     .macro m_pop
@@ -78,61 +83,61 @@
     // For short copies, hard-code checking the first 8 bytes since this
     // new code doesn't win until after about 8 bytes.
     m_push
-    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
+    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue
 
-strcpy_finish:
+.Lstrcpy_finish:
     m_pop
 
-strcpy_continue:
+.Lstrcpy_continue:
     pld     [r1, #0]
     ands    r3, r0, #7
-    beq     strcpy_check_src_align
+    beq     .Lstrcpy_check_src_align
 
     // Align to a double word (64 bits).
     rsb     r3, r3, #8
     lsls    ip, r3, #31
-    beq     strcpy_align_to_32
+    beq     .Lstrcpy_align_to_32
 
     ldrb    r2, [r1], #1
     strb    r2, [r0], #1
-    cbz     r2, strcpy_complete
+    cbz     r2, .Lstrcpy_complete
 
-strcpy_align_to_32:
-    bcc     strcpy_align_to_64
+.Lstrcpy_align_to_32:
+    bcc     .Lstrcpy_align_to_64
 
     ldrb    r2, [r1], #1
     strb    r2, [r0], #1
-    cbz     r2, strcpy_complete
+    cbz     r2, .Lstrcpy_complete
     ldrb    r2, [r1], #1
     strb    r2, [r0], #1
-    cbz     r2, strcpy_complete
+    cbz     r2, .Lstrcpy_complete
 
-strcpy_align_to_64:
+.Lstrcpy_align_to_64:
     tst     r3, #4
-    beq     strcpy_check_src_align
+    beq     .Lstrcpy_check_src_align
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
     str     r2, [r0], #4
 
-strcpy_check_src_align:
+.Lstrcpy_check_src_align:
     // At this point dst is aligned to a double word, check if src
     // is also aligned to a double word.
     ands    r3, r1, #7
-    bne     strcpy_unaligned_copy
+    bne     .Lstrcpy_unaligned_copy
 
     .p2align 2
-strcpy_mainloop:
+.Lstrcpy_mainloop:
     ldrd    r2, r3, [r1], #8
 
     pld     [r1, #64]
@@ -140,128 +145,128 @@
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     strd    r2, r3, [r0], #8
-    b       strcpy_mainloop
+    b       .Lstrcpy_mainloop
 
-strcpy_complete:
+.Lstrcpy_complete:
     m_pop
 
-strcpy_zero_in_first_register:
+.Lstrcpy_zero_in_first_register:
     lsls    lr, ip, #17
-    bne     strcpy_copy1byte
-    bcs     strcpy_copy2bytes
+    bne     .Lstrcpy_copy1byte
+    bcs     .Lstrcpy_copy2bytes
     lsls    ip, ip, #1
-    bne     strcpy_copy3bytes
+    bne     .Lstrcpy_copy3bytes
 
-strcpy_copy4bytes:
+.Lstrcpy_copy4bytes:
     // Copy 4 bytes to the destination.
     str     r2, [r0]
     m_pop
 
-strcpy_copy1byte:
+.Lstrcpy_copy1byte:
     strb    r2, [r0]
     m_pop
 
-strcpy_copy2bytes:
+.Lstrcpy_copy2bytes:
     strh    r2, [r0]
     m_pop
 
-strcpy_copy3bytes:
+.Lstrcpy_copy3bytes:
     strh    r2, [r0], #2
     lsr     r2, #16
     strb    r2, [r0]
     m_pop
 
-strcpy_zero_in_second_register:
+.Lstrcpy_zero_in_second_register:
     lsls    lr, ip, #17
-    bne     strcpy_copy5bytes
-    bcs     strcpy_copy6bytes
+    bne     .Lstrcpy_copy5bytes
+    bcs     .Lstrcpy_copy6bytes
     lsls    ip, ip, #1
-    bne     strcpy_copy7bytes
+    bne     .Lstrcpy_copy7bytes
 
     // Copy 8 bytes to the destination.
     strd    r2, r3, [r0]
     m_pop
 
-strcpy_copy5bytes:
+.Lstrcpy_copy5bytes:
     str     r2, [r0], #4
     strb    r3, [r0]
     m_pop
 
-strcpy_copy6bytes:
+.Lstrcpy_copy6bytes:
     str     r2, [r0], #4
     strh    r3, [r0]
     m_pop
 
-strcpy_copy7bytes:
+.Lstrcpy_copy7bytes:
     str     r2, [r0], #4
     strh    r3, [r0], #2
     lsr     r3, #16
     strb    r3, [r0]
     m_pop
 
-strcpy_unaligned_copy:
+.Lstrcpy_unaligned_copy:
     // Dst is aligned to a double word, while src is at an unknown alignment.
     // There are 7 different versions of the unaligned copy code
     // to prevent overreading the src. The mainloop of every single version
     // will store 64 bits per loop. The difference is how much of src can
     // be read without potentially crossing a page boundary.
     tbb     [pc, r3]
-strcpy_unaligned_branchtable:
+.Lstrcpy_unaligned_branchtable:
     .byte 0
-    .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)
 
     .p2align 2
     // Can read 7 bytes before possibly crossing a page.
-strcpy_unalign7:
+.Lstrcpy_unalign7:
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldrb    r3, [r1]
-    cbz     r3, strcpy_unalign7_copy5bytes
+    cbz     r3, .Lstrcpy_unalign7_copy5bytes
     ldrb    r4, [r1, #1]
-    cbz     r4, strcpy_unalign7_copy6bytes
+    cbz     r4, .Lstrcpy_unalign7_copy6bytes
     ldrb    r5, [r1, #2]
-    cbz     r5, strcpy_unalign7_copy7bytes
+    cbz     r5, .Lstrcpy_unalign7_copy7bytes
 
     ldr     r3, [r1], #4
     pld     [r1, #64]
 
     lsrs    ip, r3, #24
     strd    r2, r3, [r0], #8
-    beq     strcpy_unalign_return
-    b       strcpy_unalign7
+    beq     .Lstrcpy_unalign_return
+    b       .Lstrcpy_unalign7
 
-strcpy_unalign7_copy5bytes:
+.Lstrcpy_unalign7_copy5bytes:
     str     r2, [r0], #4
     strb    r3, [r0]
-strcpy_unalign_return:
+.Lstrcpy_unalign_return:
     m_pop
 
-strcpy_unalign7_copy6bytes:
+.Lstrcpy_unalign7_copy6bytes:
     str     r2, [r0], #4
     strb    r3, [r0], #1
     strb    r4, [r0], #1
     m_pop
 
-strcpy_unalign7_copy7bytes:
+.Lstrcpy_unalign7_copy7bytes:
     str     r2, [r0], #4
     strb    r3, [r0], #1
     strb    r4, [r0], #1
@@ -270,41 +275,41 @@
 
     .p2align 2
     // Can read 6 bytes before possibly crossing a page.
-strcpy_unalign6:
+.Lstrcpy_unalign6:
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldrb    r4, [r1]
-    cbz     r4, strcpy_unalign_copy5bytes
+    cbz     r4, .Lstrcpy_unalign_copy5bytes
     ldrb    r5, [r1, #1]
-    cbz     r5, strcpy_unalign_copy6bytes
+    cbz     r5, .Lstrcpy_unalign_copy6bytes
 
     ldr     r3, [r1], #4
     pld     [r1, #64]
 
     tst     r3, #0xff0000
-    beq     strcpy_copy7bytes
+    beq     .Lstrcpy_copy7bytes
     lsrs    ip, r3, #24
     strd    r2, r3, [r0], #8
-    beq     strcpy_unalign_return
-    b       strcpy_unalign6
+    beq     .Lstrcpy_unalign_return
+    b       .Lstrcpy_unalign6
 
     .p2align 2
     // Can read 5 bytes before possibly crossing a page.
-strcpy_unalign5:
+.Lstrcpy_unalign5:
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldrb    r4, [r1]
-    cbz     r4, strcpy_unalign_copy5bytes
+    cbz     r4, .Lstrcpy_unalign_copy5bytes
 
     ldr     r3, [r1], #4
 
@@ -313,17 +318,17 @@
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     strd    r2, r3, [r0], #8
-    b       strcpy_unalign5
+    b       .Lstrcpy_unalign5
 
-strcpy_unalign_copy5bytes:
+.Lstrcpy_unalign_copy5bytes:
     str     r2, [r0], #4
     strb    r4, [r0]
     m_pop
 
-strcpy_unalign_copy6bytes:
+.Lstrcpy_unalign_copy6bytes:
     str     r2, [r0], #4
     strb    r4, [r0], #1
     strb    r5, [r0]
@@ -331,13 +336,13 @@
 
     .p2align 2
     // Can read 4 bytes before possibly crossing a page.
-strcpy_unalign4:
+.Lstrcpy_unalign4:
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldr     r3, [r1], #4
     pld     [r1, #64]
@@ -345,20 +350,20 @@
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     strd    r2, r3, [r0], #8
-    b       strcpy_unalign4
+    b       .Lstrcpy_unalign4
 
     .p2align 2
     // Can read 3 bytes before possibly crossing a page.
-strcpy_unalign3:
+.Lstrcpy_unalign3:
     ldrb    r2, [r1]
-    cbz     r2, strcpy_unalign3_copy1byte
+    cbz     r2, .Lstrcpy_unalign3_copy1byte
     ldrb    r3, [r1, #1]
-    cbz     r3, strcpy_unalign3_copy2bytes
+    cbz     r3, .Lstrcpy_unalign3_copy2bytes
     ldrb    r4, [r1, #2]
-    cbz     r4, strcpy_unalign3_copy3bytes
+    cbz     r4, .Lstrcpy_unalign3_copy3bytes
 
     ldr     r2, [r1], #4
     ldr     r3, [r1], #4
@@ -366,26 +371,26 @@
     pld     [r1, #64]
 
     lsrs    lr, r2, #24
-    beq     strcpy_copy4bytes
+    beq     .Lstrcpy_copy4bytes
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     strd    r2, r3, [r0], #8
-    b       strcpy_unalign3
+    b       .Lstrcpy_unalign3
 
-strcpy_unalign3_copy1byte:
+.Lstrcpy_unalign3_copy1byte:
     strb    r2, [r0]
     m_pop
 
-strcpy_unalign3_copy2bytes:
+.Lstrcpy_unalign3_copy2bytes:
     strb    r2, [r0], #1
     strb    r3, [r0]
     m_pop
 
-strcpy_unalign3_copy3bytes:
+.Lstrcpy_unalign3_copy3bytes:
     strb    r2, [r0], #1
     strb    r3, [r0], #1
     strb    r4, [r0]
@@ -393,34 +398,34 @@
 
     .p2align 2
     // Can read 2 bytes before possibly crossing a page.
-strcpy_unalign2:
+.Lstrcpy_unalign2:
     ldrb    r2, [r1]
-    cbz     r2, strcpy_unalign_copy1byte
+    cbz     r2, .Lstrcpy_unalign_copy1byte
     ldrb    r4, [r1, #1]
-    cbz     r4, strcpy_unalign_copy2bytes
+    cbz     r4, .Lstrcpy_unalign_copy2bytes
 
     ldr     r2, [r1], #4
     ldr     r3, [r1], #4
     pld     [r1, #64]
 
     tst     r2, #0xff0000
-    beq     strcpy_copy3bytes
+    beq     .Lstrcpy_copy3bytes
     lsrs    ip, r2, #24
-    beq     strcpy_copy4bytes
+    beq     .Lstrcpy_copy4bytes
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     strd    r2, r3, [r0], #8
-    b       strcpy_unalign2
+    b       .Lstrcpy_unalign2
 
     .p2align 2
     // Can read 1 byte before possibly crossing a page.
-strcpy_unalign1:
+.Lstrcpy_unalign1:
     ldrb    r2, [r1]
-    cbz     r2, strcpy_unalign_copy1byte
+    cbz     r2, .Lstrcpy_unalign_copy1byte
 
     ldr     r2, [r1], #4
     ldr     r3, [r1], #4
@@ -430,21 +435,21 @@
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     strd    r2, r3, [r0], #8
-    b       strcpy_unalign1
+    b       .Lstrcpy_unalign1
 
-strcpy_unalign_copy1byte:
+.Lstrcpy_unalign_copy1byte:
     strb    r2, [r0]
     m_pop
 
-strcpy_unalign_copy2bytes:
+.Lstrcpy_unalign_copy2bytes:
     strb    r2, [r0], #1
     strb    r4, [r0]
     m_pop
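
The `sub`/`bic`/`ands` triple that recurs throughout strcpy is the classic SWAR null-byte test: subtracting 1 from every byte borrows through a zero byte and flips its 0x80 bit, `bic` clears bytes whose top bit was set in the input to begin with (ruling out false positives), and the surviving 0x80808080 bits are nonzero exactly when the word contains a 0x00 byte. The pattern, restated with explanatory comments and a hypothetical branch target:

    // Example: r2 = 0x42004141 ('A','A',0,'B' little-endian) has a zero in byte 2.
    sub     ip, r2, #0x01010101     // 0x00 - 0x01 borrows, turning byte 2 into 0xff
    bic     ip, ip, r2              // keep only bits that were clear in r2 itself
    ands    ip, ip, #0x80808080     // one sentinel bit per zero byte; sets flags
    bne     .Lfound_null            // hypothetical label: r2 held a terminator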
diff --git a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
index 651aefc..45517f1 100644
--- a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
@@ -40,12 +40,10 @@
 ENTRY(__strcat_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
     push    {r4, r5}
-    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -199,8 +197,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcat_chk_fail)
-    .save   {r0, lr}
-    .save   {r4, r5}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
index 2447780..67eca08 100644
--- a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
@@ -39,7 +39,6 @@
 ENTRY(__strcpy_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -165,7 +164,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcpy_chk_fail)
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy.S b/libc/arch-arm/cortex-a9/bionic/memcpy.S
index 8dcd937..db3e26f 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy.S
@@ -50,7 +50,6 @@
 ENTRY(memcpy)
         pld     [r1, #0]
         stmfd   sp!, {r0, lr}
-        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
@@ -64,7 +63,6 @@
 ENTRY_PRIVATE(__memcpy_chk_fail)
         // Preserve lr for backtrace.
         push    {lr}
-        .save   {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
index c385657..5e81305 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
@@ -33,7 +33,6 @@
  */
 
 ENTRY_PRIVATE(MEMCPY_BASE)
-        .save       {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
@@ -139,14 +138,12 @@
 END(MEMCPY_BASE)
 
 ENTRY_PRIVATE(MEMCPY_BASE_ALIGNED)
-        .save       {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
 
         /* Simple arm-only copy loop to handle aligned copy operations */
         stmfd       sp!, {r4-r8}
-        .save       {r4-r8}
         .cfi_adjust_cfa_offset 20
         .cfi_rel_offset r4, 0
         .cfi_rel_offset r5, 4
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index a5057eb..299f5a2 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -42,7 +42,6 @@
 
         // Preserve lr for backtrace.
         push        {lr}
-        .save       {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
@@ -72,7 +71,6 @@
         bhi         __memset_large_copy
 
         stmfd       sp!, {r0}
-        .save       {r0}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset r0, 0
 
@@ -114,7 +112,6 @@
          * offset = (4-(src&3))&3 = -src & 3
          */
         stmfd       sp!, {r0, r4-r7, lr}
-        .save       {r0, r4-r7, lr}
         .cfi_def_cfa_offset 24
         .cfi_rel_offset r0, 0
         .cfi_rel_offset r4, 4
diff --git a/libc/arch-arm/cortex-a9/bionic/strcmp.S b/libc/arch-arm/cortex-a9/bionic/strcmp.S
index 2411c65..4ff26c0 100644
--- a/libc/arch-arm/cortex-a9/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a9/bionic/strcmp.S
@@ -168,7 +168,6 @@
         bne     .L_do_align
 
         /* Fast path.  */
-        .save   {r4-r7}
         init
 
 .L_doubleword_aligned:
diff --git a/libc/arch-arm/cortex-a9/bionic/strcpy.S b/libc/arch-arm/cortex-a9/bionic/strcpy.S
index 9e9610b..d705aa3 100644
--- a/libc/arch-arm/cortex-a9/bionic/strcpy.S
+++ b/libc/arch-arm/cortex-a9/bionic/strcpy.S
@@ -62,6 +62,11 @@
 
     .macro m_push
     push    {r0, r4, r5, lr}
+    .cfi_def_cfa_offset 16
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r4, 4
+    .cfi_rel_offset r5, 8
+    .cfi_rel_offset lr, 12
     .endm // m_push
 
     .macro m_ret inst
@@ -77,31 +82,31 @@
 ENTRY(strcpy)
     // Unroll the first 8 bytes that will be copied.
     m_push
-    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
-    m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
+    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
+    m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue
 
-strcpy_finish:
+.Lstrcpy_finish:
     m_ret   inst=pop
 
-strcpy_continue:
+.Lstrcpy_continue:
     pld     [r1, #0]
     ands    r3, r0, #7
-    bne     strcpy_align_dst
+    bne     .Lstrcpy_align_dst
 
-strcpy_check_src_align:
+.Lstrcpy_check_src_align:
     // At this point dst is aligned to a double word, check if src
     // is also aligned to a double word.
     ands    r3, r1, #7
-    bne     strcpy_unaligned_copy
+    bne     .Lstrcpy_unaligned_copy
 
     .p2align 2
-strcpy_mainloop:
+.Lstrcpy_mainloop:
     ldmia   r1!, {r2, r3}
 
     pld     [r1, #64]
@@ -109,17 +114,17 @@
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     stmia   r0!, {r2, r3}
-    b       strcpy_mainloop
+    b       .Lstrcpy_mainloop
 
-strcpy_zero_in_first_register:
+.Lstrcpy_zero_in_first_register:
     lsls    lr, ip, #17
     itt     ne
     strbne  r2, [r0]
@@ -136,7 +141,7 @@
     strb    r3, [r0]
     m_ret   inst=pop
 
-strcpy_zero_in_second_register:
+.Lstrcpy_zero_in_second_register:
     lsls    lr, ip, #17
     ittt    ne
     stmiane r0!, {r2}
@@ -156,18 +161,18 @@
     strb    r4, [r0]
     m_ret   inst=pop
 
-strcpy_align_dst:
+.Lstrcpy_align_dst:
     // Align to a double word (64 bits).
     rsb     r3, r3, #8
     lsls    ip, r3, #31
-    beq     strcpy_align_to_32
+    beq     .Lstrcpy_align_to_32
 
     ldrb    r2, [r1], #1
     strb    r2, [r0], #1
-    cbz     r2, strcpy_complete
+    cbz     r2, .Lstrcpy_complete
 
-strcpy_align_to_32:
-    bcc     strcpy_align_to_64
+.Lstrcpy_align_to_32:
+    bcc     .Lstrcpy_align_to_64
 
     ldrb    r4, [r1], #1
     strb    r4, [r0], #1
@@ -180,76 +185,76 @@
     it      eq
     m_ret   inst=popeq
 
-strcpy_align_to_64:
+.Lstrcpy_align_to_64:
     tst     r3, #4
-    beq     strcpy_check_src_align
+    beq     .Lstrcpy_check_src_align
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
     stmia   r0!, {r2}
-    b       strcpy_check_src_align
+    b       .Lstrcpy_check_src_align
 
-strcpy_complete:
+.Lstrcpy_complete:
     m_ret   inst=pop
 
-strcpy_unaligned_copy:
+.Lstrcpy_unaligned_copy:
     // Dst is aligned to a double word, while src is at an unknown alignment.
     // There are 7 different versions of the unaligned copy code
     // to prevent overreading the src. The mainloop of every single version
     // will store 64 bits per loop. The difference is how much of src can
     // be read without potentially crossing a page boundary.
     tbb     [pc, r3]
-strcpy_unaligned_branchtable:
+.Lstrcpy_unaligned_branchtable:
     .byte 0
-    .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
-    .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
+    .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)
 
     .p2align 2
     // Can read 7 bytes before possibly crossing a page.
-strcpy_unalign7:
+.Lstrcpy_unalign7:
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldrb    r3, [r1]
-    cbz     r3, strcpy_unalign7_copy5bytes
+    cbz     r3, .Lstrcpy_unalign7_copy5bytes
     ldrb    r4, [r1, #1]
-    cbz     r4, strcpy_unalign7_copy6bytes
+    cbz     r4, .Lstrcpy_unalign7_copy6bytes
     ldrb    r5, [r1, #2]
-    cbz     r5, strcpy_unalign7_copy7bytes
+    cbz     r5, .Lstrcpy_unalign7_copy7bytes
 
     ldr     r3, [r1], #4
     pld     [r1, #64]
 
     lsrs    ip, r3, #24
     stmia   r0!, {r2, r3}
-    beq     strcpy_unalign_return
-    b       strcpy_unalign7
+    beq     .Lstrcpy_unalign_return
+    b       .Lstrcpy_unalign7
 
-strcpy_unalign7_copy5bytes:
+.Lstrcpy_unalign7_copy5bytes:
     stmia   r0!, {r2}
     strb    r3, [r0]
-strcpy_unalign_return:
+.Lstrcpy_unalign_return:
     m_ret   inst=pop
 
-strcpy_unalign7_copy6bytes:
+.Lstrcpy_unalign7_copy6bytes:
     stmia   r0!, {r2}
     strb    r3, [r0], #1
     strb    r4, [r0], #1
     m_ret   inst=pop
 
-strcpy_unalign7_copy7bytes:
+.Lstrcpy_unalign7_copy7bytes:
     stmia   r0!, {r2}
     strb    r3, [r0], #1
     strb    r4, [r0], #1
@@ -258,30 +263,30 @@
 
     .p2align 2
     // Can read 6 bytes before possibly crossing a page.
-strcpy_unalign6:
+.Lstrcpy_unalign6:
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldrb    r4, [r1]
-    cbz     r4, strcpy_unalign_copy5bytes
+    cbz     r4, .Lstrcpy_unalign_copy5bytes
     ldrb    r5, [r1, #1]
-    cbz     r5, strcpy_unalign_copy6bytes
+    cbz     r5, .Lstrcpy_unalign_copy6bytes
 
     ldr     r3, [r1], #4
     pld     [r1, #64]
 
     tst     r3, #0xff0000
-    beq     strcpy_unalign6_copy7bytes
+    beq     .Lstrcpy_unalign6_copy7bytes
     lsrs    ip, r3, #24
     stmia   r0!, {r2, r3}
-    beq     strcpy_unalign_return
-    b       strcpy_unalign6
+    beq     .Lstrcpy_unalign_return
+    b       .Lstrcpy_unalign6
 
-strcpy_unalign6_copy7bytes:
+.Lstrcpy_unalign6_copy7bytes:
     stmia   r0!, {r2}
     strh    r3, [r0], #2
     lsr     r3, #16
@@ -290,16 +295,16 @@
 
     .p2align 2
     // Can read 5 bytes before possibly crossing a page.
-strcpy_unalign5:
+.Lstrcpy_unalign5:
     ldr     r2, [r1], #4
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldrb    r4, [r1]
-    cbz     r4, strcpy_unalign_copy5bytes
+    cbz     r4, .Lstrcpy_unalign_copy5bytes
 
     ldr     r3, [r1], #4
 
@@ -308,17 +313,17 @@
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     stmia   r0!, {r2, r3}
-    b       strcpy_unalign5
+    b       .Lstrcpy_unalign5
 
-strcpy_unalign_copy5bytes:
+.Lstrcpy_unalign_copy5bytes:
     stmia   r0!, {r2}
     strb    r4, [r0]
     m_ret   inst=pop
 
-strcpy_unalign_copy6bytes:
+.Lstrcpy_unalign_copy6bytes:
     stmia   r0!, {r2}
     strb    r4, [r0], #1
     strb    r5, [r0]
@@ -326,13 +331,13 @@
 
     .p2align 2
     // Can read 4 bytes before possibly crossing a page.
-strcpy_unalign4:
+.Lstrcpy_unalign4:
     ldmia   r1!, {r2}
 
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     ldmia   r1!, {r3}
     pld     [r1, #64]
@@ -340,20 +345,20 @@
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     stmia   r0!, {r2, r3}
-    b       strcpy_unalign4
+    b       .Lstrcpy_unalign4
 
     .p2align 2
     // Can read 3 bytes before possibly crossing a page.
-strcpy_unalign3:
+.Lstrcpy_unalign3:
     ldrb    r2, [r1]
-    cbz     r2, strcpy_unalign3_copy1byte
+    cbz     r2, .Lstrcpy_unalign3_copy1byte
     ldrb    r3, [r1, #1]
-    cbz     r3, strcpy_unalign3_copy2bytes
+    cbz     r3, .Lstrcpy_unalign3_copy2bytes
     ldrb    r4, [r1, #2]
-    cbz     r4, strcpy_unalign3_copy3bytes
+    cbz     r4, .Lstrcpy_unalign3_copy3bytes
 
     ldr     r2, [r1], #4
     ldr     r3, [r1], #4
@@ -361,26 +366,26 @@
     pld     [r1, #64]
 
     lsrs    lr, r2, #24
-    beq     strcpy_unalign_copy4bytes
+    beq     .Lstrcpy_unalign_copy4bytes
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     stmia   r0!, {r2, r3}
-    b       strcpy_unalign3
+    b       .Lstrcpy_unalign3
 
-strcpy_unalign3_copy1byte:
+.Lstrcpy_unalign3_copy1byte:
     strb    r2, [r0]
     m_ret   inst=pop
 
-strcpy_unalign3_copy2bytes:
+.Lstrcpy_unalign3_copy2bytes:
     strb    r2, [r0], #1
     strb    r3, [r0]
     m_ret   inst=pop
 
-strcpy_unalign3_copy3bytes:
+.Lstrcpy_unalign3_copy3bytes:
     strb    r2, [r0], #1
     strb    r3, [r0], #1
     strb    r4, [r0]
@@ -388,34 +393,34 @@
 
     .p2align 2
     // Can read 2 bytes before possibly crossing a page.
-strcpy_unalign2:
+.Lstrcpy_unalign2:
     ldrb    r2, [r1]
-    cbz     r2, strcpy_unalign_copy1byte
+    cbz     r2, .Lstrcpy_unalign_copy1byte
     ldrb    r3, [r1, #1]
-    cbz     r3, strcpy_unalign_copy2bytes
+    cbz     r3, .Lstrcpy_unalign_copy2bytes
 
     ldr     r2, [r1], #4
     ldr     r3, [r1], #4
     pld     [r1, #64]
 
     tst     r2, #0xff0000
-    beq     strcpy_unalign_copy3bytes
+    beq     .Lstrcpy_unalign_copy3bytes
     lsrs    ip, r2, #24
-    beq     strcpy_unalign_copy4bytes
+    beq     .Lstrcpy_unalign_copy4bytes
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     stmia   r0!, {r2, r3}
-    b       strcpy_unalign2
+    b       .Lstrcpy_unalign2
 
     .p2align 2
     // Can read 1 byte before possibly crossing a page.
-strcpy_unalign1:
+.Lstrcpy_unalign1:
     ldrb    r2, [r1]
-    cbz     r2, strcpy_unalign_copy1byte
+    cbz     r2, .Lstrcpy_unalign_copy1byte
 
     ldr     r2, [r1], #4
     ldr     r3, [r1], #4
@@ -425,32 +430,32 @@
     sub     ip, r2, #0x01010101
     bic     ip, ip, r2
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_first_register
+    bne     .Lstrcpy_zero_in_first_register
 
     sub     ip, r3, #0x01010101
     bic     ip, ip, r3
     ands    ip, ip, #0x80808080
-    bne     strcpy_zero_in_second_register
+    bne     .Lstrcpy_zero_in_second_register
 
     stmia   r0!, {r2, r3}
-    b       strcpy_unalign1
+    b       .Lstrcpy_unalign1
 
-strcpy_unalign_copy1byte:
+.Lstrcpy_unalign_copy1byte:
     strb    r2, [r0]
     m_ret   inst=pop
 
-strcpy_unalign_copy2bytes:
+.Lstrcpy_unalign_copy2bytes:
     strb    r2, [r0], #1
     strb    r3, [r0]
     m_ret   inst=pop
 
-strcpy_unalign_copy3bytes:
+.Lstrcpy_unalign_copy3bytes:
     strh    r2, [r0], #2
     lsr     r2, #16
     strb    r2, [r0]
     m_ret   inst=pop
 
-strcpy_unalign_copy4bytes:
+.Lstrcpy_unalign_copy4bytes:
     stmia   r0, {r2}
     m_ret   inst=pop
 END(strcpy)
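
A note on the branch tables that feed `tbb` in both strcpy versions: `tbb [pc, r3]` reads the byte at index r3 from the table that immediately follows the instruction (the pc base equals the table start there), doubles it, and adds it to the pc. Thumb targets sit on halfword boundaries, which is why every entry is written as a label distance divided by 2. A trimmed sketch with hypothetical labels:

        tbb     [pc, r3]                    // target = table base + 2 * table[r3]
    .Ltable:
        .byte   0                           // index 0 unused: the aligned case never gets here
        .byte   ((.Lcase1 - .Ltable)/2)     // r3 == 1 branches to .Lcase1
        .byte   ((.Lcase2 - .Ltable)/2)     // r3 == 2 branches to .Lcase2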
diff --git a/libc/arch-arm/denver/bionic/__strcat_chk.S b/libc/arch-arm/denver/bionic/__strcat_chk.S
index 36da2d9..a2e9c22 100644
--- a/libc/arch-arm/denver/bionic/__strcat_chk.S
+++ b/libc/arch-arm/denver/bionic/__strcat_chk.S
@@ -40,12 +40,10 @@
 ENTRY(__strcat_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
     push    {r4, r5}
-    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -195,9 +193,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcat_chk_failed)
-    .save   {r0, lr}
-    .save   {r4, r5}
-
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/denver/bionic/__strcpy_chk.S b/libc/arch-arm/denver/bionic/__strcpy_chk.S
index c3e3e14..db76686 100644
--- a/libc/arch-arm/denver/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/denver/bionic/__strcpy_chk.S
@@ -39,7 +39,6 @@
 ENTRY(__strcpy_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -161,7 +160,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcpy_chk_failed)
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/denver/bionic/memcpy.S b/libc/arch-arm/denver/bionic/memcpy.S
index da4f3dd..410b663 100644
--- a/libc/arch-arm/denver/bionic/memcpy.S
+++ b/libc/arch-arm/denver/bionic/memcpy.S
@@ -72,7 +72,6 @@
 ENTRY(memcpy)
         pld     [r1, #64]
         push    {r0, lr}
-        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
@@ -85,7 +84,6 @@
 ENTRY_PRIVATE(__memcpy_chk_fail)
         // Preserve lr for backtrace.
         push    {lr}
-        .save   {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S
index 34becdb..246f159 100644
--- a/libc/arch-arm/krait/bionic/__strcat_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcat_chk.S
@@ -40,12 +40,10 @@
 ENTRY(__strcat_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
     push    {r4, r5}
-    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -194,8 +192,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcat_chk_failed)
-    .save   {r0, lr}
-    .save   {r4, r5}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S
index c3e3e14..db76686 100644
--- a/libc/arch-arm/krait/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S
@@ -39,7 +39,6 @@
 ENTRY(__strcpy_chk)
     pld     [r0, #0]
     push    {r0, lr}
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -161,7 +160,6 @@
 #include "memcpy_base.S"
 
 ENTRY_PRIVATE(__strcpy_chk_failed)
-    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S
index 0b7b276..9ff46a8 100644
--- a/libc/arch-arm/krait/bionic/memcpy.S
+++ b/libc/arch-arm/krait/bionic/memcpy.S
@@ -53,7 +53,6 @@
 ENTRY(memcpy)
         pld     [r1, #64]
         stmfd   sp!, {r0, lr}
-        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
@@ -66,7 +65,6 @@
 ENTRY_PRIVATE(__memcpy_chk_fail)
         // Preserve lr for backtrace.
         push    {lr}
-        .save   {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S
index 99fc255..035dcf1 100644
--- a/libc/arch-arm/krait/bionic/memcpy_base.S
+++ b/libc/arch-arm/krait/bionic/memcpy_base.S
@@ -36,7 +36,6 @@
 // Assumes neon instructions and a cache line size of 32 bytes.
 
 ENTRY_PRIVATE(MEMCPY_BASE)
-        .save {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S
index 5d1943b..e9f6431 100644
--- a/libc/arch-arm/krait/bionic/memset.S
+++ b/libc/arch-arm/krait/bionic/memset.S
@@ -43,7 +43,6 @@
         bls         .L_done
 
         // Preserve lr for backtrace.
-        .save       {lr}
         push        {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
@@ -69,7 +68,6 @@
 
 /* memset() returns its first argument.  */
 ENTRY(memset)
-        .save       {r0}
         stmfd       sp!, {r0}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset r0, 0
diff --git a/libc/arch-arm/krait/bionic/strcmp.S b/libc/arch-arm/krait/bionic/strcmp.S
index eacb82a..9121c01 100644
--- a/libc/arch-arm/krait/bionic/strcmp.S
+++ b/libc/arch-arm/krait/bionic/strcmp.S
@@ -168,7 +168,6 @@
         bne     .L_do_align
 
         /* Fast path.  */
-        .save   {r4-r7}
         init
 
 .L_doubleword_aligned:
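
The strcmp sources above already follow the convention the strcpy renames adopt: a label that begins with `.L` is assembler-local, resolved at assembly time and never emitted into the object file's symbol table. Plain labels like the old `strcpy_mainloop` become real symbols, which clutters `nm` output and can make stack symbolizers attribute addresses inside strcpy to an internal label rather than to the function. The difference in sketch form:

    strcpy_mainloop:        // before: a symbol-table entry tools may mistake for a function
        nop
    .Lstrcpy_mainloop:      // after: branch target only; invisible to nm and symbolizers
        nop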
diff --git a/libc/arch-mips64/mips64.mk b/libc/arch-mips64/mips64.mk
index 0d4b727..b962283 100644
--- a/libc/arch-mips64/mips64.mk
+++ b/libc/arch-mips64/mips64.mk
@@ -49,7 +49,6 @@
 libc_bionic_src_files_mips64 += \
     arch-mips64/bionic/__bionic_clone.S \
     arch-mips64/bionic/_exit_with_stack_teardown.S \
-    arch-mips64/bionic/__get_sp.S \
     arch-mips64/bionic/_setjmp.S \
     arch-mips64/bionic/setjmp.S \
     arch-mips64/bionic/sigsetjmp.S \