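Clean up some ARMv4/ARMv5 cruft.

Use the pld instruction directly instead of going through
PLD(reg, #offset), and strip trailing whitespace while here.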

Change-Id: I29e836fea4b53901e29f96c6888869c35f6726be
diff --git a/libc/arch-arm/bionic/memcmp16.S b/libc/arch-arm/bionic/memcmp16.S
index 825c94f..afbb1b0 100644
--- a/libc/arch-arm/bionic/memcmp16.S
+++ b/libc/arch-arm/bionic/memcmp16.S
@@ -32,15 +32,15 @@
 /*
  * Optimized memcmp16() for ARM9.
  * This would not be optimal on XScale or ARM11, where more prefetching
- * and use of PLD will be needed.
+ * and use of pld will be needed.
  * The 2 major optimzations here are
  * (1) The main loop compares 16 bytes at a time
  * (2) The loads are scheduled in a way they won't stall
  */
 
 ENTRY(__memcmp16)
-        PLD         (r0, #0)
-        PLD         (r1, #0)
+        pld         [r0, #0]
+        pld         [r1, #0]
 
         /* take of the case where length is nul or the buffers are the same */
         cmp         r0, r1
@@ -62,13 +62,13 @@
         bpl         0f
 
         /* small blocks (less then 12 words) */
-        PLD         (r0, #32)
-        PLD         (r1, #32)
+        pld         [r0, #32]
+        pld         [r1, #32]
 
 1:      ldrh        r0, [r3], #2
         ldrh        ip, [r1], #2
         subs        r0, r0, ip
-        bxne        lr        
+        bxne        lr
         subs        r2, r2, #1
         bne         1b
         bx          lr
@@ -79,11 +79,11 @@
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r4, 0
         .cfi_rel_offset lr, 4
-        
+
         /* align first pointer to word boundary */
         tst         r3, #2
         beq         0f
-        
+
         ldrh        r0, [r3], #2
         ldrh        ip, [r1], #2
         sub         r2, r2, #1
@@ -111,10 +111,10 @@
         ldr         ip, [r1]
         subs        r2, r2, #(16 + 2)
         bmi         1f
-        
+
 0:
-        PLD         (r3, #64)
-        PLD         (r1, #64)
+        pld         [r3, #64]
+        pld         [r1, #64]
         ldr         r0, [r3], #4
         ldr         lr, [r1, #4]!
         eors        r0, r0, ip
@@ -139,14 +139,14 @@
         ldreq       r0, [r3], #4
         ldreq       ip, [r1, #4]!
         eoreqs      r0, r0, lr
-        bne         2f        
+        bne         2f
         subs        r2, r2, #16
         bhs         0b
 
         /* do we have at least 2 words left? */
 1:      adds        r2, r2, #(16 - 2 + 2)
         bmi         4f
-        
+
         /* finish off 2 words at a time */
 3:      ldr         r0, [r3], #4
         ldr         ip, [r1], #4
@@ -195,8 +195,8 @@
         sub         r2, r2, #8
 
 6:
-        PLD         (r3, #64)
-        PLD         (r1, #64)
+        pld         [r3, #64]
+        pld         [r1, #64]
         mov         ip, lr, lsr #16
         ldr         lr, [r1], #4
         ldr         r0, [r3], #4
diff --git a/libc/arch-arm/bionic/memcpy.S b/libc/arch-arm/bionic/memcpy.S
index 0dc86d5..f25b3e3 100644
--- a/libc/arch-arm/bionic/memcpy.S
+++ b/libc/arch-arm/bionic/memcpy.S
@@ -352,9 +352,9 @@
 
         // preload the destination because we'll align it to a cache line
         // with small writes. Also start the source "pump".
-        PLD         (r0, #0)
-        PLD         (r1, #0)
-        PLD         (r1, #32)
+        pld         [r0, #0]
+        pld         [r1, #0]
+        pld         [r1, #32]
 
 		/* it simplifies things to take care of len<4 early */
 		cmp			r2, #4
@@ -442,7 +442,7 @@
         add         r12, r12, #64
 
 1:      ldmia       r1!, { r4-r11 }
-        PLD         (r12, #64)
+        pld         [r12, #64]
         subs        r2, r2, #32
 
         // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
@@ -563,7 +563,7 @@
         ldr         r12, [r1], #4
 1:      mov         r4, r12
 		ldmia		r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
         subs        r2, r2, #32
         ldrhs       r12, [r1], #4
 		orr			r3, r3, r4,		lsl #16
@@ -590,7 +590,7 @@
         ldr         r12, [r1], #4
 1:      mov         r4, r12
 		ldmia		r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
 		subs		r2, r2, #32
         ldrhs       r12, [r1], #4
 		orr			r3, r3, r4,		lsl #24
@@ -617,7 +617,7 @@
         ldr         r12, [r1], #4
 1:      mov         r4, r12
 		ldmia		r1!, {   r5,r6,r7,  r8,r9,r10,r11}
-        PLD         (r1, #64)
+        pld         [r1, #64]
 		subs		r2, r2, #32
         ldrhs       r12, [r1], #4
 		orr			r3, r3, r4,		lsl #8
diff --git a/libc/arch-arm/bionic/strcmp.S b/libc/arch-arm/bionic/strcmp.S
index 764a531..42d41d1 100644
--- a/libc/arch-arm/bionic/strcmp.S
+++ b/libc/arch-arm/bionic/strcmp.S
@@ -52,8 +52,8 @@
 #define magic2(REG) REG, lsl #7
 
 ENTRY(strcmp)
-	PLD(r0, #0)
-	PLD(r1, #0)
+	pld	[r0, #0]
+	pld	[r1, #0]
 	eor	r2, r0, r1
 	tst	r2, #3
 
@@ -88,8 +88,8 @@
 	orr	r4, r4, r4, lsl #16
 	.p2align	2
 4:
-	PLD(r0, #8)
-	PLD(r1, #8)
+	pld	[r0, #8]
+	pld	[r1, #8]
 	sub	r2, ip, magic1(r4)
 	cmp	ip, r3
 	itttt	eq