Use unified syntax to compile with both llvm and gcc.

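The arch-arm .S files now declare ".syntax unified" and use unified
mnemonics (e.g. strcsb -> strbcs, stmcsia -> stmcs, eoreqs -> eorseq).
The arch-arm64 .S files reference vector elements as ".d[0]" instead
of ".2d[0]". With these changes the -no-integrated-as workaround for
Clang is removed from libc/Android.mk.

All arch-arm and arch-arm64 .S files were compiled by gcc both with
and without this patch, and the output object files were identical.
When compiled with llvm and this patch, the output files were also
identical to gcc's output.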

BUG: 18061004
Change-Id: I458914d512ddf5496e4eb3d288bf032cd526d32b
diff --git a/libc/Android.mk b/libc/Android.mk
index fc56674..0aba776 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -941,10 +941,6 @@
 LOCAL_CFLAGS := $(libc_common_cflags) \
     -Wframe-larger-than=2048 \
 
-# memcpy.S, memchr.S, etc. do not compile with Clang.
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
-
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast
 LOCAL_C_INCLUDES := $(libc_common_c_includes) bionic/libstdc++/include
@@ -972,10 +968,6 @@
 LOCAL_CFLAGS := $(libc_common_cflags) \
     -Wframe-larger-than=2048 \
 
-# memcpy.S, memchr.S, etc. do not compile with Clang.
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
-
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast
 LOCAL_C_INCLUDES := $(libc_common_c_includes) bionic/libstdc++/include
@@ -1024,10 +1016,6 @@
 LOCAL_CFLAGS := $(libc_common_cflags) \
     -Wframe-larger-than=2048 \
 
-# memcpy.S, memchr.S, etc. do not compile with Clang.
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
-
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index 299f5a2..8ee6ac2 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -35,6 +35,7 @@
  */
 
     .fpu    neon
+    .syntax unified
 
 ENTRY(__memset_chk)
         cmp         r2, r3
@@ -100,9 +101,9 @@
 1:      bge         2f
         vst1.32     {d0[0]}, [r0]!
 2:      movs        ip, r2, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
+        strbmi      r1, [r0], #1
+        strbcs      r1, [r0], #1
+        strbcs      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
 END(memset)
@@ -131,11 +132,11 @@
         orr         r1, r1, r1, lsr #16
 
         movs        r12, r3, lsl #31
-        strcsb      r1, [r0], #1    /* can't use strh (alignment unknown) */
-        strcsb      r1, [r0], #1
-        strmib      r1, [r0], #1
+        strbcs      r1, [r0], #1    /* can't use strh (alignment unknown) */
+        strbcs      r1, [r0], #1
+        strbmi      r1, [r0], #1
         subs        r2, r2, r3
-        ldmlsfd     sp!, {r0, r4-r7, lr}   /* return */
+        popls       {r0, r4-r7, lr}   /* return */
         bxls        lr
 
         /* align the destination to a cache-line */
@@ -155,9 +156,9 @@
 
         /* conditionally writes 0 to 7 words (length in r3) */
         movs        r3, r3, lsl #28
-        stmcsia     r0!, {r1, lr}
-        stmcsia     r0!, {r1, lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmmi       r0!, {r1, lr}
         movs        r3, r3, lsl #2
         strcs       r1, [r0], #4
 
@@ -172,13 +173,13 @@
 
         /* conditionally stores 0 to 31 bytes */
         movs        r2, r2, lsl #28
-        stmcsia     r0!, {r1,r3,r12,lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1,r3,r12,lr}
+        stmmi       r0!, {r1, lr}
         movs        r2, r2, lsl #2
         strcs       r1, [r0], #4
-        strmih      r1, [r0], #2
+        strhmi      r1, [r0], #2
         movs        r2, r2, lsl #2
-        strcsb      r1, [r0]
+        strbcs      r1, [r0]
         ldmfd       sp!, {r0, r4-r7, lr}
         bx          lr
 END(__memset_large_copy)
diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S
index 70a2a58..c78dbd4 100644
--- a/libc/arch-arm/generic/bionic/memcmp.S
+++ b/libc/arch-arm/generic/bionic/memcmp.S
@@ -40,6 +40,8 @@
  * Optimized memcmp() for Cortex-A9.
  */
 
+.syntax unified
+
 ENTRY(memcmp)
         pld         [r0, #(CACHE_LINE_SIZE * 0)]
         pld         [r0, #(CACHE_LINE_SIZE * 1)]
@@ -161,25 +163,25 @@
         eors        r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         ldreq       r0, [r4], #4
         ldreq       lr, [r1, #4]!
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         ldreq       r0, [r4], #4
         ldreq       lr, [r1, #4]!
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         ldreq       r0, [r4], #4
         ldreq       lr, [r1, #4]!
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         bne         2f
         subs        r2, r2, #32
         bhs         0b
@@ -263,17 +265,17 @@
         ldreq       lr, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, lr, lsl #16
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         moveq       ip, lr, lsr #16
         ldreq       lr, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, lr, lsl #16
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         moveq       ip, lr, lsr #16
         ldreq       lr, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, lr, lsl #16
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         bne         7f
         subs        r2, r2, #16
         bhs         6b
@@ -317,7 +319,7 @@
         ldreq       r7, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, r7, lsl r6
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         bne         7f
         subs        r2, r2, #8
         bhs         6b
diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S
index b0c79ab..ea5a399 100644
--- a/libc/arch-arm/generic/bionic/memcpy.S
+++ b/libc/arch-arm/generic/bionic/memcpy.S
@@ -37,6 +37,8 @@
          * so we have to preserve R0.
          */
 
+         .syntax unified
+
 ENTRY(__memcpy_chk)
         cmp         r2, r3
         bhi         __memcpy_chk_fail
@@ -81,12 +83,12 @@
          */
         movs        r12, r3, lsl #31
         sub         r2, r2, r3      /* we know that r3 <= r2 because r2 >= 4 */
-        ldrmib      r3, [r1], #1
-        ldrcsb      r4, [r1], #1
-        ldrcsb      r12,[r1], #1
-        strmib      r3, [r0], #1
-        strcsb      r4, [r0], #1
-        strcsb      r12,[r0], #1
+        ldrbmi      r3, [r1], #1
+        ldrbcs      r4, [r1], #1
+        ldrbcs      r12,[r1], #1
+        strbmi      r3, [r0], #1
+        strbcs      r4, [r0], #1
+        strbcs      r12,[r0], #1
 
 .Lsrc_aligned:
 
@@ -109,10 +111,10 @@
 
         /* conditionally copies 0 to 7 words (length in r3) */
         movs        r12, r3, lsl #28
-        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
-        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
-        stmcsia     r0!, {r4, r5, r6, r7}
-        stmmiia     r0!, {r8, r9}
+        ldmcs       r1!, {r4, r5, r6, r7}   /* 16 bytes */
+        ldmmi       r1!, {r8, r9}           /*  8 bytes */
+        stmcs       r0!, {r4, r5, r6, r7}
+        stmmi       r0!, {r8, r9}
         tst         r3, #0x4
         ldrne       r10,[r1], #4            /*  4 bytes */
         strne       r10,[r0], #4
@@ -177,18 +179,18 @@
 
         /* conditionnaly copies 0 to 31 bytes */
         movs        r12, r2, lsl #28
-        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
-        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
-        stmcsia     r0!, {r4, r5, r6, r7}
-        stmmiia     r0!, {r8, r9}
+        ldmcs       r1!, {r4, r5, r6, r7}   /* 16 bytes */
+        ldmmi       r1!, {r8, r9}           /*  8 bytes */
+        stmcs       r0!, {r4, r5, r6, r7}
+        stmmi       r0!, {r8, r9}
         movs        r12, r2, lsl #30
         ldrcs       r3, [r1], #4            /*  4 bytes */
-        ldrmih      r4, [r1], #2            /*  2 bytes */
+        ldrhmi      r4, [r1], #2            /*  2 bytes */
         strcs       r3, [r0], #4
-        strmih      r4, [r0], #2
+        strhmi      r4, [r0], #2
         tst         r2, #0x1
-        ldrneb      r3, [r1]                /*  last byte  */
-        strneb      r3, [r0]
+        ldrbne      r3, [r1]                /*  last byte  */
+        strbne      r3, [r0]
 
         /* we're done! restore everything and return */
 1:      ldmfd       sp!, {r5-r11}
@@ -228,11 +230,11 @@
          * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
          */
         movs        r5, r5, lsl #31
-        strmib      r3, [r0], #1
+        strbmi      r3, [r0], #1
         movmi       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
         movcs       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
         movcs       r3, r3, lsr #8
 
         cmp         r2, #4
@@ -363,23 +365,23 @@
 .Lpartial_word_tail:
         /* we have a partial word in the input buffer */
         movs        r5, lr, lsl #(31-3)
-        strmib      r3, [r0], #1
+        strbmi      r3, [r0], #1
         movmi       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
         movcs       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
 
         /* Refill spilled registers from the stack. Don't update sp. */
         ldmfd       sp, {r5-r11}
 
 .Lcopy_last_3_and_return:
         movs        r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
-        ldrmib      r2, [r1], #1
-        ldrcsb      r3, [r1], #1
-        ldrcsb      r12,[r1]
-        strmib      r2, [r0], #1
-        strcsb      r3, [r0], #1
-        strcsb      r12,[r0]
+        ldrbmi      r2, [r1], #1
+        ldrbcs      r3, [r1], #1
+        ldrbcs      r12,[r1]
+        strbmi      r2, [r0], #1
+        strbcs      r3, [r0], #1
+        strbcs      r12,[r0]
 
         /* we're done! restore sp and spilled registers and return */
         add         sp,  sp, #28
diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S
index be35de9..d17a9c4 100644
--- a/libc/arch-arm/generic/bionic/memset.S
+++ b/libc/arch-arm/generic/bionic/memset.S
@@ -35,6 +35,8 @@
          * memset() returns its first argument.
          */
 
+         .syntax unified
+
 ENTRY(__memset_chk)
         cmp         r2, r3
         bls         done
@@ -76,11 +78,11 @@
         orr         r1, r1, r1, lsr #16
 
         movs        r12, r3, lsl #31
-        strcsb      r1, [r0], #1    /* can't use strh (alignment unknown) */
-        strcsb      r1, [r0], #1
-        strmib      r1, [r0], #1
+        strbcs      r1, [r0], #1    /* can't use strh (alignment unknown) */
+        strbcs      r1, [r0], #1
+        strbmi      r1, [r0], #1
         subs        r2, r2, r3
-        ldmlsfd     sp!, {r0, r4-r7, lr}    /* return */
+        popls       {r0, r4-r7, lr}    /* return */
         bxls        lr
 
         /* align the destination to a cache-line */
@@ -100,9 +102,9 @@
 
         /* conditionally writes 0 to 7 words (length in r3) */
         movs        r3, r3, lsl #28
-        stmcsia     r0!, {r1, lr}
-        stmcsia     r0!, {r1, lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmmi       r0!, {r1, lr}
         movs        r3, r3, lsl #2
         strcs       r1, [r0], #4
 
@@ -117,13 +119,13 @@
 
         /* conditionally stores 0 to 31 bytes */
         movs        r2, r2, lsl #28
-        stmcsia     r0!, {r1,r3,r12,lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1,r3,r12,lr}
+        stmmi       r0!, {r1, lr}
         movs        r2, r2, lsl #2
         strcs       r1, [r0], #4
-        strmih      r1, [r0], #2
+        strhmi      r1, [r0], #2
         movs        r2, r2, lsl #2
-        strcsb      r1, [r0]
+        strbcs      r1, [r0]
         ldmfd       sp!, {r0, r4-r7, lr}
         bx          lr
 END(memset)
diff --git a/libc/arch-arm/generic/bionic/strcpy.S b/libc/arch-arm/generic/bionic/strcpy.S
index 802a62d..89ea098 100644
--- a/libc/arch-arm/generic/bionic/strcpy.S
+++ b/libc/arch-arm/generic/bionic/strcpy.S
@@ -32,6 +32,8 @@
 #include <machine/cpu-features.h>
 #include <private/bionic_asm.h>
 
+.syntax unified
+
 ENTRY(strcpy)
 	pld	[r1, #0]
 	eor	r2, r0, r1
@@ -108,15 +110,15 @@
 #ifdef __ARMEB__
 	tst	r2, #0xff00
 	iteet	ne
-	strneh	r2, [ip], #2
+	strhne	r2, [ip], #2
 	lsreq	r2, r2, #8
-	streqb	r2, [ip]
+	strbeq	r2, [ip]
 	tstne	r2, #0xff
 #else
 	tst	r2, #0xff
 	itet	ne
-	strneh	r2, [ip], #2
-	streqb	r2, [ip]
+	strhne	r2, [ip], #2
+	strbeq	r2, [ip]
 	tstne	r2, #0xff00
 #endif
 	bne	5b
diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S
index e9f6431..a4fbe17 100644
--- a/libc/arch-arm/krait/bionic/memset.S
+++ b/libc/arch-arm/krait/bionic/memset.S
@@ -37,6 +37,7 @@
  */
 
     .fpu    neon
+    .syntax unified
 
 ENTRY(__memset_chk)
         cmp         r2, r3
@@ -98,9 +99,9 @@
 1:      bge         2f
         vst1.32     {d0[0]}, [r0]!
 2:      movs        ip, r2, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
+        strbmi      r1, [r0], #1
+        strbcs      r1, [r0], #1
+        strbcs      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
 END(memset)
diff --git a/libc/arch-arm64/generic/bionic/memchr.S b/libc/arch-arm64/generic/bionic/memchr.S
index e5ea57d..a00dd8d 100644
--- a/libc/arch-arm64/generic/bionic/memchr.S
+++ b/libc/arch-arm64/generic/bionic/memchr.S
@@ -101,7 +101,7 @@
 	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
 	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
 	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
-	mov	synd, vend.2d[0]
+	mov	synd, vend.d[0]
 	/* Clear the soff*2 lower bits */
 	lsl	tmp, soff, #1
 	lsr	synd, synd, tmp
@@ -121,7 +121,7 @@
 	/* Use a fast check for the termination condition */
 	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
 	addp	vend.2d, vend.2d, vend.2d
-	mov	synd, vend.2d[0]
+	mov	synd, vend.d[0]
 	/* We're not out of data, loop if we haven't found the character */
 	cbz	synd, .Lloop
 
@@ -131,7 +131,7 @@
 	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
 	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
 	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
-	mov	synd, vend.2d[0]
+	mov	synd, vend.d[0]
 	/* Only do the clear for the last possible block */
 	b.hi	.Ltail
 
diff --git a/libc/arch-arm64/generic/bionic/strchr.S b/libc/arch-arm64/generic/bionic/strchr.S
index 469b83c..b54106d 100644
--- a/libc/arch-arm64/generic/bionic/strchr.S
+++ b/libc/arch-arm64/generic/bionic/strchr.S
@@ -109,7 +109,7 @@
 	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
 	lsr	tmp1, tmp3, tmp1
 
-	mov	tmp3, vend1.2d[0]
+	mov	tmp3, vend1.d[0]
 	bic	tmp1, tmp3, tmp1	// Mask padding bits.
 	cbnz	tmp1, .Ltail
 
@@ -124,7 +124,7 @@
 	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
 	orr	vend1.16b, vend1.16b, vend2.16b
 	addp	vend1.2d, vend1.2d, vend1.2d
-	mov	tmp1, vend1.2d[0]
+	mov	tmp1, vend1.d[0]
 	cbz	tmp1, .Lloop
 
 	/* Termination condition found.  Now need to establish exactly why
@@ -138,7 +138,7 @@
 	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
 	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
 
-	mov	tmp1, vend1.2d[0]
+	mov	tmp1, vend1.d[0]
 .Ltail:
 	/* Count the trailing zeros, by bit reversing...  */
 	rbit	tmp1, tmp1