/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "arm_asm.h"

#ifdef __ARMEB__
#define S2LOMEM lsl
#define S2LOMEMEQ lsleq
#define S2HIMEM lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not __ARMEB__ */
#define S2LOMEM lsr
#define S2LOMEMEQ lsreq
#define S2HIMEM lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not __ARMEB__ */

        .syntax unified

#if defined (__thumb__)
        .thumb
        .thumb_func
#endif
        .global strcmp
        .type strcmp, %function
strcmp:

#if (defined (__thumb__) && !defined (__thumb2__))
1:
        ldrb    r2, [r0]
        ldrb    r3, [r1]
        adds    r0, r0, #1
        adds    r1, r1, #1
        cmp     r2, #0
        beq     2f
        cmp     r2, r3
        beq     1b
2:
        subs    r0, r2, r3
        bx      lr
#elif (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
1:
        ldrb    r2, [r0], #1
        ldrb    r3, [r1], #1
        /* "cmp r2, #1" sets C iff r2 != 0, so cmpcs compares the bytes only
           when the terminator has not been reached; the loop continues while
           the bytes are equal and nonzero.  */
        cmp     r2, #1
        it      cs
        cmpcs   r2, r3
        beq     1b
        subs    r0, r2, r3
        RETURN


#elif (defined (_ISA_THUMB_2) || defined (_ISA_ARM_6))
        /* Use LDRD whenever possible.  */

        /* The main thing to look out for when comparing large blocks is that
           the loads do not cross a page boundary when loading past the index
           of the byte with the first difference or the first string
           terminator.

           For example, if the strings are identical and the string terminator
           is at index k, byte-by-byte comparison will not load beyond address
           s1+k and s2+k; word-by-word comparison may load up to 3 bytes
           beyond k; double-word comparison may load up to 7 bytes beyond k.
           If the load of these bytes crosses a page boundary, it might cause
           a memory fault (if the page is not mapped) that would not have
           happened in byte-by-byte comparison.

           If an address is (double-)word aligned, then a load of a (double)
           word from that address will not cross a page boundary.
           Therefore, the algorithm below considers word and double-word
           alignment of the strings separately.  */

        /* High-level description of the algorithm.

           * The fast path: if both strings are double-word aligned,
             use LDRD to load two words from each string in every loop
             iteration.
           * If the strings have the same offset from a word boundary,
             use LDRB to load and compare byte by byte until
             the first string is aligned to a word boundary (at most 3 bytes).
             This is optimized for quick return on short unaligned strings.
           * If the strings have the same offset from a double-word boundary,
             use LDRD to load two words from each string in every loop
             iteration, as in the fast path.
           * If the strings do not have the same offset from a double-word
             boundary, load a word from the second string before the loop to
             initialize the queue.  Use LDRD to load two words from each
             string in every loop iteration.  Inside the loop, load the second
             word from the second string only after comparing the first word,
             using the queued value, to guarantee safety across page
             boundaries.
           * If the strings do not have the same offset from a word boundary,
             use LDR and a shift queue.  The order of loads and comparisons
             matters, as in the previous case.

           * Use UADD8 and SEL to compare words, and use REV and CLZ to
             compute the return value.
           * The only difference between ARM and Thumb modes is the use of
             the CBZ instruction.
           * The only difference between big- and little-endian is the use of
             REV in little-endian mode to compute the return value, instead
             of MOV.
           * No preload.  [TODO.]
        */
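
        /* For orientation, the fast-path loop below behaves roughly like the
           following C (an illustrative sketch only; zero_byte_in() and
           compare() are hypothetical stand-ins for the
           magic_find_zero_bytes macro and the do_return code):

             for (;;) {
               w0 = wp1[0]; w1 = wp1[1]; wp1 += 2;
               v0 = wp2[0]; v1 = wp2[1]; wp2 += 2;
               if (w0 != v0 || zero_byte_in (w0)) return compare (w0, v0);
               if (w1 != v1 || zero_byte_in (w1)) return compare (w1, v1);
             }
        */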

        .macro m_cbz reg label
#ifdef __thumb2__
        cbz     \reg, \label
#else /* not defined __thumb2__ */
        cmp     \reg, #0
        beq     \label
#endif /* not defined __thumb2__ */
        .endm /* m_cbz */

        .macro m_cbnz reg label
#ifdef __thumb2__
        cbnz    \reg, \label
#else /* not defined __thumb2__ */
        cmp     \reg, #0
        bne     \label
#endif /* not defined __thumb2__ */
        .endm /* m_cbnz */

        .macro init
        /* Macro to save temporary registers and prepare magic values.  */
        subs    sp, sp, #16
        strd    r4, r5, [sp, #8]
        strd    r6, r7, [sp]
        mvn     r6, #0 /* all F */
        mov     r7, #0 /* all 0 */
        .endm /* init */

        .macro magic_compare_and_branch w1 w2 label
        /* Macro to compare registers w1 and w2 and conditionally branch to
           label.  */
        cmp     \w1, \w2 /* Are w1 and w2 the same?  */
        magic_find_zero_bytes \w1
        it      eq
        cmpeq   ip, #0 /* Is there a zero byte in w1?  */
        bne     \label
        .endm /* magic_compare_and_branch */
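
        /* The EQ chaining in magic_compare_and_branch means the branch is
           taken either when w1 != w2 or when w1 contains a zero byte; in
           both cases ip still holds the zero-byte syndrome for use by
           do_return at the branch target.  */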

        .macro magic_find_zero_bytes w1
        /* Macro to find all-zero bytes in w1; the result is in ip.  */
#if (defined (__ARM_FEATURE_DSP))
        uadd8   ip, \w1, r6
        sel     ip, r7, r6
#else /* not defined (__ARM_FEATURE_DSP) */
        /* __ARM_FEATURE_DSP is not defined for some Cortex-M processors.
           Coincidentally, these processors only have Thumb-2 mode, where we
           can use the (large) magic constant directly as an immediate in
           instructions.  Note that we cannot use the magic constant in ARM
           mode, where we would need to create the constant in a register.  */
        sub     ip, \w1, #0x01010101
        bic     ip, ip, \w1
        and     ip, ip, #0x80808080
#endif /* not defined (__ARM_FEATURE_DSP) */
        .endm /* magic_find_zero_bytes */
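
        /* Worked example (illustrative): for \w1 = 0x12003456, the DSP pair
           yields ip = 0x00ff0000 (0xff marks the zero byte), and the fallback
           yields ip = 0x00800000 (0x80 marks the zero byte).  In both
           variants ip is nonzero exactly when \w1 contains a zero byte, and
           the least significant set flag marks the least significant zero
           byte; the DSP variant is exact, while the fallback may set extra
           flags only above that byte.  */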

        .macro setup_return w1 w2
#ifdef __ARMEB__
        mov     r1, \w1
        mov     r2, \w2
#else /* not __ARMEB__ */
        rev     r1, \w1
        rev     r2, \w2
#endif /* not __ARMEB__ */
        .endm /* setup_return */
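
        /* REV makes the word's first byte in memory its most significant
           byte, so the same CLZ/shift/compare sequence in do_return works
           for both endiannesses; on big-endian the bytes are already in that
           order and a plain MOV suffices.  */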

/*
        optpld  r0, #0
        optpld  r1, #0
*/

        /* Are both strings double-word aligned?  */
        orr     ip, r0, r1
        tst     ip, #7
        bne     do_align

        /* Fast path.  */
        init

doubleword_aligned:

        /* Get here when the strings to compare are double-word aligned.  */
        /* Compare two words in every iteration.  */
        .p2align 2
2:
/*
        optpld  r0, #16
        optpld  r1, #16
*/

        /* Load the next double-word from each string.  */
        ldrd    r2, r3, [r0], #8
        ldrd    r4, r5, [r1], #8

        magic_compare_and_branch w1=r2, w2=r4, label=return_24
        magic_compare_and_branch w1=r3, w2=r5, label=return_35
        b       2b

do_align:
        /* Is the first string word-aligned?  */
        ands    ip, r0, #3
        beq     word_aligned_r0

        /* Fast compare byte by byte until the first string is
           word-aligned.  */
        /* The offset of r0 from a word boundary is in ip.  Thus, the number
           of bytes to read until the next word boundary is 4-ip.  */
        bic     r0, r0, #3
        ldr     r2, [r0], #4
        lsls    ip, ip, #31
        beq     byte2
        bcs     byte3
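
        /* The LSLS #31 above dispatches on the two offset bits without extra
           compares: it leaves Z = !bit0 and C = bit1 of ip, so ip == 2 takes
           the BEQ (2 bytes to the boundary), ip == 3 takes the BCS (1 byte),
           and ip == 1 falls through (3 bytes).  */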

byte1:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE1_OFFSET
        subs    ip, r3, ip
        bne     fast_return
        m_cbz   reg=r3, label=fast_return

byte2:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE2_OFFSET
        subs    ip, r3, ip
        bne     fast_return
        m_cbz   reg=r3, label=fast_return

byte3:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE3_OFFSET
        subs    ip, r3, ip
        bne     fast_return
        m_cbnz  reg=r3, label=word_aligned_r0

fast_return:
        mov     r0, ip
        bx      lr

word_aligned_r0:
        init
        /* The first string is word-aligned.  */
        /* Is the second string word-aligned?  */
        ands    ip, r1, #3
        bne     strcmp_unaligned

word_aligned:
        /* The strings are word-aligned.  */
        /* Is the first string double-word aligned?  */
        tst     r0, #4
        beq     doubleword_aligned_r0

        /* If r0 is not double-word aligned yet, align it by loading
           and comparing the next word from each string.  */
        ldr     r2, [r0], #4
        ldr     r4, [r1], #4
        magic_compare_and_branch w1=r2 w2=r4 label=return_24

doubleword_aligned_r0:
        /* Get here when r0 is double-word aligned.  */
        /* Is r1 double-word aligned?  */
        tst     r1, #4
        beq     doubleword_aligned

        /* Get here when the strings to compare are word-aligned,
           r0 is double-word aligned, but r1 is not double-word aligned.  */

        /* Initialize the queue.  */
        ldr     r5, [r1], #4

        /* Compare two words in every iteration.  */
        .p2align 2
3:
/*
        optpld  r0, #16
        optpld  r1, #16
*/

        /* Load the next double-word from each string and compare.  */
        ldrd    r2, r3, [r0], #8
        magic_compare_and_branch w1=r2 w2=r5 label=return_25
        ldrd    r4, r5, [r1], #8
        magic_compare_and_branch w1=r3 w2=r4 label=return_34
        b       3b

        .macro miscmp_word offsetlo offsethi
        /* Macro to compare misaligned strings.  */
        /* r0, r1 are word-aligned, and at least one of the strings
           is not double-word aligned.  */
        /* Compare one word in every loop iteration.  */
        /* OFFSETLO is the original bit-offset of r1 from a word boundary,
           OFFSETHI is 32 - OFFSETLO (i.e., the offset from the next
           word).  */

        /* Initialize the shift queue.  */
        ldr     r5, [r1], #4

        /* Compare one word from each string in every loop iteration.  */
        .p2align 2
7:
        ldr     r3, [r0], #4
        S2LOMEM r5, r5, #\offsetlo
        magic_find_zero_bytes w1=r3
        cmp     r7, ip, S2HIMEM #\offsetlo
        and     r2, r3, r6, S2LOMEM #\offsetlo
        it      eq
        cmpeq   r2, r5
        bne     return_25
        ldr     r5, [r1], #4
        cmp     ip, #0
        eor     r3, r2, r3
        S2HIMEM r2, r5, #\offsethi
        it      eq
        cmpeq   r3, r2
        bne     return_32
        b       7b
        .endm /* miscmp_word */
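
        /* Roughly equivalent C for one miscmp_word iteration (little-endian
           view; an illustrative sketch only, with zero_byte_syndrome() and
           compare() as hypothetical stand-ins for magic_find_zero_bytes and
           the return paths):

             w = *wp1++;
             syn = zero_byte_syndrome (w);
             low = w & (~0u >> offsetlo);
             if ((syn << offsetlo) != 0 || low != (prev >> offsetlo))
               return compare (low, prev >> offsetlo);    /* return_25 */
             prev = *wp2++;   /* safe: no terminator in the compared part */
             high = w ^ low;
             if (syn != 0 || high != (prev << offsethi))
               return compare (high, prev << offsethi);   /* return_32 */
        */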

strcmp_unaligned:
        /* r0 is word-aligned, r1 is at offset ip from a word.  */
        /* Align r1 to the (previous) word boundary.  */
        bic     r1, r1, #3

        /* Unaligned comparison word by word using LDRs.  */
        cmp     ip, #2
        beq     miscmp_word_16                    /* If ip == 2.  */
        bge     miscmp_word_24                    /* If ip == 3.  */
        miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
miscmp_word_24: miscmp_word offsetlo=24 offsethi=8


return_32:
        setup_return w1=r3, w2=r2
        b       do_return
return_34:
        setup_return w1=r3, w2=r4
        b       do_return
return_25:
        setup_return w1=r2, w2=r5
        b       do_return
return_35:
        setup_return w1=r3, w2=r5
        b       do_return
return_24:
        setup_return w1=r2, w2=r4

do_return:

#ifdef __ARMEB__
        mov     r0, ip
#else /* not __ARMEB__ */
        rev     r0, ip
#endif /* not __ARMEB__ */

        /* Restore temporaries early, before computing the return value.  */
        ldrd    r6, r7, [sp]
        ldrd    r4, r5, [sp, #8]
        adds    sp, sp, #16

        /* There is a zero byte or a differing byte between r1 and r2.  */
        /* r0 contains a mask of all-zero bytes in r1.  */
        /* Using r0 and not ip here because CBZ requires a low register.  */
        m_cbz   reg=r0, label=compute_return_value
        clz     r0, r0
        /* r0 contains the number of bits to the left of the first all-zero
           byte in r1.  */
        rsb     r0, r0, #24
        /* Here, r0 contains the number of bits to the right of the first
           all-zero byte in r1.  */
        lsr     r1, r1, r0
        lsr     r2, r2, r0

compute_return_value:
        movs    r0, #1
        cmp     r1, r2
        /* The return value is computed as follows.
           If r1>r2 then (C==1 and Z==0), LS does not hold, and r0 is #1 at
           return.
           If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
           which means r0:=r0-r0-1 and r0 is #-1 at return.
           If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
           which means r0:=r0-r0 and r0 is #0 at return.
           (C==0 and Z==1) cannot happen because the carry bit is "not
           borrow".  */
        it      ls
        sbcls   r0, r0, r0
        bx      lr


#else /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) ||
           defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||
           (defined (__thumb__) && !defined (__thumb2__))) */

        /* Use LDR whenever possible.  */

#ifdef __thumb2__
#define magic1(REG) 0x01010101
#define magic2(REG) 0x80808080
#else
#define magic1(REG) REG
#define magic2(REG) REG, lsl #7
#endif
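
        /* Thumb-2's modified-immediate encoding can represent a byte
           replicated into all four byte lanes, so 0x01010101 and 0x80808080
           fit directly in the instructions.  An ARM-mode immediate is an
           8-bit value rotated by an even amount, which cannot express either
           constant, so ARM mode builds 0x01010101 in a register (r4 below)
           and derives 0x80808080 from it with "lsl #7".  */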

        optpld  r0
        optpld  r1
        eor     r2, r0, r1
        tst     r2, #3
        /* Strings not at same byte offset from a word boundary.  */
        bne     strcmp_unaligned
        ands    r2, r0, #3
        bic     r0, r0, #3
        bic     r1, r1, #3
        ldr     ip, [r0], #4
        it      eq
        ldreq   r3, [r1], #4
        beq     1f
        /* Although s1 and s2 have identical initial alignment, they are
           not currently word aligned.  Rather than comparing bytes,
           make sure that any bytes fetched from before the addressed
           bytes are forced to 0xff.  Then they will always compare
           equal.  */
        eor     r2, r2, #3
        lsl     r2, r2, #3
        mvn     r3, MSB
        S2LOMEM r2, r3, r2
        ldr     r3, [r1], #4
        orr     ip, ip, r2
        orr     r3, r3, r2
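
        /* Example of the mask (little-endian, initial offset r2 == 1): the
           EOR/LSL turn the offset into (3 - 1) * 8 == 16, and
           0x00ffffff >> 16 gives 0x000000ff, which forces the one stale byte
           below the starting address to 0xff in both fetched words.  */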
1:
#ifndef __thumb2__
        /* Load the 'magic' constant 0x01010101.  */
        str     r4, [sp, #-4]!
        mov     r4, #1
        orr     r4, r4, r4, lsl #8
        orr     r4, r4, r4, lsl #16
#endif
        .p2align 2
4:
        optpld  r0, #8
        optpld  r1, #8
        sub     r2, ip, magic1(r4)
        cmp     ip, r3
        itttt   eq
        /* Check for any zero bytes in the first word.  */
        biceq   r2, r2, ip
        tsteq   r2, magic2(r4)
        ldreq   ip, [r0], #4
        ldreq   r3, [r1], #4
        beq     4b
2:
        /* There's a zero byte or a differing byte in the word.  */
        S2HIMEM r0, ip, #24
        S2LOMEM ip, ip, #8
        cmp     r0, #1
        it      cs
        cmpcs   r0, r3, S2HIMEM #24
        it      eq
        S2LOMEMEQ r3, r3, #8
        beq     2b
        /* On a big-endian machine, r0 contains the desired byte in bits
           0-7; on a little-endian machine they are in bits 24-31.  In
           both cases the other bits in r0 are all zero.  For r3 the
           interesting byte is at the other end of the word, but the
           other bits are not necessarily zero.  We need a signed result
           representing the difference in the unsigned bytes, so for the
           little-endian case we can't just shift the interesting bits
           up.  */
#ifdef __ARMEB__
        sub     r0, r0, r3, lsr #24
#else
        and     r3, r3, #255
#ifdef __thumb2__
        /* No RSB instruction in Thumb2.  */
        lsr     r0, r0, #24
        sub     r0, r0, r3
#else
        rsb     r0, r3, r0, lsr #24
#endif
#endif
#ifndef __thumb2__
        ldr     r4, [sp], #4
#endif
        RETURN


strcmp_unaligned:

#if 0
        /* The assembly code below is based on the following algorithm.  */
#ifdef __ARMEB__
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif

#define body(shift)                                                     \
        mask = 0xffffffffU RSHIFT shift;                                \
        w1 = *wp1++;                                                    \
        w2 = *wp2++;                                                    \
        do                                                              \
        {                                                               \
                t1 = w1 & mask;                                         \
                if (__builtin_expect(t1 != w2 RSHIFT shift, 0))         \
                {                                                       \
                        w2 RSHIFT= shift;                               \
                        break;                                          \
                }                                                       \
                if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
                {                                                       \
                        /* See comment in assembler below re syndrome on big-endian.  */\
                        if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask)     \
                                w2 RSHIFT= shift;                       \
                        else                                            \
                        {                                               \
                                w2 = *wp2;                              \
                                t1 = w1 RSHIFT (32 - shift);            \
                                w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
                        }                                               \
                        break;                                          \
                }                                                       \
                w2 = *wp2++;                                            \
                t1 ^= w1;                                               \
                if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0))  \
                {                                                       \
                        t1 = w1 >> (32 - shift);                        \
                        w2 = (w2 << (32 - shift)) RSHIFT (32 - shift);  \
                        break;                                          \
                }                                                       \
                w1 = *wp1++;                                            \
        } while (1)

        const unsigned* wp1;
        const unsigned* wp2;
        unsigned w1, w2;
        unsigned mask;
        unsigned shift;
        unsigned b1 = 0x01010101;
        char c1, c2;
        unsigned t1;

        while (((unsigned) s1) & 3)
        {
                c1 = *s1++;
                c2 = *s2++;
                if (c1 == 0 || c1 != c2)
                        return c1 - (int)c2;
        }
        wp1 = (unsigned*) (((unsigned)s1) & ~3);
        wp2 = (unsigned*) (((unsigned)s2) & ~3);
        t1 = ((unsigned) s2) & 3;
        if (t1 == 1)
        {
                body(8);
        }
        else if (t1 == 2)
        {
                body(16);
        }
        else
        {
                body(24);
        }

        do
        {
#ifdef __ARMEB__
                c1 = (char) (t1 >> 24);
                c2 = (char) (w2 >> 24);
#else /* not __ARMEB__ */
                c1 = (char) t1;
                c2 = (char) w2;
#endif /* not __ARMEB__ */
                t1 RSHIFT= 8;
                w2 RSHIFT= 8;
        } while (c1 != 0 && c1 == c2);
        return c1 - c2;
#endif /* 0 */


wp1 .req r0
wp2 .req r1
b1  .req r2
w1  .req r4
w2  .req r5
t1  .req ip
@ r3 is scratch

        /* First of all, compare bytes until wp1 (s1) is word-aligned.  */
1:
        tst     wp1, #3
        beq     2f
        ldrb    r2, [wp1], #1
        ldrb    r3, [wp2], #1
        cmp     r2, #1
        it      cs
        cmpcs   r2, r3
        beq     1b
        sub     r0, r2, r3
        RETURN

2:
        str     r5, [sp, #-4]!
        str     r4, [sp, #-4]!
        //stmfd sp!, {r4, r5}
        mov     b1, #1
        orr     b1, b1, b1, lsl #8
        orr     b1, b1, b1, lsl #16

        and     t1, wp2, #3
        bic     wp2, wp2, #3
        ldr     w1, [wp1], #4
        ldr     w2, [wp2], #4
        cmp     t1, #2
        beq     2f
        bhi     3f
        /* Critical inner loop: block with 3 bytes initial overlap.  */
        .p2align 2
1:
        bic     t1, w1, MSB
        cmp     t1, w2, S2LOMEM #8
        sub     r3, w1, b1
        bic     r3, r3, w1
        bne     4f
        ands    r3, r3, b1, lsl #7
        it      eq
        ldreq   w2, [wp2], #4
        bne     5f
        eor     t1, t1, w1
        cmp     t1, w2, S2HIMEM #24
        bne     6f
        ldr     w1, [wp1], #4
        b       1b
4:
        S2LOMEM w2, w2, #8
        b       8f

5:
#ifdef __ARMEB__
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        tst     w1, #0xff000000
        itt     ne
        tstne   w1, #0x00ff0000
        tstne   w1, #0x0000ff00
        beq     7f
#else
        bics    r3, r3, #0xff000000
        bne     7f
#endif
        ldrb    w2, [wp2]
        S2LOMEM t1, w1, #24
#ifdef __ARMEB__
        lsl     w2, w2, #24
#endif
        b       8f

6:
        S2LOMEM t1, w1, #24
        and     w2, w2, LSB
        b       8f

        /* Critical inner loop: block with 2 bytes initial overlap.  */
        .p2align 2
2:
        S2HIMEM t1, w1, #16
        sub     r3, w1, b1
        S2LOMEM t1, t1, #16
        bic     r3, r3, w1
        cmp     t1, w2, S2LOMEM #16
        bne     4f
        ands    r3, r3, b1, lsl #7
        it      eq
        ldreq   w2, [wp2], #4
        bne     5f
        eor     t1, t1, w1
        cmp     t1, w2, S2HIMEM #16
        bne     6f
        ldr     w1, [wp1], #4
        b       2b

5:
#ifdef __ARMEB__
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        tst     w1, #0xff000000
        it      ne
        tstne   w1, #0x00ff0000
        beq     7f
#else
        lsls    r3, r3, #16
        bne     7f
#endif
        ldrh    w2, [wp2]
        S2LOMEM t1, w1, #16
#ifdef __ARMEB__
        lsl     w2, w2, #16
#endif
        b       8f

6:
        S2HIMEM w2, w2, #16
        S2LOMEM t1, w1, #16
4:
        S2LOMEM w2, w2, #16
        b       8f

        /* Critical inner loop: block with 1 byte initial overlap.  */
        .p2align 2
3:
        and     t1, w1, LSB
        cmp     t1, w2, S2LOMEM #24
        sub     r3, w1, b1
        bic     r3, r3, w1
        bne     4f
        ands    r3, r3, b1, lsl #7
        it      eq
        ldreq   w2, [wp2], #4
        bne     5f
        eor     t1, t1, w1
        cmp     t1, w2, S2HIMEM #8
        bne     6f
        ldr     w1, [wp1], #4
        b       3b
4:
        S2LOMEM w2, w2, #24
        b       8f
5:
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        tst     w1, LSB
        beq     7f
        ldr     w2, [wp2], #4
6:
        S2LOMEM t1, w1, #8
        bic     w2, w2, MSB
        b       8f
7:
        mov     r0, #0
        //ldmfd sp!, {r4, r5}
        ldr     r4, [sp], #4
        ldr     r5, [sp], #4
        RETURN
8:
        and     r2, t1, LSB
        and     r0, w2, LSB
        cmp     r0, #1
        it      cs
        cmpcs   r0, r2
        itt     eq
        S2LOMEMEQ t1, t1, #8
        S2LOMEMEQ w2, w2, #8
        beq     8b
        sub     r0, r2, r0
        //ldmfd sp!, {r4, r5}
        ldr     r4, [sp], #4
        ldr     r5, [sp], #4
        RETURN

#endif /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) ||
            defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||
            (defined (__thumb__) && !defined (__thumb2__))) */