/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "arm_asm.h"

#ifdef __ARMEB__
#define S2LOMEM lsl
#define S2LOMEMEQ lsleq
#define S2HIMEM lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not __ARMEB__ */
#define S2LOMEM lsr
#define S2LOMEMEQ lsreq
#define S2HIMEM lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not __ARMEB__ */

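/* S2LOMEM/S2HIMEM name shifts by their direction in memory rather than
   numerically: S2LOMEM moves the bits of a loaded word towards its
   lower-addressed bytes (lsr on little-endian, lsl on big-endian), and
   S2HIMEM towards its higher-addressed bytes.  */
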
.syntax unified

#if defined (__thumb__)
        .thumb
        .thumb_func
#endif
        .global strcmp
        .type strcmp, %function
strcmp:

#if (defined (__thumb__) && !defined (__thumb2__))
        /* Thumb-1: plain byte-by-byte comparison.  */
1:
        ldrb r2, [r0]
        ldrb r3, [r1]
        adds r0, r0, #1
        adds r1, r1, #1
        cmp r2, #0
        beq 2f
        cmp r2, r3
        beq 1b
2:
        subs r0, r2, r3
        bx lr
#elif (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
        /* Size-optimized byte-by-byte comparison.  */
1:
        ldrb r2, [r0], #1
        ldrb r3, [r1], #1
        cmp r2, #1 /* Carry is set iff r2 != 0.  */
        it cs
        cmpcs r2, r3 /* If r2 != 0, loop while r2 == r3.  */
        beq 1b
        subs r0, r2, r3
        RETURN

#elif (defined (_ISA_THUMB_2) || defined (_ISA_ARM_6))
        /* Use LDRD whenever possible.  */

/* The main thing to look out for when comparing large blocks is that
   the loads do not cross a page boundary when loading past the index
   of the byte with the first difference or the first string-terminator.

   For example, if the strings are identical and the string-terminator
   is at index k, byte-by-byte comparison will not load beyond addresses
   s1+k and s2+k; word-by-word comparison may load up to 3 bytes beyond
   k; double-word comparison, up to 7 bytes.  If the load of these bytes
   crosses a page boundary, it might cause a memory fault (if the page
   is not mapped) that would not have happened in byte-by-byte comparison.

   If an address is (double) word aligned, then a load of a (double) word
   from that address will not cross a page boundary.
   Therefore, the algorithm below considers word and double-word alignment
   of strings separately.  */

/* High-level description of the algorithm.

   * The fast path: if both strings are double-word aligned,
     use LDRD to load two words from each string in every loop iteration.
   * If the strings have the same offset from a word boundary,
     use LDRB to load and compare byte by byte until
     the first string is aligned to a word boundary (at most 3 bytes).
     This is optimized for quick return on short unaligned strings.
   * If the strings have the same offset from a double-word boundary,
     use LDRD to load two words from each string in every loop iteration,
     as in the fast path.
   * If the strings do not have the same offset from a double-word boundary,
     load a word from the second string before the loop to initialize the
     queue.  Use LDRD to load two words from each string in every loop
     iteration.  Inside the loop, load the second word from the second
     string only after comparing the first word, using the queued value,
     to guarantee safety across page boundaries.
   * If the strings do not have the same offset from a word boundary,
     use LDR and a shift queue.  The order of loads and comparisons
     matters, similarly to the previous case.

   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute
     the return value.
   * The only difference between ARM and Thumb modes is the use of the
     CBZ instruction.
   * The only difference between big and little endian is the use of REV
     in little endian to compute the return value, instead of MOV.
   * No preload.  [TODO.]
*/
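
/* For illustration only, a rough C sketch of the aligned fast path.
   has_zero_byte is a hypothetical helper standing in for the UADD8/SEL
   (or subtract/bic/and) test implemented by magic_find_zero_bytes below:

     while (1)
       {
         unsigned w1 = *wp1++;
         unsigned w2 = *wp2++;
         if (w1 != w2 || has_zero_byte (w1))
           break;   // compute the return value from w1 and w2
       }
*/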

        .macro m_cbz reg label
#ifdef __thumb2__
        cbz \reg, \label
#else /* not defined __thumb2__ */
        cmp \reg, #0
        beq \label
#endif /* not defined __thumb2__ */
        .endm /* m_cbz */

        .macro m_cbnz reg label
#ifdef __thumb2__
        cbnz \reg, \label
#else /* not defined __thumb2__ */
        cmp \reg, #0
        bne \label
#endif /* not defined __thumb2__ */
        .endm /* m_cbnz */

        .macro init
        /* Macro to save temporary registers and prepare magic values.  */
        subs sp, sp, #16
        strd r4, r5, [sp, #8]
        strd r6, r7, [sp]
        mvn r6, #0 /* all F */
        mov r7, #0 /* all 0 */
        .endm /* init */

        .macro magic_compare_and_branch w1 w2 label
        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
        cmp \w1, \w2 /* Are w1 and w2 the same?  */
        magic_find_zero_bytes \w1
        it eq
        cmpeq ip, #0 /* Is there a zero byte in w1?  */
        bne \label
        .endm /* magic_compare_and_branch */

        .macro magic_find_zero_bytes w1
        /* Macro to find all-zero bytes in w1; the result is in ip.  */
#if (defined (__ARM_FEATURE_DSP))
        uadd8 ip, \w1, r6
        sel ip, r7, r6
#else /* not defined (__ARM_FEATURE_DSP) */
        /* __ARM_FEATURE_DSP is not defined for some Cortex-M processors.
        Coincidentally, these processors only have Thumb-2 mode, where we
        can use the (large) magic constant directly as an immediate in
        instructions.  Note that we cannot use the magic constant in ARM
        mode, where we need to create the constant in a register.  */
        sub ip, \w1, #0x01010101
        bic ip, ip, \w1
        and ip, ip, #0x80808080
#endif /* not defined (__ARM_FEATURE_DSP) */
        .endm /* magic_find_zero_bytes */
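
/* In C terms, the fallback above computes
     ip = (w1 - 0x01010101) & ~w1 & 0x80808080;
   which is non-zero iff w1 contains a zero byte.  Syndrome bits above the
   first zero byte may be false positives (compare the "false ones"
   comments elsewhere in this file); only the first set bit in memory
   order is trusted.  */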

        .macro setup_return w1 w2
#ifdef __ARMEB__
        mov r1, \w1
        mov r2, \w2
#else /* not __ARMEB__ */
        rev r1, \w1
        rev r2, \w2
#endif /* not __ARMEB__ */
        .endm /* setup_return */
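
/* On little-endian, REV puts each word into big-endian byte order, so a
   single unsigned comparison of r1 and r2 ranks the strings by their
   first differing byte in memory order.  */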

        /*
        optpld r0, #0
        optpld r1, #0
        */

        /* Are both strings double-word aligned?  */
        orr ip, r0, r1
        tst ip, #7
        bne do_align

        /* Fast path.  */
        init

doubleword_aligned:

        /* Get here when the strings to compare are double-word aligned.  */
        /* Compare two words in every iteration.  */
        .p2align 2
2:
        /*
        optpld r0, #16
        optpld r1, #16
        */

        /* Load the next double-word from each string.  */
        ldrd r2, r3, [r0], #8
        ldrd r4, r5, [r1], #8

        magic_compare_and_branch w1=r2, w2=r4, label=return_24
        magic_compare_and_branch w1=r3, w2=r5, label=return_35
        b 2b

do_align:
        /* Is the first string word-aligned?  */
        ands ip, r0, #3
        beq word_aligned_r0

        /* Fast compare byte by byte until the first string is word-aligned.  */
        /* The offset of r0 from a word boundary is in ip.  Thus, the number
        of bytes to read until the next word boundary is 4-ip.  */
        bic r0, r0, #3
        ldr r2, [r0], #4
        lsls ip, ip, #31
        beq byte2
        bcs byte3

byte1:
        ldrb ip, [r1], #1
        uxtb r3, r2, ror #BYTE1_OFFSET
        subs ip, r3, ip
        bne fast_return
        m_cbz reg=r3, label=fast_return

byte2:
        ldrb ip, [r1], #1
        uxtb r3, r2, ror #BYTE2_OFFSET
        subs ip, r3, ip
        bne fast_return
        m_cbz reg=r3, label=fast_return

byte3:
        ldrb ip, [r1], #1
        uxtb r3, r2, ror #BYTE3_OFFSET
        subs ip, r3, ip
        bne fast_return
        m_cbnz reg=r3, label=word_aligned_r0

fast_return:
        mov r0, ip
        bx lr

word_aligned_r0:
        init
        /* The first string is word-aligned.  */
        /* Is the second string word-aligned?  */
        ands ip, r1, #3
        bne strcmp_unaligned

word_aligned:
        /* The strings are word-aligned.  */
        /* Is the first string double-word aligned?  */
        tst r0, #4
        beq doubleword_aligned_r0

        /* If r0 is not double-word aligned yet, align it by loading
        and comparing the next word from each string.  */
        ldr r2, [r0], #4
        ldr r4, [r1], #4
        magic_compare_and_branch w1=r2 w2=r4 label=return_24

doubleword_aligned_r0:
        /* Get here when r0 is double-word aligned.  */
        /* Is r1 double-word aligned?  */
        tst r1, #4
        beq doubleword_aligned

        /* Get here when the strings to compare are word-aligned,
        r0 is double-word aligned, but r1 is not double-word aligned.  */

        /* Initialize the queue.  */
        ldr r5, [r1], #4

        /* Compare two words in every iteration.  */
        .p2align 2
3:
        /*
        optpld r0, #16
        optpld r1, #16
        */

        /* Load the next double-word from each string and compare.  */
        ldrd r2, r3, [r0], #8
        magic_compare_and_branch w1=r2 w2=r5 label=return_25
        ldrd r4, r5, [r1], #8
        magic_compare_and_branch w1=r3 w2=r4 label=return_34
        b 3b

        .macro miscmp_word offsetlo offsethi
        /* Macro to compare misaligned strings.  */
        /* r0, r1 are word-aligned, and at least one of the strings
        is not double-word aligned.  */
        /* Compare one word in every loop iteration.  */
        /* OFFSETLO is the original bit-offset of r1 from a word boundary,
        OFFSETHI is 32 - OFFSETLO (i.e., the offset from the next word).  */

        /* Initialize the shift queue.  */
        ldr r5, [r1], #4

        /* Compare one word from each string in every loop iteration.  */
        .p2align 2
7:
        ldr r3, [r0], #4
        S2LOMEM r5, r5, #\offsetlo
        magic_find_zero_bytes w1=r3
        cmp r7, ip, S2HIMEM #\offsetlo
        and r2, r3, r6, S2LOMEM #\offsetlo
        it eq
        cmpeq r2, r5
        bne return_25
        ldr r5, [r1], #4
        cmp ip, #0
        eor r3, r2, r3
        S2HIMEM r2, r5, #\offsethi
        it eq
        cmpeq r3, r2
        bne return_32
        b 7b
        .endm /* miscmp_word */
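
/* The load order above matters: the next word of the second string is
   fetched only after the low part has compared equal and shown no NUL,
   so no word beyond the one holding the first difference or terminator
   is ever read (see the high-level description above).  */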

strcmp_unaligned:
        /* r0 is word-aligned, r1 is at offset ip from a word.  */
        /* Align r1 to the (previous) word boundary.  */
        bic r1, r1, #3

        /* Unaligned comparison word by word using LDRs.  */
        cmp ip, #2
        beq miscmp_word_16 /* If ip == 2.  */
        bge miscmp_word_24 /* If ip == 3.  */
        miscmp_word offsetlo=8 offsethi=24 /* If ip == 1.  */
miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
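
/* Each miscmp_word expansion loops until it finds a difference or a NUL
   and then branches to a return label, so control never falls through
   from one expansion into the next.  */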


return_32:
        setup_return w1=r3, w2=r2
        b do_return
return_34:
        setup_return w1=r3, w2=r4
        b do_return
return_25:
        setup_return w1=r2, w2=r5
        b do_return
return_35:
        setup_return w1=r3, w2=r5
        b do_return
return_24:
        setup_return w1=r2, w2=r4

do_return:

#ifdef __ARMEB__
        mov r0, ip
#else /* not __ARMEB__ */
        rev r0, ip
#endif /* not __ARMEB__ */

        /* Restore temporaries early, before computing the return value.  */
        ldrd r6, r7, [sp]
        ldrd r4, r5, [sp, #8]
        adds sp, sp, #16

        /* There is a zero or a different byte between r1 and r2.  */
        /* r0 contains a mask of all-zero bytes in r1.  */
        /* Using r0 and not ip here because CBZ requires a low register.  */
        m_cbz reg=r0, label=compute_return_value
        clz r0, r0
        /* r0 contains the number of bits to the left of the first all-zero byte in r1.  */
        rsb r0, r0, #24
        /* Now r0 contains the number of bits to the right of the first all-zero byte in r1.  */
        lsr r1, r1, r0
        lsr r2, r2, r0

compute_return_value:
        movs r0, #1
        cmp r1, r2
        /* The return value is computed as follows.
        If r1 > r2, then (C == 1 and Z == 0); LS does not hold, and r0 is #1 at return.
        If r1 < r2, then (C == 0 and Z == 0); we execute SBC with carry_in = 0,
        which means r0 := r0 - r0 - 1, so r0 is #-1 at return.
        If r1 == r2, then (C == 1 and Z == 1); we execute SBC with carry_in = 1,
        which means r0 := r0 - r0, so r0 is #0 at return.
        (C == 0 and Z == 1) cannot happen because the carry bit is "not borrow".  */
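        /* In C terms: return (r1 > r2) - (r1 < r2), i.e. #1, #-1, or #0.  */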
        it ls
        sbcls r0, r0, r0
        bx lr


#else /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) ||
        defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||
        (defined (__thumb__) && !defined (__thumb2__))) */

        /* Use LDR whenever possible.  */

#ifdef __thumb2__
#define magic1(REG) 0x01010101
#define magic2(REG) 0x80808080
#else
#define magic1(REG) REG
#define magic2(REG) REG, lsl #7
#endif
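
/* 0x01010101 and 0x80808080 are encodable as Thumb-2 modified immediates
   (a byte replicated across the word), but not as ARM-mode rotated
   immediates, hence the register fallback via magic1/magic2.  */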

        optpld r0
        optpld r1
        eor r2, r0, r1
        tst r2, #3
        /* Strings not at same byte offset from a word boundary.  */
        bne strcmp_unaligned
        ands r2, r0, #3
        bic r0, r0, #3
        bic r1, r1, #3
        ldr ip, [r0], #4
        it eq
        ldreq r3, [r1], #4
        beq 1f
        /* Although s1 and s2 have identical initial alignment, they are
        not currently word aligned.  Rather than comparing bytes,
        make sure that any bytes fetched from before the addressed
        bytes are forced to 0xff.  Then they will always compare
        equal.  */
        eor r2, r2, #3
        lsl r2, r2, #3
        mvn r3, MSB
        S2LOMEM r2, r3, r2
        ldr r3, [r1], #4
        orr ip, ip, r2
        orr r3, r3, r2
1:
#ifndef __thumb2__
        /* Load the 'magic' constant 0x01010101.  */
        str r4, [sp, #-4]!
        mov r4, #1
        orr r4, r4, r4, lsl #8
        orr r4, r4, r4, lsl #16
#endif
        .p2align 2
4:
        optpld r0, #8
        optpld r1, #8
        sub r2, ip, magic1(r4)
        cmp ip, r3
        itttt eq
        /* Check for any zero bytes in the first word.  */
        biceq r2, r2, ip
        tsteq r2, magic2(r4)
        ldreq ip, [r0], #4
        ldreq r3, [r1], #4
        beq 4b
2:
        /* There's a zero or a different byte in the word.  */
        S2HIMEM r0, ip, #24
        S2LOMEM ip, ip, #8
        cmp r0, #1
        it cs
        cmpcs r0, r3, S2HIMEM #24
        it eq
        S2LOMEMEQ r3, r3, #8
        beq 2b
        /* On a big-endian machine, r0 contains the desired byte in bits
        0-7; on a little-endian machine they are in bits 24-31.  In
        both cases the other bits in r0 are all zero.  For r3 the
        interesting byte is at the other end of the word, but the
        other bits are not necessarily zero.  We need a signed result
        representing the difference in the unsigned bytes, so for the
        little-endian case we can't just shift the interesting bits
        up.  */
#ifdef __ARMEB__
        sub r0, r0, r3, lsr #24
#else
        and r3, r3, #255
#ifdef __thumb2__
        /* No RSB instruction in Thumb2 */
        lsr r0, r0, #24
        sub r0, r0, r3
#else
        rsb r0, r3, r0, lsr #24
#endif
#endif
#ifndef __thumb2__
        ldr r4, [sp], #4
#endif
        RETURN


strcmp_unaligned:

#if 0
        /* The assembly code below is based on the following algorithm.  */
#ifdef __ARMEB__
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif

#define body(shift) \
  mask = 0xffffffffU RSHIFT shift; \
  w1 = *wp1++; \
  w2 = *wp2++; \
  do \
    { \
      t1 = w1 & mask; \
      if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
        { \
          w2 RSHIFT= shift; \
          break; \
        } \
      if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
        { \
          /* See comment in assembler below re syndrome on big-endian */ \
          if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
            w2 RSHIFT= shift; \
          else \
            { \
              w2 = *wp2; \
              t1 = w1 RSHIFT (32 - shift); \
              w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
            } \
          break; \
        } \
      w2 = *wp2++; \
      t1 ^= w1; \
      if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
        { \
          t1 = w1 >> (32 - shift); \
          w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
          break; \
        } \
      w1 = *wp1++; \
    } while (1)

  const unsigned* wp1;
  const unsigned* wp2;
  unsigned w1, w2;
  unsigned mask;
  unsigned shift;
  unsigned b1 = 0x01010101;
  char c1, c2;
  unsigned t1;

  while (((unsigned) s1) & 3)
    {
      c1 = *s1++;
      c2 = *s2++;
      if (c1 == 0 || c1 != c2)
        return c1 - (int)c2;
    }
  wp1 = (unsigned*) (((unsigned)s1) & ~3);
  wp2 = (unsigned*) (((unsigned)s2) & ~3);
  t1 = ((unsigned) s2) & 3;
  if (t1 == 1)
    {
      body(8);
    }
  else if (t1 == 2)
    {
      body(16);
    }
  else
    {
      body (24);
    }

  do
    {
#ifdef __ARMEB__
      c1 = (char) (t1 >> 24);
      c2 = (char) (w2 >> 24);
#else /* not __ARMEB__ */
      c1 = (char) t1;
      c2 = (char) w2;
#endif /* not __ARMEB__ */
      t1 RSHIFT= 8;
      w2 RSHIFT= 8;
    } while (c1 != 0 && c1 == c2);
  return c1 - c2;
#endif /* 0 */


        wp1 .req r0
        wp2 .req r1
        b1  .req r2
        w1  .req r4
        w2  .req r5
        t1  .req ip
        @ r3 is scratch

        /* First of all, compare bytes until wp1 (s1) is word-aligned.  */
1:
        tst wp1, #3
        beq 2f
        ldrb r2, [wp1], #1
        ldrb r3, [wp2], #1
        cmp r2, #1
        it cs
        cmpcs r2, r3
        beq 1b
        sub r0, r2, r3
        RETURN

2:
        str r5, [sp, #-4]!
        str r4, [sp, #-4]!
        //stmfd sp!, {r4, r5}
        mov b1, #1
        orr b1, b1, b1, lsl #8
        orr b1, b1, b1, lsl #16

        and t1, wp2, #3
        bic wp2, wp2, #3
        ldr w1, [wp1], #4
        ldr w2, [wp2], #4
        cmp t1, #2
        beq 2f
        bhi 3f
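
        /* Dispatch on the byte offset of wp2 within its word: offset 1
           falls through to the 3-byte-overlap block (1:), offset 2
           branches to the 2-byte block (2:), offset 3 to the 1-byte
           block (3:).  */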

        /* Critical inner Loop: Block with 3 bytes initial overlap */
        .p2align 2
1:
        bic t1, w1, MSB
        cmp t1, w2, S2LOMEM #8
        sub r3, w1, b1
        bic r3, r3, w1
        bne 4f
        ands r3, r3, b1, lsl #7
        it eq
        ldreq w2, [wp2], #4
        bne 5f
        eor t1, t1, w1
        cmp t1, w2, S2HIMEM #24
        bne 6f
        ldr w1, [wp1], #4
        b 1b
4:
        S2LOMEM w2, w2, #8
        b 8f

5:
#ifdef __ARMEB__
        /* The syndrome value may contain false ones if the string ends
        with the bytes 0x01 0x00 */
        tst w1, #0xff000000
        itt ne
        tstne w1, #0x00ff0000
        tstne w1, #0x0000ff00
        beq 7f
#else
        bics r3, r3, #0xff000000
        bne 7f
#endif
        ldrb w2, [wp2]
        S2LOMEM t1, w1, #24
#ifdef __ARMEB__
        lsl w2, w2, #24
#endif
        b 8f

6:
        S2LOMEM t1, w1, #24
        and w2, w2, LSB
        b 8f

        /* Critical inner Loop: Block with 2 bytes initial overlap */
        .p2align 2
2:
        S2HIMEM t1, w1, #16
        sub r3, w1, b1
        S2LOMEM t1, t1, #16
        bic r3, r3, w1
        cmp t1, w2, S2LOMEM #16
        bne 4f
        ands r3, r3, b1, lsl #7
        it eq
        ldreq w2, [wp2], #4
        bne 5f
        eor t1, t1, w1
        cmp t1, w2, S2HIMEM #16
        bne 6f
        ldr w1, [wp1], #4
        b 2b

5:
#ifdef __ARMEB__
        /* The syndrome value may contain false ones if the string ends
        with the bytes 0x01 0x00 */
        tst w1, #0xff000000
        it ne
        tstne w1, #0x00ff0000
        beq 7f
#else
        lsls r3, r3, #16
        bne 7f
#endif
        ldrh w2, [wp2]
        S2LOMEM t1, w1, #16
#ifdef __ARMEB__
        lsl w2, w2, #16
#endif
        b 8f

6:
        S2HIMEM w2, w2, #16
        S2LOMEM t1, w1, #16
4:
        S2LOMEM w2, w2, #16
        b 8f

        /* Critical inner Loop: Block with 1 byte initial overlap */
        .p2align 2
3:
        and t1, w1, LSB
        cmp t1, w2, S2LOMEM #24
        sub r3, w1, b1
        bic r3, r3, w1
        bne 4f
        ands r3, r3, b1, lsl #7
        it eq
        ldreq w2, [wp2], #4
        bne 5f
        eor t1, t1, w1
        cmp t1, w2, S2HIMEM #8
        bne 6f
        ldr w1, [wp1], #4
        b 3b
4:
        S2LOMEM w2, w2, #24
        b 8f
5:
        /* The syndrome value may contain false ones if the string ends
        with the bytes 0x01 0x00 */
        tst w1, LSB
        beq 7f
        ldr w2, [wp2], #4
6:
        S2LOMEM t1, w1, #8
        bic w2, w2, MSB
        b 8f
7:
        mov r0, #0
        //ldmfd sp!, {r4, r5}
        ldr r4, [sp], #4
        ldr r5, [sp], #4
        RETURN
8:
        and r2, t1, LSB
        and r0, w2, LSB
        cmp r0, #1
        it cs
        cmpcs r0, r2
        itt eq
        S2LOMEMEQ t1, t1, #8
        S2LOMEMEQ w2, w2, #8
        beq 8b
        sub r0, r2, r0
        //ldmfd sp!, {r4, r5}
        ldr r4, [sp], #4
        ldr r5, [sp], #4
        RETURN

#endif /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) ||
        defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||
        (defined (__thumb__) && !defined (__thumb2__))) */