1/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
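/*
 * This file implements ssse3_strcmp_latest(), an SSSE3-optimized strcmp
 * for 32-bit x86.  When assembled with USE_AS_STRNCMP defined, the same
 * body implements strncmp, taking the byte count as a third argument.
 *
 * Roughly equivalent reference C (illustrative only; the assembly returns
 * a nonzero value of the same sign, not necessarily the same magnitude):
 *
 *   int strcmp(const char *s1, const char *s2)
 *   {
 *       while (*s1 != '\0' && *s1 == *s2) {
 *           s1++;
 *           s2++;
 *       }
 *       return (unsigned char)*s1 - (unsigned char)*s2;
 *   }
 *
 * The code below produces a result with the same sign, but compares 16
 * bytes per iteration with SSE and takes care never to read across a 4K
 * page boundary.
 */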
31#ifndef L
32# define L(label) .L##label
33#endif
34
35#ifndef cfi_startproc
36# define cfi_startproc .cfi_startproc
37#endif
38
39#ifndef cfi_endproc
40# define cfi_endproc .cfi_endproc
41#endif
42
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
45#endif
46
47#ifndef cfi_restore
48# define cfi_restore(reg) .cfi_restore reg
49#endif
50
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
53#endif
54
55#ifndef cfi_remember_state
56# define cfi_remember_state .cfi_remember_state
57#endif
58
59#ifndef cfi_restore_state
60# define cfi_restore_state .cfi_restore_state
61#endif
62
63#ifndef ENTRY
64# define ENTRY(name) \
65 .type name, @function; \
66 .globl name; \
67 .p2align 4; \
68name: \
69 cfi_startproc
70#endif
71
72#ifndef END
73# define END(name) \
74 cfi_endproc; \
75 .size name, .-name
76#endif
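/*
 * ENTRY/END (unless overridden by the including file) declare a global
 * function symbol aligned to 16 bytes, open a .cfi_startproc region, and
 * close it again with .cfi_endproc plus a .size directive so unwinders
 * and debuggers see a well-formed function.
 */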
77
78#define CFI_PUSH(REG) \
79 cfi_adjust_cfa_offset (4); \
80 cfi_rel_offset (REG, 0)
81
82#define CFI_POP(REG) \
83 cfi_adjust_cfa_offset (-4); \
84 cfi_restore (REG)
85
86#define PUSH(REG) pushl REG; CFI_PUSH (REG)
87#define POP(REG) popl REG; CFI_POP (REG)
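/*
 * PUSH/POP wrap pushl/popl with matching CFI annotations so the DWARF
 * unwind information stays correct across register saves.  For example,
 * PUSH (%ebx) expands to:
 *
 *   pushl %ebx
 *   .cfi_adjust_cfa_offset 4
 *   .cfi_rel_offset %ebx, 0
 */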
88
89#ifndef USE_AS_STRNCMP
90# define STR1 4
91# define STR2 STR1+4
92# define RETURN ret
93
94# define UPDATE_STRNCMP_COUNTER
95#else
96# define STR1 8
97# define STR2 STR1+4
98# define CNT STR2+4
99# define RETURN POP (%ebp); ret; CFI_PUSH (%ebp)
100
101# define UPDATE_STRNCMP_COUNTER \
102 /* calculate left number to compare */ \
103 mov $16, %esi; \
104 sub %ecx, %esi; \
105 cmp %esi, %ebp; \
106 jbe L(more8byteseq); \
107 sub %esi, %ebp
108#endif
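/*
 * STR1/STR2 (and CNT for strncmp) are the %esp-relative offsets of the
 * incoming arguments.  The strncmp build pushes %ebp on entry, so its
 * offsets are 4 bytes larger and RETURN has to pop %ebp again.
 * UPDATE_STRNCMP_COUNTER charges the 16 - %ecx bytes covered by the first
 * aligned compare against the remaining count in %ebp, branching to
 * L(more8byteseq) if the limit is exhausted within that range.
 */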
109
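/*
 * Prototype (cdecl, arguments on the stack):
 *   int ssse3_strcmp_latest(const char *s1, const char *s2);
 * with an additional size_t count argument in the USE_AS_STRNCMP build.
 *
 * Register roles throughout: %edx walks s1, %eax walks s2, and in the
 * strncmp build %ebp holds the number of bytes still allowed to compare.
 */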
110 .section .text.ssse3,"ax",@progbits
111ENTRY (ssse3_strcmp_latest)
112#ifdef USE_AS_STRNCMP
113 PUSH (%ebp)
114#endif
115 movl STR1(%esp), %edx
116 movl STR2(%esp), %eax
117#ifdef USE_AS_STRNCMP
118 movl CNT(%esp), %ebp
119 cmp $16, %ebp
120 jb L(less16bytes_sncmp)
121 jmp L(more16bytes)
122#endif
123
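/*
 * Short-string warm-up: compare the first 8 bytes one at a time.  Each
 * step loads a byte of s2 into %cl, checks it against the corresponding
 * byte of s1, and leaves through L(neq) on a mismatch or L(eq) on a NUL.
 */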
124 movzbl (%eax), %ecx
125 cmpb %cl, (%edx)
126 jne L(neq)
127 cmpl $0, %ecx
128 je L(eq)
129
130 movzbl 1(%eax), %ecx
131 cmpb %cl, 1(%edx)
132 jne L(neq)
133 cmpl $0, %ecx
134 je L(eq)
135
136 movzbl 2(%eax), %ecx
137 cmpb %cl, 2(%edx)
138 jne L(neq)
139 cmpl $0, %ecx
140 je L(eq)
141
142 movzbl 3(%eax), %ecx
143 cmpb %cl, 3(%edx)
144 jne L(neq)
145 cmpl $0, %ecx
146 je L(eq)
147
148 movzbl 4(%eax), %ecx
149 cmpb %cl, 4(%edx)
150 jne L(neq)
151 cmpl $0, %ecx
152 je L(eq)
153
154 movzbl 5(%eax), %ecx
155 cmpb %cl, 5(%edx)
156 jne L(neq)
157 cmpl $0, %ecx
158 je L(eq)
159
160 movzbl 6(%eax), %ecx
161 cmpb %cl, 6(%edx)
162 jne L(neq)
163 cmpl $0, %ecx
164 je L(eq)
165
166 movzbl 7(%eax), %ecx
167 cmpb %cl, 7(%edx)
168 jne L(neq)
169 cmpl $0, %ecx
170 je L(eq)
171
172 add $8, %edx
173 add $8, %eax
174#ifdef USE_AS_STRNCMP
175 cmp $8, %ebp
176 lea -8(%ebp), %ebp
177 je L(eq)
178L(more16bytes):
179#endif
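/*
 * If a 16-byte load from either string would cross a 4K page boundary
 * (page offset above 0xff0), skip the fast head compare and go straight
 * to the careful L(crosspage) path.
 */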
180 movl %edx, %ecx
181 and $0xfff, %ecx
182 cmp $0xff0, %ecx
183 ja L(crosspage)
184 mov %eax, %ecx
185 and $0xfff, %ecx
186 cmp $0xff0, %ecx
187 ja L(crosspage)
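/*
 * 16-byte SIMD compare, the core idiom of this file: pcmpeqb against the
 * zeroed %xmm0 flags the NUL bytes of one block, pcmpeqb of the two
 * blocks flags the equal bytes, and psubb combines them so a byte is 0xff
 * only where the strings match and are not NUL.  pmovmskb therefore
 * yields 0xffff exactly when all 16 bytes match with no terminator; any
 * other value means a difference or end-of-string lies in this block.
 * Every loop below repeats this same pattern.
 */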
188 pxor %xmm0, %xmm0
189 movlpd (%eax), %xmm1
190 movlpd (%edx), %xmm2
191 movhpd 8(%eax), %xmm1
192 movhpd 8(%edx), %xmm2
193 pcmpeqb %xmm1, %xmm0
194 pcmpeqb %xmm2, %xmm1
195 psubb %xmm0, %xmm1
196 pmovmskb %xmm1, %ecx
197 sub $0xffff, %ecx
198 jnz L(less16bytes)
199#ifdef USE_AS_STRNCMP
200 cmp $16, %ebp
201 lea -16(%ebp), %ebp
202 jbe L(eq)
203#endif
204 add $16, %eax
205 add $16, %edx
206
207L(crosspage):
208
209 PUSH (%ebx)
210 PUSH (%edi)
211 PUSH (%esi)
212#ifdef USE_AS_STRNCMP
213 cfi_remember_state
214#endif
215
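/*
 * L(crosspage): unaligned/slow setup.  Both pointers are rounded down to
 * 16-byte boundaries, with the discarded low nibbles kept in %ecx (for
 * %eax) and %edi (for %edx).  %ebx records which case is taken in its low
 * five bits, and bit 0x20 notes that %eax/%edx were exchanged so they can
 * be swapped back before the result is produced.  The code then
 * dispatches to L(ashr_N), which re-aligns the two streams with
 * palignr $N inside its loop.
 */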
216 movl %edx, %edi
217 movl %eax, %ecx
218 and $0xf, %ecx
219 and $0xf, %edi
220 xor %ecx, %eax
221 xor %edi, %edx
222 xor %ebx, %ebx
223 cmp %edi, %ecx
224 je L(ashr_0)
225 ja L(bigger)
226 or $0x20, %ebx
227 xchg %edx, %eax
228 xchg %ecx, %edi
229L(bigger):
230 lea 15(%edi), %edi
231 sub %ecx, %edi
232 cmp $8, %edi
233 jle L(ashr_less_8)
234 cmp $14, %edi
235 je L(ashr_15)
236 cmp $13, %edi
237 je L(ashr_14)
238 cmp $12, %edi
239 je L(ashr_13)
240 cmp $11, %edi
241 je L(ashr_12)
242 cmp $10, %edi
243 je L(ashr_11)
244 cmp $9, %edi
245 je L(ashr_10)
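/*
 * Note: L(ashr_less_8) is reached with the flags of "cmp $8, %edi" still
 * live, so the "je" directly below it handles the %edi == 8 case.
 */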
246L(ashr_less_8):
247 je L(ashr_9)
248 cmp $7, %edi
249 je L(ashr_8)
250 cmp $6, %edi
251 je L(ashr_7)
252 cmp $5, %edi
253 je L(ashr_6)
254 cmp $4, %edi
255 je L(ashr_5)
256 cmp $3, %edi
257 je L(ashr_4)
258 cmp $2, %edi
259 je L(ashr_3)
260 cmp $1, %edi
261 je L(ashr_2)
262 cmp $0, %edi
263 je L(ashr_1)
264
265/*
266 * The following cases will be handled by ashr_0
267 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
268 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
269 */
270 .p2align 4
271L(ashr_0):
272 mov $0xffff, %esi
273 movdqa (%eax), %xmm1
274 pxor %xmm0, %xmm0
275 pcmpeqb %xmm1, %xmm0
276 pcmpeqb (%edx), %xmm1
277 psubb %xmm0, %xmm1
278 pmovmskb %xmm1, %edi
279 shr %cl, %esi
280 shr %cl, %edi
281 sub %edi, %esi
282 mov %ecx, %edi
283 jne L(less32bytes)
284 UPDATE_STRNCMP_COUNTER
285 mov $0x10, %ebx
286 mov $0x10, %ecx
287 pxor %xmm0, %xmm0
288 .p2align 4
289L(loop_ashr_0):
290 movdqa (%eax, %ecx), %xmm1
291 movdqa (%edx, %ecx), %xmm2
292
293 pcmpeqb %xmm1, %xmm0
294 pcmpeqb %xmm2, %xmm1
295 psubb %xmm0, %xmm1
296 pmovmskb %xmm1, %esi
297 sub $0xffff, %esi
298 jnz L(exit)
299#ifdef USE_AS_STRNCMP
300 cmp $16, %ebp
301 lea -16(%ebp), %ebp
302 jbe L(more8byteseq)
303#endif
304 add $16, %ecx
305 jmp L(loop_ashr_0)
306
307/*
308 * The following cases will be handled by ashr_1
309 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
310 * n(15) n -15 0(15 +(n-15) - n) ashr_1
311 */
312 .p2align 4
313L(ashr_1):
314 mov $0xffff, %esi
315 pxor %xmm0, %xmm0
316 movdqa (%edx), %xmm2
317 movdqa (%eax), %xmm1
318 pcmpeqb %xmm1, %xmm0
319 pslldq $15, %xmm2
320 pcmpeqb %xmm1, %xmm2
321 psubb %xmm0, %xmm2
322 pmovmskb %xmm2, %edi
323 shr %cl, %esi
324 shr %cl, %edi
325 sub %edi, %esi
326 lea -15(%ecx), %edi
327 jnz L(less32bytes)
328
329 UPDATE_STRNCMP_COUNTER
330
331 movdqa (%edx), %xmm3
332 pxor %xmm0, %xmm0
333 mov $16, %ecx
334 or $1, %ebx
335 lea 1(%edx), %edi
336 and $0xfff, %edi
337 sub $0x1000, %edi
338
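/*
 * Loop shape shared by all the L(ashr_N) cases: %edi was set above to the
 * (negative) distance from the shifted source pointer to the end of its
 * 4K page.  While adding 16 keeps it non-positive, the "gobble" path
 * compares a full 16-byte palignr window per step; once it turns positive
 * the "nibble" path first checks the upper bytes of the block already in
 * %xmm3 for a NUL terminator, and only if none is found re-biases %edi
 * and lets the loop step onto the next page.
 */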
339 .p2align 4
340L(loop_ashr_1):
341 add $16, %edi
342 jg L(nibble_ashr_1)
343
344L(gobble_ashr_1):
345 movdqa (%eax, %ecx), %xmm1
346 movdqa (%edx, %ecx), %xmm2
347 movdqa %xmm2, %xmm4
348
349 palignr $1, %xmm3, %xmm2
350
351 pcmpeqb %xmm1, %xmm0
352 pcmpeqb %xmm2, %xmm1
353 psubb %xmm0, %xmm1
354 pmovmskb %xmm1, %esi
355 sub $0xffff, %esi
356 jnz L(exit)
357#ifdef USE_AS_STRNCMP
358 cmp $16, %ebp
359 lea -16(%ebp), %ebp
360 jbe L(more8byteseq)
361#endif
362
363 add $16, %ecx
364 movdqa %xmm4, %xmm3
365
366 add $16, %edi
367 jg L(nibble_ashr_1)
368
369 movdqa (%eax, %ecx), %xmm1
370 movdqa (%edx, %ecx), %xmm2
371 movdqa %xmm2, %xmm4
372
373 palignr $1, %xmm3, %xmm2
374
375 pcmpeqb %xmm1, %xmm0
376 pcmpeqb %xmm2, %xmm1
377 psubb %xmm0, %xmm1
378 pmovmskb %xmm1, %esi
379 sub $0xffff, %esi
380 jnz L(exit)
381
382#ifdef USE_AS_STRNCMP
383 cmp $16, %ebp
384 lea -16(%ebp), %ebp
385 jbe L(more8byteseq)
386#endif
387 add $16, %ecx
388 movdqa %xmm4, %xmm3
389 jmp L(loop_ashr_1)
390
391 .p2align 4
392L(nibble_ashr_1):
393 pcmpeqb %xmm3, %xmm0
394 pmovmskb %xmm0, %esi
395 test $0xfffe, %esi
396 jnz L(ashr_1_exittail)
397
398#ifdef USE_AS_STRNCMP
399 cmp $15, %ebp
400 jbe L(ashr_1_exittail)
401#endif
402 pxor %xmm0, %xmm0
403 sub $0x1000, %edi
404 jmp L(gobble_ashr_1)
405
406 .p2align 4
407L(ashr_1_exittail):
408 movdqa (%eax, %ecx), %xmm1
409 psrldq $1, %xmm0
410 psrldq $1, %xmm3
411 jmp L(aftertail)
412
413/*
414 * The following cases will be handled by ashr_2
415 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
416 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
417 */
418 .p2align 4
419L(ashr_2):
420 mov $0xffff, %esi
421 pxor %xmm0, %xmm0
422 movdqa (%edx), %xmm2
423 movdqa (%eax), %xmm1
424 pcmpeqb %xmm1, %xmm0
425 pslldq $14, %xmm2
426 pcmpeqb %xmm1, %xmm2
427 psubb %xmm0, %xmm2
428 pmovmskb %xmm2, %edi
429 shr %cl, %esi
430 shr %cl, %edi
431 sub %edi, %esi
432 lea -14(%ecx), %edi
433 jnz L(less32bytes)
434
435 UPDATE_STRNCMP_COUNTER
436
437 movdqa (%edx), %xmm3
438 pxor %xmm0, %xmm0
439 mov $16, %ecx
440 or $2, %ebx
441 lea 2(%edx), %edi
442 and $0xfff, %edi
443 sub $0x1000, %edi
444
445 .p2align 4
446L(loop_ashr_2):
447 add $16, %edi
448 jg L(nibble_ashr_2)
449
450L(gobble_ashr_2):
451 movdqa (%eax, %ecx), %xmm1
452 movdqa (%edx, %ecx), %xmm2
453 movdqa %xmm2, %xmm4
454
455 palignr $2, %xmm3, %xmm2
456
457 pcmpeqb %xmm1, %xmm0
458 pcmpeqb %xmm2, %xmm1
459 psubb %xmm0, %xmm1
460 pmovmskb %xmm1, %esi
461 sub $0xffff, %esi
462 jnz L(exit)
463
464#ifdef USE_AS_STRNCMP
465 cmp $16, %ebp
466 lea -16(%ebp), %ebp
467 jbe L(more8byteseq)
468#endif
469 add $16, %ecx
470 movdqa %xmm4, %xmm3
471
472 add $16, %edi
473 jg L(nibble_ashr_2)
474
475 movdqa (%eax, %ecx), %xmm1
476 movdqa (%edx, %ecx), %xmm2
477 movdqa %xmm2, %xmm4
478
479 palignr $2, %xmm3, %xmm2
480
481 pcmpeqb %xmm1, %xmm0
482 pcmpeqb %xmm2, %xmm1
483 psubb %xmm0, %xmm1
484 pmovmskb %xmm1, %esi
485 sub $0xffff, %esi
486 jnz L(exit)
487
488#ifdef USE_AS_STRNCMP
489 cmp $16, %ebp
490 lea -16(%ebp), %ebp
491 jbe L(more8byteseq)
492#endif
493 add $16, %ecx
494 movdqa %xmm4, %xmm3
495 jmp L(loop_ashr_2)
496
497 .p2align 4
498L(nibble_ashr_2):
499 pcmpeqb %xmm3, %xmm0
500 pmovmskb %xmm0, %esi
501 test $0xfffc, %esi
502 jnz L(ashr_2_exittail)
503
504#ifdef USE_AS_STRNCMP
505 cmp $14, %ebp
506 jbe L(ashr_2_exittail)
507#endif
508
509 pxor %xmm0, %xmm0
510 sub $0x1000, %edi
511 jmp L(gobble_ashr_2)
512
513 .p2align 4
514L(ashr_2_exittail):
515 movdqa (%eax, %ecx), %xmm1
516 psrldq $2, %xmm0
517 psrldq $2, %xmm3
518 jmp L(aftertail)
519
520/*
521 * The following cases will be handled by ashr_3
522 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
523 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
524 */
525 .p2align 4
526L(ashr_3):
527 mov $0xffff, %esi
528 pxor %xmm0, %xmm0
529 movdqa (%edx), %xmm2
530 movdqa (%eax), %xmm1
531 pcmpeqb %xmm1, %xmm0
532 pslldq $13, %xmm2
533 pcmpeqb %xmm1, %xmm2
534 psubb %xmm0, %xmm2
535 pmovmskb %xmm2, %edi
536 shr %cl, %esi
537 shr %cl, %edi
538 sub %edi, %esi
539 lea -13(%ecx), %edi
540 jnz L(less32bytes)
541
542 UPDATE_STRNCMP_COUNTER
543
544 movdqa (%edx), %xmm3
545 pxor %xmm0, %xmm0
546 mov $16, %ecx
547 or $3, %ebx
548 lea 3(%edx), %edi
549 and $0xfff, %edi
550 sub $0x1000, %edi
551
552 .p2align 4
553L(loop_ashr_3):
554 add $16, %edi
555 jg L(nibble_ashr_3)
556
557L(gobble_ashr_3):
558 movdqa (%eax, %ecx), %xmm1
559 movdqa (%edx, %ecx), %xmm2
560 movdqa %xmm2, %xmm4
561
562 palignr $3, %xmm3, %xmm2
563
564 pcmpeqb %xmm1, %xmm0
565 pcmpeqb %xmm2, %xmm1
566 psubb %xmm0, %xmm1
567 pmovmskb %xmm1, %esi
568 sub $0xffff, %esi
569 jnz L(exit)
570
571#ifdef USE_AS_STRNCMP
572 cmp $16, %ebp
573 lea -16(%ebp), %ebp
574 jbe L(more8byteseq)
575#endif
576 add $16, %ecx
577 movdqa %xmm4, %xmm3
578
579 add $16, %edi
580 jg L(nibble_ashr_3)
581
582 movdqa (%eax, %ecx), %xmm1
583 movdqa (%edx, %ecx), %xmm2
584 movdqa %xmm2, %xmm4
585
586 palignr $3, %xmm3, %xmm2
587
588 pcmpeqb %xmm1, %xmm0
589 pcmpeqb %xmm2, %xmm1
590 psubb %xmm0, %xmm1
591 pmovmskb %xmm1, %esi
592 sub $0xffff, %esi
593 jnz L(exit)
594
595#ifdef USE_AS_STRNCMP
596 cmp $16, %ebp
597 lea -16(%ebp), %ebp
598 jbe L(more8byteseq)
599#endif
600 add $16, %ecx
601 movdqa %xmm4, %xmm3
602 jmp L(loop_ashr_3)
603
604 .p2align 4
605L(nibble_ashr_3):
606 pcmpeqb %xmm3, %xmm0
607 pmovmskb %xmm0, %esi
608 test $0xfff8, %esi
609 jnz L(ashr_3_exittail)
610
611#ifdef USE_AS_STRNCMP
612 cmp $13, %ebp
613 jbe L(ashr_3_exittail)
614#endif
615 pxor %xmm0, %xmm0
616 sub $0x1000, %edi
617 jmp L(gobble_ashr_3)
618
619 .p2align 4
620L(ashr_3_exittail):
621 movdqa (%eax, %ecx), %xmm1
622 psrldq $3, %xmm0
623 psrldq $3, %xmm3
624 jmp L(aftertail)
625
626/*
627 * The following cases will be handled by ashr_4
628 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
629 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
630 */
631 .p2align 4
632L(ashr_4):
633 mov $0xffff, %esi
634 pxor %xmm0, %xmm0
635 movdqa (%edx), %xmm2
636 movdqa (%eax), %xmm1
637 pcmpeqb %xmm1, %xmm0
638 pslldq $12, %xmm2
639 pcmpeqb %xmm1, %xmm2
640 psubb %xmm0, %xmm2
641 pmovmskb %xmm2, %edi
642 shr %cl, %esi
643 shr %cl, %edi
644 sub %edi, %esi
645 lea -12(%ecx), %edi
646 jnz L(less32bytes)
647
648 UPDATE_STRNCMP_COUNTER
649
650 movdqa (%edx), %xmm3
651 pxor %xmm0, %xmm0
652 mov $16, %ecx
653 or $4, %ebx
654 lea 4(%edx), %edi
655 and $0xfff, %edi
656 sub $0x1000, %edi
657
658 .p2align 4
659L(loop_ashr_4):
660 add $16, %edi
661 jg L(nibble_ashr_4)
662
663L(gobble_ashr_4):
664 movdqa (%eax, %ecx), %xmm1
665 movdqa (%edx, %ecx), %xmm2
666 movdqa %xmm2, %xmm4
667
668 palignr $4, %xmm3, %xmm2
669
670 pcmpeqb %xmm1, %xmm0
671 pcmpeqb %xmm2, %xmm1
672 psubb %xmm0, %xmm1
673 pmovmskb %xmm1, %esi
674 sub $0xffff, %esi
675 jnz L(exit)
676
677#ifdef USE_AS_STRNCMP
678 cmp $16, %ebp
679 lea -16(%ebp), %ebp
680 jbe L(more8byteseq)
681#endif
682
683 add $16, %ecx
684 movdqa %xmm4, %xmm3
685
686 add $16, %edi
687 jg L(nibble_ashr_4)
688
689 movdqa (%eax, %ecx), %xmm1
690 movdqa (%edx, %ecx), %xmm2
691 movdqa %xmm2, %xmm4
692
693 palignr $4, %xmm3, %xmm2
694
695 pcmpeqb %xmm1, %xmm0
696 pcmpeqb %xmm2, %xmm1
697 psubb %xmm0, %xmm1
698 pmovmskb %xmm1, %esi
699 sub $0xffff, %esi
700 jnz L(exit)
701
702#ifdef USE_AS_STRNCMP
703 cmp $16, %ebp
704 lea -16(%ebp), %ebp
705 jbe L(more8byteseq)
706#endif
707
708 add $16, %ecx
709 movdqa %xmm4, %xmm3
710 jmp L(loop_ashr_4)
711
712 .p2align 4
713L(nibble_ashr_4):
714 pcmpeqb %xmm3, %xmm0
715 pmovmskb %xmm0, %esi
716 test $0xfff0, %esi
717 jnz L(ashr_4_exittail)
718
719#ifdef USE_AS_STRNCMP
720 cmp $12, %ebp
721 jbe L(ashr_4_exittail)
722#endif
723
724 pxor %xmm0, %xmm0
725 sub $0x1000, %edi
726 jmp L(gobble_ashr_4)
727
728 .p2align 4
729L(ashr_4_exittail):
730 movdqa (%eax, %ecx), %xmm1
731 psrldq $4, %xmm0
732 psrldq $4, %xmm3
733 jmp L(aftertail)
734
735/*
736 * The following cases will be handled by ashr_5
737 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
738 * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
739 */
740 .p2align 4
741L(ashr_5):
742 mov $0xffff, %esi
743 pxor %xmm0, %xmm0
744 movdqa (%edx), %xmm2
745 movdqa (%eax), %xmm1
746 pcmpeqb %xmm1, %xmm0
747 pslldq $11, %xmm2
748 pcmpeqb %xmm1, %xmm2
749 psubb %xmm0, %xmm2
750 pmovmskb %xmm2, %edi
751 shr %cl, %esi
752 shr %cl, %edi
753 sub %edi, %esi
754 lea -11(%ecx), %edi
755 jnz L(less32bytes)
756
757 UPDATE_STRNCMP_COUNTER
758
759 movdqa (%edx), %xmm3
760 pxor %xmm0, %xmm0
761 mov $16, %ecx
762 or $5, %ebx
763 lea 5(%edx), %edi
764 and $0xfff, %edi
765 sub $0x1000, %edi
766
767 .p2align 4
768L(loop_ashr_5):
769 add $16, %edi
770 jg L(nibble_ashr_5)
771
772L(gobble_ashr_5):
773 movdqa (%eax, %ecx), %xmm1
774 movdqa (%edx, %ecx), %xmm2
775 movdqa %xmm2, %xmm4
776
777 palignr $5, %xmm3, %xmm2
778
779 pcmpeqb %xmm1, %xmm0
780 pcmpeqb %xmm2, %xmm1
781 psubb %xmm0, %xmm1
782 pmovmskb %xmm1, %esi
783 sub $0xffff, %esi
784 jnz L(exit)
785
786#ifdef USE_AS_STRNCMP
787 cmp $16, %ebp
788 lea -16(%ebp), %ebp
789 jbe L(more8byteseq)
790#endif
791 add $16, %ecx
792 movdqa %xmm4, %xmm3
793
794 add $16, %edi
795 jg L(nibble_ashr_5)
796
797 movdqa (%eax, %ecx), %xmm1
798 movdqa (%edx, %ecx), %xmm2
799 movdqa %xmm2, %xmm4
800
801 palignr $5, %xmm3, %xmm2
802
803 pcmpeqb %xmm1, %xmm0
804 pcmpeqb %xmm2, %xmm1
805 psubb %xmm0, %xmm1
806 pmovmskb %xmm1, %esi
807 sub $0xffff, %esi
808 jnz L(exit)
809
810#ifdef USE_AS_STRNCMP
811 cmp $16, %ebp
812 lea -16(%ebp), %ebp
813 jbe L(more8byteseq)
814#endif
815 add $16, %ecx
816 movdqa %xmm4, %xmm3
817 jmp L(loop_ashr_5)
818
819 .p2align 4
820L(nibble_ashr_5):
821 pcmpeqb %xmm3, %xmm0
822 pmovmskb %xmm0, %esi
823 test $0xffe0, %esi
824 jnz L(ashr_5_exittail)
825
826#ifdef USE_AS_STRNCMP
827 cmp $11, %ebp
828 jbe L(ashr_5_exittail)
829#endif
830 pxor %xmm0, %xmm0
831 sub $0x1000, %edi
832 jmp L(gobble_ashr_5)
833
834 .p2align 4
835L(ashr_5_exittail):
836 movdqa (%eax, %ecx), %xmm1
837 psrldq $5, %xmm0
838 psrldq $5, %xmm3
839 jmp L(aftertail)
840
841/*
842 * The following cases will be handled by ashr_6
843 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
844 * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
845 */
846
847 .p2align 4
848L(ashr_6):
849 mov $0xffff, %esi
850 pxor %xmm0, %xmm0
851 movdqa (%edx), %xmm2
852 movdqa (%eax), %xmm1
853 pcmpeqb %xmm1, %xmm0
854 pslldq $10, %xmm2
855 pcmpeqb %xmm1, %xmm2
856 psubb %xmm0, %xmm2
857 pmovmskb %xmm2, %edi
858 shr %cl, %esi
859 shr %cl, %edi
860 sub %edi, %esi
861 lea -10(%ecx), %edi
862 jnz L(less32bytes)
863
864 UPDATE_STRNCMP_COUNTER
865
866 movdqa (%edx), %xmm3
867 pxor %xmm0, %xmm0
868 mov $16, %ecx
869 or $6, %ebx
870 lea 6(%edx), %edi
871 and $0xfff, %edi
872 sub $0x1000, %edi
873
874 .p2align 4
875L(loop_ashr_6):
876 add $16, %edi
877 jg L(nibble_ashr_6)
878
879L(gobble_ashr_6):
880 movdqa (%eax, %ecx), %xmm1
881 movdqa (%edx, %ecx), %xmm2
882 movdqa %xmm2, %xmm4
883
884 palignr $6, %xmm3, %xmm2
885
886 pcmpeqb %xmm1, %xmm0
887 pcmpeqb %xmm2, %xmm1
888 psubb %xmm0, %xmm1
889 pmovmskb %xmm1, %esi
890 sub $0xffff, %esi
891 jnz L(exit)
892
893#ifdef USE_AS_STRNCMP
894 cmp $16, %ebp
895 lea -16(%ebp), %ebp
896 jbe L(more8byteseq)
897#endif
898
899 add $16, %ecx
900 movdqa %xmm4, %xmm3
901
902 add $16, %edi
903 jg L(nibble_ashr_6)
904
905 movdqa (%eax, %ecx), %xmm1
906 movdqa (%edx, %ecx), %xmm2
907 movdqa %xmm2, %xmm4
908
909 palignr $6, %xmm3, %xmm2
910
911 pcmpeqb %xmm1, %xmm0
912 pcmpeqb %xmm2, %xmm1
913 psubb %xmm0, %xmm1
914 pmovmskb %xmm1, %esi
915 sub $0xffff, %esi
916 jnz L(exit)
917#ifdef USE_AS_STRNCMP
918 cmp $16, %ebp
919 lea -16(%ebp), %ebp
920 jbe L(more8byteseq)
921#endif
922
923 add $16, %ecx
924 movdqa %xmm4, %xmm3
925 jmp L(loop_ashr_6)
926
927 .p2align 4
928L(nibble_ashr_6):
929 pcmpeqb %xmm3, %xmm0
930 pmovmskb %xmm0, %esi
931 test $0xffc0, %esi
932 jnz L(ashr_6_exittail)
933
934#ifdef USE_AS_STRNCMP
935 cmp $10, %ebp
936 jbe L(ashr_6_exittail)
937#endif
938 pxor %xmm0, %xmm0
939 sub $0x1000, %edi
940 jmp L(gobble_ashr_6)
941
942 .p2align 4
943L(ashr_6_exittail):
944 movdqa (%eax, %ecx), %xmm1
945 psrldq $6, %xmm0
946 psrldq $6, %xmm3
947 jmp L(aftertail)
948
949/*
950 * The following cases will be handled by ashr_7
951 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
952 * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
953 */
954
955 .p2align 4
956L(ashr_7):
957 mov $0xffff, %esi
958 pxor %xmm0, %xmm0
959 movdqa (%edx), %xmm2
960 movdqa (%eax), %xmm1
961 pcmpeqb %xmm1, %xmm0
962 pslldq $9, %xmm2
963 pcmpeqb %xmm1, %xmm2
964 psubb %xmm0, %xmm2
965 pmovmskb %xmm2, %edi
966 shr %cl, %esi
967 shr %cl, %edi
968 sub %edi, %esi
969 lea -9(%ecx), %edi
970 jnz L(less32bytes)
971
972 UPDATE_STRNCMP_COUNTER
973
974 movdqa (%edx), %xmm3
975 pxor %xmm0, %xmm0
976 mov $16, %ecx
977 or $7, %ebx
978 lea 8(%edx), %edi
979 and $0xfff, %edi
980 sub $0x1000, %edi
981
982 .p2align 4
983L(loop_ashr_7):
984 add $16, %edi
985 jg L(nibble_ashr_7)
986
987L(gobble_ashr_7):
988 movdqa (%eax, %ecx), %xmm1
989 movdqa (%edx, %ecx), %xmm2
990 movdqa %xmm2, %xmm4
991
992 palignr $7, %xmm3, %xmm2
993
994 pcmpeqb %xmm1, %xmm0
995 pcmpeqb %xmm2, %xmm1
996 psubb %xmm0, %xmm1
997 pmovmskb %xmm1, %esi
998 sub $0xffff, %esi
999 jnz L(exit)
1000
1001#ifdef USE_AS_STRNCMP
1002 cmp $16, %ebp
1003 lea -16(%ebp), %ebp
1004 jbe L(more8byteseq)
1005#endif
1006
1007 add $16, %ecx
1008 movdqa %xmm4, %xmm3
1009
1010 add $16, %edi
1011 jg L(nibble_ashr_7)
1012
1013 movdqa (%eax, %ecx), %xmm1
1014 movdqa (%edx, %ecx), %xmm2
1015 movdqa %xmm2, %xmm4
1016
1017 palignr $7, %xmm3, %xmm2
1018
1019 pcmpeqb %xmm1, %xmm0
1020 pcmpeqb %xmm2, %xmm1
1021 psubb %xmm0, %xmm1
1022 pmovmskb %xmm1, %esi
1023 sub $0xffff, %esi
1024 jnz L(exit)
1025
1026#ifdef USE_AS_STRNCMP
1027 cmp $16, %ebp
1028 lea -16(%ebp), %ebp
1029 jbe L(more8byteseq)
1030#endif
1031
1032 add $16, %ecx
1033 movdqa %xmm4, %xmm3
1034 jmp L(loop_ashr_7)
1035
1036 .p2align 4
1037L(nibble_ashr_7):
1038 pcmpeqb %xmm3, %xmm0
1039 pmovmskb %xmm0, %esi
1040 test $0xff80, %esi
1041 jnz L(ashr_7_exittail)
1042
1043#ifdef USE_AS_STRNCMP
1044 cmp $9, %ebp
1045 jbe L(ashr_7_exittail)
1046#endif
1047 pxor %xmm0, %xmm0
1049 sub $0x1000, %edi
1050 jmp L(gobble_ashr_7)
1051
1052 .p2align 4
1053L(ashr_7_exittail):
1054 movdqa (%eax, %ecx), %xmm1
1055 psrldq $7, %xmm0
1056 psrldq $7, %xmm3
1057 jmp L(aftertail)
1058
1059/*
1060 * The following cases will be handled by ashr_8
1061 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1062 * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
1063 */
1064 .p2align 4
1065L(ashr_8):
1066 mov $0xffff, %esi
1067 pxor %xmm0, %xmm0
1068 movdqa (%edx), %xmm2
1069 movdqa (%eax), %xmm1
1070 pcmpeqb %xmm1, %xmm0
1071 pslldq $8, %xmm2
1072 pcmpeqb %xmm1, %xmm2
1073 psubb %xmm0, %xmm2
1074 pmovmskb %xmm2, %edi
1075 shr %cl, %esi
1076 shr %cl, %edi
1077 sub %edi, %esi
1078 lea -8(%ecx), %edi
1079 jnz L(less32bytes)
1080
1081 UPDATE_STRNCMP_COUNTER
1082
1083 movdqa (%edx), %xmm3
1084 pxor %xmm0, %xmm0
1085 mov $16, %ecx
1086 or $8, %ebx
1087 lea 8(%edx), %edi
1088 and $0xfff, %edi
1089 sub $0x1000, %edi
1090
1091 .p2align 4
1092L(loop_ashr_8):
1093 add $16, %edi
1094 jg L(nibble_ashr_8)
1095
1096L(gobble_ashr_8):
1097 movdqa (%eax, %ecx), %xmm1
1098 movdqa (%edx, %ecx), %xmm2
1099 movdqa %xmm2, %xmm4
1100
1101 palignr $8, %xmm3, %xmm2
1102
1103 pcmpeqb %xmm1, %xmm0
1104 pcmpeqb %xmm2, %xmm1
1105 psubb %xmm0, %xmm1
1106 pmovmskb %xmm1, %esi
1107 sub $0xffff, %esi
1108 jnz L(exit)
1109
1110#ifdef USE_AS_STRNCMP
1111 cmp $16, %ebp
1112 lea -16(%ebp), %ebp
1113 jbe L(more8byteseq)
1114#endif
1115 add $16, %ecx
1116 movdqa %xmm4, %xmm3
1117
1118 add $16, %edi
1119 jg L(nibble_ashr_8)
1120
1121 movdqa (%eax, %ecx), %xmm1
1122 movdqa (%edx, %ecx), %xmm2
1123 movdqa %xmm2, %xmm4
1124
1125 palignr $8, %xmm3, %xmm2
1126
1127 pcmpeqb %xmm1, %xmm0
1128 pcmpeqb %xmm2, %xmm1
1129 psubb %xmm0, %xmm1
1130 pmovmskb %xmm1, %esi
1131 sub $0xffff, %esi
1132 jnz L(exit)
1133
1134#ifdef USE_AS_STRNCMP
1135 cmp $16, %ebp
1136 lea -16(%ebp), %ebp
1137 jbe L(more8byteseq)
1138#endif
1139 add $16, %ecx
1140 movdqa %xmm4, %xmm3
1141 jmp L(loop_ashr_8)
1142
1143 .p2align 4
1144L(nibble_ashr_8):
1145 pcmpeqb %xmm3, %xmm0
1146 pmovmskb %xmm0, %esi
1147 test $0xff00, %esi
1148 jnz L(ashr_8_exittail)
1149
1150#ifdef USE_AS_STRNCMP
1151 cmp $8, %ebp
1152 jbe L(ashr_8_exittail)
1153#endif
1154 pxor %xmm0, %xmm0
1156 sub $0x1000, %edi
1157 jmp L(gobble_ashr_8)
1158
1159 .p2align 4
1160L(ashr_8_exittail):
1161 movdqa (%eax, %ecx), %xmm1
1162 psrldq $8, %xmm0
1163 psrldq $8, %xmm3
1164 jmp L(aftertail)
1165
1166/*
1167 * The following cases will be handled by ashr_9
1168 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1169 * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
1170 */
1171 .p2align 4
1172L(ashr_9):
1173 mov $0xffff, %esi
1174 pxor %xmm0, %xmm0
1175 movdqa (%edx), %xmm2
1176 movdqa (%eax), %xmm1
1177 pcmpeqb %xmm1, %xmm0
1178 pslldq $7, %xmm2
1179 pcmpeqb %xmm1, %xmm2
1180 psubb %xmm0, %xmm2
1181 pmovmskb %xmm2, %edi
1182 shr %cl, %esi
1183 shr %cl, %edi
1184 sub %edi, %esi
1185 lea -7(%ecx), %edi
1186 jnz L(less32bytes)
1187
1188 UPDATE_STRNCMP_COUNTER
1189
1190 movdqa (%edx), %xmm3
1191 pxor %xmm0, %xmm0
1192 mov $16, %ecx
1193 or $9, %ebx
1194 lea 9(%edx), %edi
1195 and $0xfff, %edi
1196 sub $0x1000, %edi
1197
1198 .p2align 4
1199L(loop_ashr_9):
1200 add $16, %edi
1201 jg L(nibble_ashr_9)
1202
1203L(gobble_ashr_9):
1204 movdqa (%eax, %ecx), %xmm1
1205 movdqa (%edx, %ecx), %xmm2
1206 movdqa %xmm2, %xmm4
1207
1208 palignr $9, %xmm3, %xmm2
1209
1210 pcmpeqb %xmm1, %xmm0
1211 pcmpeqb %xmm2, %xmm1
1212 psubb %xmm0, %xmm1
1213 pmovmskb %xmm1, %esi
1214 sub $0xffff, %esi
1215 jnz L(exit)
1216
1217#ifdef USE_AS_STRNCMP
1218 cmp $16, %ebp
1219 lea -16(%ebp), %ebp
1220 jbe L(more8byteseq)
1221#endif
1222 add $16, %ecx
1223 movdqa %xmm4, %xmm3
1224
1225 add $16, %edi
1226 jg L(nibble_ashr_9)
1227
1228 movdqa (%eax, %ecx), %xmm1
1229 movdqa (%edx, %ecx), %xmm2
1230 movdqa %xmm2, %xmm4
1231
1232 palignr $9, %xmm3, %xmm2
1233
1234 pcmpeqb %xmm1, %xmm0
1235 pcmpeqb %xmm2, %xmm1
1236 psubb %xmm0, %xmm1
1237 pmovmskb %xmm1, %esi
1238 sub $0xffff, %esi
1239 jnz L(exit)
1240
1241#ifdef USE_AS_STRNCMP
1242 cmp $16, %ebp
1243 lea -16(%ebp), %ebp
1244 jbe L(more8byteseq)
1245#endif
1246 add $16, %ecx
1247 movdqa %xmm4, %xmm3
1248 jmp L(loop_ashr_9)
1249
1250 .p2align 4
1251L(nibble_ashr_9):
1252 pcmpeqb %xmm3, %xmm0
1253 pmovmskb %xmm0, %esi
1254 test $0xfe00, %esi
1255 jnz L(ashr_9_exittail)
1256
1257#ifdef USE_AS_STRNCMP
1258 cmp $7, %ebp
1259 jbe L(ashr_9_exittail)
1260#endif
1261 pxor %xmm0, %xmm0
1262 sub $0x1000, %edi
1263 jmp L(gobble_ashr_9)
1264
1265 .p2align 4
1266L(ashr_9_exittail):
1267 movdqa (%eax, %ecx), %xmm1
1268 psrldq $9, %xmm0
1269 psrldq $9, %xmm3
1270 jmp L(aftertail)
1271
1272/*
1273 * The following cases will be handled by ashr_10
1274 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1275 * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
1276 */
1277 .p2align 4
1278L(ashr_10):
1279 mov $0xffff, %esi
1280 pxor %xmm0, %xmm0
1281 movdqa (%edx), %xmm2
1282 movdqa (%eax), %xmm1
1283 pcmpeqb %xmm1, %xmm0
1284 pslldq $6, %xmm2
1285 pcmpeqb %xmm1, %xmm2
1286 psubb %xmm0, %xmm2
1287 pmovmskb %xmm2, %edi
1288 shr %cl, %esi
1289 shr %cl, %edi
1290 sub %edi, %esi
1291 lea -6(%ecx), %edi
1292 jnz L(less32bytes)
1293
1294 UPDATE_STRNCMP_COUNTER
1295
1296 movdqa (%edx), %xmm3
1297 pxor %xmm0, %xmm0
1298 mov $16, %ecx
1299 or $10, %ebx
1300 lea 10(%edx), %edi
1301 and $0xfff, %edi
1302 sub $0x1000, %edi
1303
1304 .p2align 4
1305L(loop_ashr_10):
1306 add $16, %edi
1307 jg L(nibble_ashr_10)
1308
1309L(gobble_ashr_10):
1310 movdqa (%eax, %ecx), %xmm1
1311 movdqa (%edx, %ecx), %xmm2
1312 movdqa %xmm2, %xmm4
1313
1314 palignr $10, %xmm3, %xmm2
1315
1316 pcmpeqb %xmm1, %xmm0
1317 pcmpeqb %xmm2, %xmm1
1318 psubb %xmm0, %xmm1
1319 pmovmskb %xmm1, %esi
1320 sub $0xffff, %esi
1321 jnz L(exit)
1322
1323#ifdef USE_AS_STRNCMP
1324 cmp $16, %ebp
1325 lea -16(%ebp), %ebp
1326 jbe L(more8byteseq)
1327#endif
1328 add $16, %ecx
1329 movdqa %xmm4, %xmm3
1330
1331 add $16, %edi
1332 jg L(nibble_ashr_10)
1333
1334 movdqa (%eax, %ecx), %xmm1
1335 movdqa (%edx, %ecx), %xmm2
1336 movdqa %xmm2, %xmm4
1337
1338 palignr $10, %xmm3, %xmm2
1339
1340 pcmpeqb %xmm1, %xmm0
1341 pcmpeqb %xmm2, %xmm1
1342 psubb %xmm0, %xmm1
1343 pmovmskb %xmm1, %esi
1344 sub $0xffff, %esi
1345 jnz L(exit)
1346
1347#ifdef USE_AS_STRNCMP
1348 cmp $16, %ebp
1349 lea -16(%ebp), %ebp
1350 jbe L(more8byteseq)
1351#endif
1352 add $16, %ecx
1353 movdqa %xmm4, %xmm3
1354 jmp L(loop_ashr_10)
1355
1356 .p2align 4
1357L(nibble_ashr_10):
1358 pcmpeqb %xmm3, %xmm0
1359 pmovmskb %xmm0, %esi
1360 test $0xfc00, %esi
1361 jnz L(ashr_10_exittail)
1362
1363#ifdef USE_AS_STRNCMP
1364 cmp $6, %ebp
1365 jbe L(ashr_10_exittail)
1366#endif
1367 pxor %xmm0, %xmm0
1368 sub $0x1000, %edi
1369 jmp L(gobble_ashr_10)
1370
1371 .p2align 4
1372L(ashr_10_exittail):
1373 movdqa (%eax, %ecx), %xmm1
1374 psrldq $10, %xmm0
1375 psrldq $10, %xmm3
1376 jmp L(aftertail)
1377
1378/*
1379 * The following cases will be handled by ashr_11
1380 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1381 * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
1382 */
1383 .p2align 4
1384L(ashr_11):
1385 mov $0xffff, %esi
1386 pxor %xmm0, %xmm0
1387 movdqa (%edx), %xmm2
1388 movdqa (%eax), %xmm1
1389 pcmpeqb %xmm1, %xmm0
1390 pslldq $5, %xmm2
1391 pcmpeqb %xmm1, %xmm2
1392 psubb %xmm0, %xmm2
1393 pmovmskb %xmm2, %edi
1394 shr %cl, %esi
1395 shr %cl, %edi
1396 sub %edi, %esi
1397 lea -5(%ecx), %edi
1398 jnz L(less32bytes)
1399
1400 UPDATE_STRNCMP_COUNTER
1401
1402 movdqa (%edx), %xmm3
1403 pxor %xmm0, %xmm0
1404 mov $16, %ecx
1405 or $11, %ebx
1406 lea 11(%edx), %edi
1407 and $0xfff, %edi
1408 sub $0x1000, %edi
1409
1410 .p2align 4
1411L(loop_ashr_11):
1412 add $16, %edi
1413 jg L(nibble_ashr_11)
1414
1415L(gobble_ashr_11):
1416 movdqa (%eax, %ecx), %xmm1
1417 movdqa (%edx, %ecx), %xmm2
1418 movdqa %xmm2, %xmm4
1419
1420 palignr $11, %xmm3, %xmm2
1421
1422 pcmpeqb %xmm1, %xmm0
1423 pcmpeqb %xmm2, %xmm1
1424 psubb %xmm0, %xmm1
1425 pmovmskb %xmm1, %esi
1426 sub $0xffff, %esi
1427 jnz L(exit)
1428
1429#ifdef USE_AS_STRNCMP
1430 cmp $16, %ebp
1431 lea -16(%ebp), %ebp
1432 jbe L(more8byteseq)
1433#endif
1434 add $16, %ecx
1435 movdqa %xmm4, %xmm3
1436
1437 add $16, %edi
1438 jg L(nibble_ashr_11)
1439
1440 movdqa (%eax, %ecx), %xmm1
1441 movdqa (%edx, %ecx), %xmm2
1442 movdqa %xmm2, %xmm4
1443
1444 palignr $11, %xmm3, %xmm2
1445
1446 pcmpeqb %xmm1, %xmm0
1447 pcmpeqb %xmm2, %xmm1
1448 psubb %xmm0, %xmm1
1449 pmovmskb %xmm1, %esi
1450 sub $0xffff, %esi
1451 jnz L(exit)
1452
1453#ifdef USE_AS_STRNCMP
1454 cmp $16, %ebp
1455 lea -16(%ebp), %ebp
1456 jbe L(more8byteseq)
1457#endif
1458 add $16, %ecx
1459 movdqa %xmm4, %xmm3
1460 jmp L(loop_ashr_11)
1461
1462 .p2align 4
1463L(nibble_ashr_11):
1464 pcmpeqb %xmm3, %xmm0
1465 pmovmskb %xmm0, %esi
1466 test $0xf800, %esi
1467 jnz L(ashr_11_exittail)
1468
1469#ifdef USE_AS_STRNCMP
1470 cmp $5, %ebp
1471 jbe L(ashr_11_exittail)
1472#endif
1473 pxor %xmm0, %xmm0
1474 sub $0x1000, %edi
1475 jmp L(gobble_ashr_11)
1476
1477 .p2align 4
1478L(ashr_11_exittail):
1479 movdqa (%eax, %ecx), %xmm1
1480 psrldq $11, %xmm0
1481 psrldq $11, %xmm3
1482 jmp L(aftertail)
1483
1484/*
1485 * The following cases will be handled by ashr_12
1486 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1487 * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
1488 */
1489 .p2align 4
1490L(ashr_12):
1491 mov $0xffff, %esi
1492 pxor %xmm0, %xmm0
1493 movdqa (%edx), %xmm2
1494 movdqa (%eax), %xmm1
1495 pcmpeqb %xmm1, %xmm0
1496 pslldq $4, %xmm2
1497 pcmpeqb %xmm1, %xmm2
1498 psubb %xmm0, %xmm2
1499 pmovmskb %xmm2, %edi
1500 shr %cl, %esi
1501 shr %cl, %edi
1502 sub %edi, %esi
1503 lea -4(%ecx), %edi
1504 jnz L(less32bytes)
1505
1506 UPDATE_STRNCMP_COUNTER
1507
1508 movdqa (%edx), %xmm3
1509 pxor %xmm0, %xmm0
1510 mov $16, %ecx
1511 or $12, %ebx
1512 lea 12(%edx), %edi
1513 and $0xfff, %edi
1514 sub $0x1000, %edi
1515
1516 .p2align 4
1517L(loop_ashr_12):
1518 add $16, %edi
1519 jg L(nibble_ashr_12)
1520
1521L(gobble_ashr_12):
1522 movdqa (%eax, %ecx), %xmm1
1523 movdqa (%edx, %ecx), %xmm2
1524 movdqa %xmm2, %xmm4
1525
1526 palignr $12, %xmm3, %xmm2
1527
1528 pcmpeqb %xmm1, %xmm0
1529 pcmpeqb %xmm2, %xmm1
1530 psubb %xmm0, %xmm1
1531 pmovmskb %xmm1, %esi
1532 sub $0xffff, %esi
1533 jnz L(exit)
1534
1535#ifdef USE_AS_STRNCMP
1536 cmp $16, %ebp
1537 lea -16(%ebp), %ebp
1538 jbe L(more8byteseq)
1539#endif
1540
1541 add $16, %ecx
1542 movdqa %xmm4, %xmm3
1543
1544 add $16, %edi
1545 jg L(nibble_ashr_12)
1546
1547 movdqa (%eax, %ecx), %xmm1
1548 movdqa (%edx, %ecx), %xmm2
1549 movdqa %xmm2, %xmm4
1550
1551 palignr $12, %xmm3, %xmm2
1552
1553 pcmpeqb %xmm1, %xmm0
1554 pcmpeqb %xmm2, %xmm1
1555 psubb %xmm0, %xmm1
1556 pmovmskb %xmm1, %esi
1557 sub $0xffff, %esi
1558 jnz L(exit)
1559
1560#ifdef USE_AS_STRNCMP
1561 cmp $16, %ebp
1562 lea -16(%ebp), %ebp
1563 jbe L(more8byteseq)
1564#endif
1565 add $16, %ecx
1566 movdqa %xmm4, %xmm3
1567 jmp L(loop_ashr_12)
1568
1569 .p2align 4
1570L(nibble_ashr_12):
1571 pcmpeqb %xmm3, %xmm0
1572 pmovmskb %xmm0, %esi
1573 test $0xf000, %esi
1574 jnz L(ashr_12_exittail)
1575
1576#ifdef USE_AS_STRNCMP
1577 cmp $4, %ebp
1578 jbe L(ashr_12_exittail)
1579#endif
1580 pxor %xmm0, %xmm0
1581 sub $0x1000, %edi
1582 jmp L(gobble_ashr_12)
1583
1584 .p2align 4
1585L(ashr_12_exittail):
1586 movdqa (%eax, %ecx), %xmm1
1587 psrldq $12, %xmm0
1588 psrldq $12, %xmm3
1589 jmp L(aftertail)
1590
1591/*
1592 * The following cases will be handled by ashr_13
1593 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1594 * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
1595 */
1596 .p2align 4
1597L(ashr_13):
1598 mov $0xffff, %esi
1599 pxor %xmm0, %xmm0
1600 movdqa (%edx), %xmm2
1601 movdqa (%eax), %xmm1
1602 pcmpeqb %xmm1, %xmm0
1603 pslldq $3, %xmm2
1604 pcmpeqb %xmm1, %xmm2
1605 psubb %xmm0, %xmm2
1606 pmovmskb %xmm2, %edi
1607 shr %cl, %esi
1608 shr %cl, %edi
1609 sub %edi, %esi
1610 lea -3(%ecx), %edi
1611 jnz L(less32bytes)
1612
1613 UPDATE_STRNCMP_COUNTER
1614
1615 movdqa (%edx), %xmm3
1616 pxor %xmm0, %xmm0
1617 mov $16, %ecx
1618 or $13, %ebx
1619 lea 13(%edx), %edi
1620 and $0xfff, %edi
1621 sub $0x1000, %edi
1622
1623 .p2align 4
1624L(loop_ashr_13):
1625 add $16, %edi
1626 jg L(nibble_ashr_13)
1627
1628L(gobble_ashr_13):
1629 movdqa (%eax, %ecx), %xmm1
1630 movdqa (%edx, %ecx), %xmm2
1631 movdqa %xmm2, %xmm4
1632
1633 palignr $13, %xmm3, %xmm2
1634
1635 pcmpeqb %xmm1, %xmm0
1636 pcmpeqb %xmm2, %xmm1
1637 psubb %xmm0, %xmm1
1638 pmovmskb %xmm1, %esi
1639 sub $0xffff, %esi
1640 jnz L(exit)
1641
1642#ifdef USE_AS_STRNCMP
1643 cmp $16, %ebp
1644 lea -16(%ebp), %ebp
1645 jbe L(more8byteseq)
1646#endif
1647 add $16, %ecx
1648 movdqa %xmm4, %xmm3
1649
1650 add $16, %edi
1651 jg L(nibble_ashr_13)
1652
1653 movdqa (%eax, %ecx), %xmm1
1654 movdqa (%edx, %ecx), %xmm2
1655 movdqa %xmm2, %xmm4
1656
1657 palignr $13, %xmm3, %xmm2
1658
1659 pcmpeqb %xmm1, %xmm0
1660 pcmpeqb %xmm2, %xmm1
1661 psubb %xmm0, %xmm1
1662 pmovmskb %xmm1, %esi
1663 sub $0xffff, %esi
1664 jnz L(exit)
1665
1666#ifdef USE_AS_STRNCMP
1667 cmp $16, %ebp
1668 lea -16(%ebp), %ebp
1669 jbe L(more8byteseq)
1670#endif
1671 add $16, %ecx
1672 movdqa %xmm4, %xmm3
1673 jmp L(loop_ashr_13)
1674
1675 .p2align 4
1676L(nibble_ashr_13):
1677 pcmpeqb %xmm3, %xmm0
1678 pmovmskb %xmm0, %esi
1679 test $0xe000, %esi
1680 jnz L(ashr_13_exittail)
1681
1682#ifdef USE_AS_STRNCMP
1683 cmp $3, %ebp
1684 jbe L(ashr_13_exittail)
1685#endif
1686 pxor %xmm0, %xmm0
1687 sub $0x1000, %edi
1688 jmp L(gobble_ashr_13)
1689
1690 .p2align 4
1691L(ashr_13_exittail):
1692 movdqa (%eax, %ecx), %xmm1
1693 psrldq $13, %xmm0
1694 psrldq $13, %xmm3
1695 jmp L(aftertail)
1696
1697/*
1698 * The following cases will be handled by ashr_14
1699 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1700 * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
1701 */
1702 .p2align 4
1703L(ashr_14):
1704 mov $0xffff, %esi
1705 pxor %xmm0, %xmm0
1706 movdqa (%edx), %xmm2
1707 movdqa (%eax), %xmm1
1708 pcmpeqb %xmm1, %xmm0
1709 pslldq $2, %xmm2
1710 pcmpeqb %xmm1, %xmm2
1711 psubb %xmm0, %xmm2
1712 pmovmskb %xmm2, %edi
1713 shr %cl, %esi
1714 shr %cl, %edi
1715 sub %edi, %esi
1716 lea -2(%ecx), %edi
1717 jnz L(less32bytes)
1718
1719 UPDATE_STRNCMP_COUNTER
1720
1721 movdqa (%edx), %xmm3
1722 pxor %xmm0, %xmm0
1723 mov $16, %ecx
1724 or $14, %ebx
1725 lea 14(%edx), %edi
1726 and $0xfff, %edi
1727 sub $0x1000, %edi
1728
1729 .p2align 4
1730L(loop_ashr_14):
1731 add $16, %edi
1732 jg L(nibble_ashr_14)
1733
1734L(gobble_ashr_14):
1735 movdqa (%eax, %ecx), %xmm1
1736 movdqa (%edx, %ecx), %xmm2
1737 movdqa %xmm2, %xmm4
1738
1739 palignr $14, %xmm3, %xmm2
1740
1741 pcmpeqb %xmm1, %xmm0
1742 pcmpeqb %xmm2, %xmm1
1743 psubb %xmm0, %xmm1
1744 pmovmskb %xmm1, %esi
1745 sub $0xffff, %esi
1746 jnz L(exit)
1747
1748#ifdef USE_AS_STRNCMP
1749 cmp $16, %ebp
1750 lea -16(%ebp), %ebp
1751 jbe L(more8byteseq)
1752#endif
1753 add $16, %ecx
1754 movdqa %xmm4, %xmm3
1755
1756 add $16, %edi
1757 jg L(nibble_ashr_14)
1758
1759 movdqa (%eax, %ecx), %xmm1
1760 movdqa (%edx, %ecx), %xmm2
1761 movdqa %xmm2, %xmm4
1762
1763 palignr $14, %xmm3, %xmm2
1764
1765 pcmpeqb %xmm1, %xmm0
1766 pcmpeqb %xmm2, %xmm1
1767 psubb %xmm0, %xmm1
1768 pmovmskb %xmm1, %esi
1769 sub $0xffff, %esi
1770 jnz L(exit)
1771
1772#ifdef USE_AS_STRNCMP
1773 cmp $16, %ebp
1774 lea -16(%ebp), %ebp
1775 jbe L(more8byteseq)
1776#endif
1777 add $16, %ecx
1778 movdqa %xmm4, %xmm3
1779 jmp L(loop_ashr_14)
1780
1781 .p2align 4
1782L(nibble_ashr_14):
1783 pcmpeqb %xmm3, %xmm0
1784 pmovmskb %xmm0, %esi
1785 test $0xc000, %esi
1786 jnz L(ashr_14_exittail)
1787
1788#ifdef USE_AS_STRNCMP
1789 cmp $2, %ebp
1790 jbe L(ashr_14_exittail)
1791#endif
1792 pxor %xmm0, %xmm0
1793 sub $0x1000, %edi
1794 jmp L(gobble_ashr_14)
1795
1796 .p2align 4
1797L(ashr_14_exittail):
1798 movdqa (%eax, %ecx), %xmm1
1799 psrldq $14, %xmm0
1800 psrldq $14, %xmm3
1801 jmp L(aftertail)
1802
1803/*
1804 * The following cases will be handled by ashr_15
1805 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1806 * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
1807 */
1808
1809 .p2align 4
1810L(ashr_15):
1811 mov $0xffff, %esi
1812 pxor %xmm0, %xmm0
1813 movdqa (%edx), %xmm2
1814 movdqa (%eax), %xmm1
1815 pcmpeqb %xmm1, %xmm0
1816 pslldq $1, %xmm2
1817 pcmpeqb %xmm1, %xmm2
1818 psubb %xmm0, %xmm2
1819 pmovmskb %xmm2, %edi
1820 shr %cl, %esi
1821 shr %cl, %edi
1822 sub %edi, %esi
1823 lea -1(%ecx), %edi
1824 jnz L(less32bytes)
1825
1826 UPDATE_STRNCMP_COUNTER
1827
1828 movdqa (%edx), %xmm3
1829 pxor %xmm0, %xmm0
1830 mov $16, %ecx
1831 or $15, %ebx
1832 lea 15(%edx), %edi
1833 and $0xfff, %edi
1834 sub $0x1000, %edi
1835
1836 .p2align 4
1837L(loop_ashr_15):
1838 add $16, %edi
1839 jg L(nibble_ashr_15)
1840
1841L(gobble_ashr_15):
1842 movdqa (%eax, %ecx), %xmm1
1843 movdqa (%edx, %ecx), %xmm2
1844 movdqa %xmm2, %xmm4
1845
1846 palignr $15, %xmm3, %xmm2
1847
1848 pcmpeqb %xmm1, %xmm0
1849 pcmpeqb %xmm2, %xmm1
1850 psubb %xmm0, %xmm1
1851 pmovmskb %xmm1, %esi
1852 sub $0xffff, %esi
1853 jnz L(exit)
1854
1855#ifdef USE_AS_STRNCMP
1856 cmp $16, %ebp
1857 lea -16(%ebp), %ebp
1858 jbe L(more8byteseq)
1859#endif
1860 add $16, %ecx
1861 movdqa %xmm4, %xmm3
1862
1863 add $16, %edi
1864 jg L(nibble_ashr_15)
1865
1866 movdqa (%eax, %ecx), %xmm1
1867 movdqa (%edx, %ecx), %xmm2
1868 movdqa %xmm2, %xmm4
1869
1870 palignr $15, %xmm3, %xmm2
1871
1872 pcmpeqb %xmm1, %xmm0
1873 pcmpeqb %xmm2, %xmm1
1874 psubb %xmm0, %xmm1
1875 pmovmskb %xmm1, %esi
1876 sub $0xffff, %esi
1877 jnz L(exit)
1878
1879#ifdef USE_AS_STRNCMP
1880 cmp $16, %ebp
1881 lea -16(%ebp), %ebp
1882 jbe L(more8byteseq)
1883#endif
1884 add $16, %ecx
1885 movdqa %xmm4, %xmm3
1886 jmp L(loop_ashr_15)
1887
1888 .p2align 4
1889L(nibble_ashr_15):
1890 pcmpeqb %xmm3, %xmm0
1891 pmovmskb %xmm0, %esi
1892 test $0x8000, %esi
1893 jnz L(ashr_15_exittail)
1894
1895#ifdef USE_AS_STRNCMP
1896 cmp $1, %ebp
1897 jbe L(ashr_15_exittail)
1898#endif
1899 pxor %xmm0, %xmm0
1900 sub $0x1000, %edi
1901 jmp L(gobble_ashr_15)
1902
1903 .p2align 4
1904L(ashr_15_exittail):
1905 movdqa (%eax, %ecx), %xmm1
1906 psrldq $15, %xmm0
1907 psrldq $15, %xmm3
1908 jmp L(aftertail)
1909
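/*
 * L(aftertail)/L(exit): every ashr case ends up here with the final
 * blocks in %xmm1/%xmm3 and the NUL mask in %xmm0.  The combined
 * equal-and-not-NUL mask is inverted into %esi, the loop counters are
 * folded back into byte offsets from the original pointers, the operand
 * swap (if any) is undone, and L(less16bytes) scans the mask to find the
 * first byte where the strings differ or terminate.
 */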
1910 .p2align 4
1911L(aftertail):
1912 pcmpeqb %xmm3, %xmm1
1913 psubb %xmm0, %xmm1
1914 pmovmskb %xmm1, %esi
1915 not %esi
1916L(exit):
1917 mov %ebx, %edi
1918 and $0x1f, %edi
1919 lea -16(%edi, %ecx), %edi
1920L(less32bytes):
1921 add %edi, %edx
1922 add %ecx, %eax
1923 test $0x20, %ebx
1924 jz L(ret2)
1925 xchg %eax, %edx
1926
1927 .p2align 4
1928L(ret2):
1929 mov %esi, %ecx
1930 POP (%esi)
1931 POP (%edi)
1932 POP (%ebx)
1933L(less16bytes):
1934 test %cl, %cl
1935 jz L(2next_8_bytes)
1936
1937 test $0x01, %cl
1938 jnz L(Byte0)
1939
1940 test $0x02, %cl
1941 jnz L(Byte1)
1942
1943 test $0x04, %cl
1944 jnz L(Byte2)
1945
1946 test $0x08, %cl
1947 jnz L(Byte3)
1948
1949 test $0x10, %cl
1950 jnz L(Byte4)
1951
1952 test $0x20, %cl
1953 jnz L(Byte5)
1954
1955 test $0x40, %cl
1956 jnz L(Byte6)
1957#ifdef USE_AS_STRNCMP
1958 cmp $7, %ebp
1959 jbe L(eq)
1960#endif
1961
1962 movzx 7(%eax), %ecx
1963 movzx 7(%edx), %eax
1964
1965 sub %ecx, %eax
1966 RETURN
1967
1968 .p2align 4
1969L(Byte0):
1970#ifdef USE_AS_STRNCMP
1971 cmp $0, %ebp
1972 jbe L(eq)
1973#endif
1974 movzx (%eax), %ecx
1975 movzx (%edx), %eax
1976
1977 sub %ecx, %eax
1978 RETURN
1979
1980 .p2align 4
1981L(Byte1):
1982#ifdef USE_AS_STRNCMP
1983 cmp $1, %ebp
1984 jbe L(eq)
1985#endif
1986 movzx 1(%eax), %ecx
1987 movzx 1(%edx), %eax
1988
1989 sub %ecx, %eax
1990 RETURN
1991
1992 .p2align 4
1993L(Byte2):
1994#ifdef USE_AS_STRNCMP
1995 cmp $2, %ebp
1996 jbe L(eq)
1997#endif
1998 movzx 2(%eax), %ecx
1999 movzx 2(%edx), %eax
2000
2001 sub %ecx, %eax
2002 RETURN
2003
2004 .p2align 4
2005L(Byte3):
2006#ifdef USE_AS_STRNCMP
2007 cmp $3, %ebp
2008 jbe L(eq)
2009#endif
2010 movzx 3(%eax), %ecx
2011 movzx 3(%edx), %eax
2012
2013 sub %ecx, %eax
2014 RETURN
2015
2016 .p2align 4
2017L(Byte4):
2018#ifdef USE_AS_STRNCMP
2019 cmp $4, %ebp
2020 jbe L(eq)
2021#endif
2022 movzx 4(%eax), %ecx
2023 movzx 4(%edx), %eax
2024
2025 sub %ecx, %eax
2026 RETURN
2027
2028 .p2align 4
2029L(Byte5):
2030#ifdef USE_AS_STRNCMP
2031 cmp $5, %ebp
2032 jbe L(eq)
2033#endif
2034 movzx 5(%eax), %ecx
2035 movzx 5(%edx), %eax
2036
2037 sub %ecx, %eax
2038 RETURN
2039
2040 .p2align 4
2041L(Byte6):
2042#ifdef USE_AS_STRNCMP
2043 cmp $6, %ebp
2044 jbe L(eq)
2045#endif
2046 movzx 6(%eax), %ecx
2047 movzx 6(%edx), %eax
2048
2049 sub %ecx, %eax
2050 RETURN
2051
2052 .p2align 4
2053L(2next_8_bytes):
2054 add $8, %eax
2055 add $8, %edx
2056#ifdef USE_AS_STRNCMP
2057 cmp $8, %ebp
2058 lea -8(%ebp), %ebp
2059 jbe L(eq)
2060#endif
2061
2062 test $0x01, %ch
2063 jnz L(Byte0)
2064
2065 test $0x02, %ch
2066 jnz L(Byte1)
2067
2068 test $0x04, %ch
2069 jnz L(Byte2)
2070
2071 test $0x08, %ch
2072 jnz L(Byte3)
2073
2074 test $0x10, %ch
2075 jnz L(Byte4)
2076
2077 test $0x20, %ch
2078 jnz L(Byte5)
2079
2080 test $0x40, %ch
2081 jnz L(Byte6)
2082
2083#ifdef USE_AS_STRNCMP
2084 cmp $7, %ebp
2085 jbe L(eq)
2086#endif
2087 movzx 7(%eax), %ecx
2088 movzx 7(%edx), %eax
2089
2090 sub %ecx, %eax
2091 RETURN
2092
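/*
 * L(neq): entered straight from a cmpb that found a mismatch, so its
 * flags are still live; "ja" selects +1 when the s1 byte is the larger
 * one and the value is negated to -1 otherwise.  Only the sign of the
 * result is significant, as with strcmp.
 */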
2093 .p2align 4
2094L(neq):
2095 mov $1, %eax
2096 ja L(neq_bigger)
2097 neg %eax
2098L(neq_bigger):
2099 RETURN
2100
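/*
 * L(more8byteseq): taken on the strncmp paths when the byte limit runs
 * out while the strings are still equal.  It restores the CFI state
 * remembered at L(crosspage), pops the registers saved for the main loop,
 * and falls through to L(eq), which returns 0.
 */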
2101#ifdef USE_AS_STRNCMP
2102 cfi_restore_state
2103 .p2align 4
2104L(more8byteseq):
2105 POP (%esi)
2106 POP (%edi)
2107 POP (%ebx)
2108#endif
2109
2110L(eq):
2111
2112#ifdef USE_AS_STRNCMP
2113 POP (%ebp)
2114#endif
2115 xorl %eax, %eax
2116 ret
2117
2118#ifdef USE_AS_STRNCMP
2119 CFI_PUSH (%ebp)
2120
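/*
 * strncmp-only path for initial counts below 16: compare byte by byte,
 * re-checking the remaining count %ebp before each step, and stop on a
 * mismatch, a NUL, or an exhausted count.
 */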
2121 .p2align 4
2122L(less16bytes_sncmp):
2123 test %ebp, %ebp
2124 jz L(eq)
2125
2126 movzbl (%eax), %ecx
2127 cmpb %cl, (%edx)
2128 jne L(neq)
2129 test %cl, %cl
2130 je L(eq)
2131
2132 cmp $1, %ebp
2133 je L(eq)
2134
2135 movzbl 1(%eax), %ecx
2136 cmpb %cl, 1(%edx)
2137 jne L(neq)
2138 test %cl, %cl
2139 je L(eq)
2140
2141 cmp $2, %ebp
2142 je L(eq)
2143
2144 movzbl 2(%eax), %ecx
2145 cmpb %cl, 2(%edx)
2146 jne L(neq)
2147 test %cl, %cl
2148 je L(eq)
2149
2150 cmp $3, %ebp
2151 je L(eq)
2152
2153 movzbl 3(%eax), %ecx
2154 cmpb %cl, 3(%edx)
2155 jne L(neq)
2156 test %cl, %cl
2157 je L(eq)
2158
2159 cmp $4, %ebp
2160 je L(eq)
2161
2162 movzbl 4(%eax), %ecx
2163 cmpb %cl, 4(%edx)
2164 jne L(neq)
2165 test %cl, %cl
2166 je L(eq)
2167
2168 cmp $5, %ebp
2169 je L(eq)
2170
2171 movzbl 5(%eax), %ecx
2172 cmpb %cl, 5(%edx)
2173 jne L(neq)
2174 test %cl, %cl
2175 je L(eq)
2176
2177 cmp $6, %ebp
2178 je L(eq)
2179
2180 movzbl 6(%eax), %ecx
2181 cmpb %cl, 6(%edx)
2182 jne L(neq)
2183 test %cl, %cl
2184 je L(eq)
2185
2186 cmp $7, %ebp
2187 je L(eq)
2188
2189 movzbl 7(%eax), %ecx
2190 cmpb %cl, 7(%edx)
2191 jne L(neq)
2192 test %cl, %cl
2193 je L(eq)
2194
2195
2196 cmp $8, %ebp
2197 je L(eq)
2198
2199 movzbl 8(%eax), %ecx
2200 cmpb %cl, 8(%edx)
2201 jne L(neq)
2202 test %cl, %cl
2203 je L(eq)
2204
2205 cmp $9, %ebp
2206 je L(eq)
2207
2208 movzbl 9(%eax), %ecx
2209 cmpb %cl, 9(%edx)
2210 jne L(neq)
2211 test %cl, %cl
2212 je L(eq)
2213
2214 cmp $10, %ebp
2215 je L(eq)
2216
2217 movzbl 10(%eax), %ecx
2218 cmpb %cl, 10(%edx)
2219 jne L(neq)
2220 test %cl, %cl
2221 je L(eq)
2222
2223 cmp $11, %ebp
2224 je L(eq)
2225
2226 movzbl 11(%eax), %ecx
2227 cmpb %cl, 11(%edx)
2228 jne L(neq)
2229 test %cl, %cl
2230 je L(eq)
2231
2232
2233 cmp $12, %ebp
2234 je L(eq)
2235
2236 movzbl 12(%eax), %ecx
2237 cmpb %cl, 12(%edx)
2238 jne L(neq)
2239 test %cl, %cl
2240 je L(eq)
2241
2242 cmp $13, %ebp
2243 je L(eq)
2244
2245 movzbl 13(%eax), %ecx
2246 cmpb %cl, 13(%edx)
2247 jne L(neq)
2248 test %cl, %cl
2249 je L(eq)
2250
2251 cmp $14, %ebp
2252 je L(eq)
2253
2254 movzbl 14(%eax), %ecx
2255 cmpb %cl, 14(%edx)
2256 jne L(neq)
2257 test %cl, %cl
2258 je L(eq)
2259
2260 cmp $15, %ebp
2261 je L(eq)
2262
2263 movzbl 15(%eax), %ecx
2264 cmpb %cl, 15(%edx)
2265 jne L(neq)
2266 test %cl, %cl
2267 je L(eq)
2268
2269 POP (%ebp)
2270 xor %eax, %eax
2271 ret
2272#endif
2273
2274END (ssse3_strcmp_latest)