1/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#ifndef L
32# define L(label) .L##label
33#endif
34
35#ifndef cfi_startproc
36# define cfi_startproc .cfi_startproc
37#endif
38
39#ifndef cfi_endproc
40# define cfi_endproc .cfi_endproc
41#endif
42
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
45#endif
46
47#ifndef cfi_restore
48# define cfi_restore(reg) .cfi_restore (reg)
49#endif
50
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
53#endif
54
55#ifndef ENTRY
56# define ENTRY(name) \
57 .type name, @function; \
58 .globl name; \
59 .p2align 4; \
60name: \
61 cfi_startproc
62#endif
63
64#ifndef END
65# define END(name) \
66 cfi_endproc; \
67 .size name, .-name
68#endif
69
70#define CFI_PUSH(REG) \
71 cfi_adjust_cfa_offset (4); \
72 cfi_rel_offset (REG, 0)
73
74#define CFI_POP(REG) \
75 cfi_adjust_cfa_offset (-4); \
76 cfi_restore (REG)
77
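/* PUSH/POP pair the stack operation with the matching CFI bookkeeping above. */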
78#define PUSH(REG) pushl REG; CFI_PUSH (REG)
79#define POP(REG) popl REG; CFI_POP (REG)
80
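/*
 * STR1, STR2 (and CNT for strncmp) are the stack offsets of the incoming
 * arguments.  The strncmp build pushes %ebp on entry to hold the count,
 * which shifts the offsets by 4, and RETURN restores %ebp on the way out.
 */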
81#ifndef USE_AS_STRNCMP
82# define STR1 4
83# define STR2 STR1+4
84# define RETURN ret
85
86# define UPDATE_STRNCMP_COUNTER
87#else
88# define STR1 8
89# define STR2 STR1+4
90# define CNT STR2+4
91# define RETURN POP (%ebp); ret; CFI_PUSH (%ebp)
92
93# define UPDATE_STRNCMP_COUNTER \
 94        /* calculate the number of bytes left to compare */	\
95 mov $16, %esi; \
96 sub %ecx, %esi; \
97 cmp %esi, %ebp; \
98 jbe L(more8byteseq); \
99 sub %esi, %ebp
100#endif
101
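/*
 * Compares the string at STR1 (loaded into %edx) with the string at STR2
 * (loaded into %eax).  When built with USE_AS_STRNCMP defined, at most CNT
 * bytes are compared and the remaining count is kept in %ebp.
 */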
102 .section .text.ssse3,"ax",@progbits
103ENTRY (ssse3_strcmp_latest)
104#ifdef USE_AS_STRNCMP
105 PUSH (%ebp)
106#endif
107 movl STR1(%esp), %edx
108 movl STR2(%esp), %eax
109#ifdef USE_AS_STRNCMP
110 movl CNT(%esp), %ebp
111 cmp $16, %ebp
112 jb L(less16bytes_sncmp)
113 jmp L(more16bytes)
114#endif
115
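/*
 * strcmp fast path: compare the first eight bytes one at a time and return
 * as soon as a mismatch or the NUL terminator is found.  (The strncmp
 * build branches past this block above.)
 */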
116 movzbl (%eax), %ecx
117 cmpb %cl, (%edx)
118 jne L(neq)
119 cmpl $0, %ecx
120 je L(eq)
121
122 movzbl 1(%eax), %ecx
123 cmpb %cl, 1(%edx)
124 jne L(neq)
125 cmpl $0, %ecx
126 je L(eq)
127
128 movzbl 2(%eax), %ecx
129 cmpb %cl, 2(%edx)
130 jne L(neq)
131 cmpl $0, %ecx
132 je L(eq)
133
134 movzbl 3(%eax), %ecx
135 cmpb %cl, 3(%edx)
136 jne L(neq)
137 cmpl $0, %ecx
138 je L(eq)
139
140 movzbl 4(%eax), %ecx
141 cmpb %cl, 4(%edx)
142 jne L(neq)
143 cmpl $0, %ecx
144 je L(eq)
145
146 movzbl 5(%eax), %ecx
147 cmpb %cl, 5(%edx)
148 jne L(neq)
149 cmpl $0, %ecx
150 je L(eq)
151
152 movzbl 6(%eax), %ecx
153 cmpb %cl, 6(%edx)
154 jne L(neq)
155 cmpl $0, %ecx
156 je L(eq)
157
158 movzbl 7(%eax), %ecx
159 cmpb %cl, 7(%edx)
160 jne L(neq)
161 cmpl $0, %ecx
162 je L(eq)
163
164 add $8, %edx
165 add $8, %eax
166#ifdef USE_AS_STRNCMP
167 cmp $8, %ebp
168 lea -8(%ebp), %ebp
169 je L(eq)
170L(more16bytes):
171#endif
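/*
 * If either pointer is within 16 bytes of the end of its page (page offset
 * above 0xff0), a 16-byte load could fault, so take the careful
 * L(crosspage) path.  Otherwise compare the next 16 bytes with SSE.
 */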
172 movl %edx, %ecx
173 and $0xfff, %ecx
174 cmp $0xff0, %ecx
175 ja L(crosspage)
176 mov %eax, %ecx
177 and $0xfff, %ecx
178 cmp $0xff0, %ecx
179 ja L(crosspage)
180 pxor %xmm0, %xmm0
181 movlpd (%eax), %xmm1
182 movlpd (%edx), %xmm2
183 movhpd 8(%eax), %xmm1
184 movhpd 8(%edx), %xmm2
185 pcmpeqb %xmm1, %xmm0
186 pcmpeqb %xmm2, %xmm1
187 psubb %xmm0, %xmm1
188 pmovmskb %xmm1, %ecx
189 sub $0xffff, %ecx
190 jnz L(less16bytes)
191#ifdef USE_AS_STRNCMP
192 cmp $16, %ebp
193 lea -16(%ebp), %ebp
194 jbe L(eq)
195#endif
196 add $16, %eax
197 add $16, %edx
198
199L(crosspage):
200
201 PUSH (%ebx)
202 PUSH (%edi)
203 PUSH (%esi)
204
205 movl %edx, %edi
206 movl %eax, %ecx
207 and $0xf, %ecx
208 and $0xf, %edi
209 xor %ecx, %eax
210 xor %edi, %edx
211 xor %ebx, %ebx
212 cmp %edi, %ecx
213 je L(ashr_0)
214 ja L(bigger)
215 or $0x20, %ebx
216 xchg %edx, %eax
217 xchg %ecx, %edi
218L(bigger):
219 lea 15(%edi), %edi
220 sub %ecx, %edi
221 cmp $8, %edi
222 jle L(ashr_less_8)
223 cmp $14, %edi
224 je L(ashr_15)
225 cmp $13, %edi
226 je L(ashr_14)
227 cmp $12, %edi
228 je L(ashr_13)
229 cmp $11, %edi
230 je L(ashr_12)
231 cmp $10, %edi
232 je L(ashr_11)
233 cmp $9, %edi
234 je L(ashr_10)
235L(ashr_less_8):
236 je L(ashr_9)
237 cmp $7, %edi
238 je L(ashr_8)
239 cmp $6, %edi
240 je L(ashr_7)
241 cmp $5, %edi
242 je L(ashr_6)
243 cmp $4, %edi
244 je L(ashr_5)
245 cmp $3, %edi
246 je L(ashr_4)
247 cmp $2, %edi
248 je L(ashr_3)
249 cmp $1, %edi
250 je L(ashr_2)
251 cmp $0, %edi
252 je L(ashr_1)
253
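/*
 * Cross-page / unaligned path.  Both pointers have been aligned down to 16
 * bytes; %ecx and %edi hold their original low four bits.  %ebx keeps
 * bookkeeping for the tail code: its low bits identify which L(ashr_N)
 * case was taken (the aligned case stores 0x10), and bit 0x20 is set when
 * %eax and %edx were exchanged so the tail can restore the original order.
 */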
254/*
255 * The following cases will be handled by ashr_0
256 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
257 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
258 */
259 .p2align 4
260L(ashr_0):
261 mov $0xffff, %esi
262 movdqa (%eax), %xmm1
263 pxor %xmm0, %xmm0
264 pcmpeqb %xmm1, %xmm0
265 pcmpeqb (%edx), %xmm1
266 psubb %xmm0, %xmm1
267 pmovmskb %xmm1, %edi
268 shr %cl, %esi
269 shr %cl, %edi
270 sub %edi, %esi
271 mov %ecx, %edi
272 jne L(less32bytes)
273 UPDATE_STRNCMP_COUNTER
274 mov $0x10, %ebx
275 mov $0x10, %ecx
276 pxor %xmm0, %xmm0
277 .p2align 4
278L(loop_ashr_0):
279 movdqa (%eax, %ecx), %xmm1
280 movdqa (%edx, %ecx), %xmm2
281
282 pcmpeqb %xmm1, %xmm0
283 pcmpeqb %xmm2, %xmm1
284 psubb %xmm0, %xmm1
285 pmovmskb %xmm1, %esi
286 sub $0xffff, %esi
287 jnz L(exit)
288#ifdef USE_AS_STRNCMP
289 cmp $16, %ebp
290 lea -16(%ebp), %ebp
291 jbe L(more8byteseq)
292#endif
293 add $16, %ecx
294 jmp L(loop_ashr_0)
295
296/*
297 * The following cases will be handled by ashr_1
298 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
299 * n(15) n -15 0(15 +(n-15) - n) ashr_1
300 */
301 .p2align 4
302L(ashr_1):
303 mov $0xffff, %esi
304 pxor %xmm0, %xmm0
305 movdqa (%edx), %xmm2
306 movdqa (%eax), %xmm1
307 pcmpeqb %xmm1, %xmm0
308 pslldq $15, %xmm2
309 pcmpeqb %xmm1, %xmm2
310 psubb %xmm0, %xmm2
311 pmovmskb %xmm2, %edi
312 shr %cl, %esi
313 shr %cl, %edi
314 sub %edi, %esi
315 lea -15(%ecx), %edi
316 jnz L(less32bytes)
317
318 UPDATE_STRNCMP_COUNTER
319
320 movdqa (%edx), %xmm3
321 pxor %xmm0, %xmm0
322 mov $16, %ecx
323 or $1, %ebx
324 lea 1(%edx), %edi
325 and $0xfff, %edi
326 sub $0x1000, %edi
327
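/*
 * Loop pattern shared by every L(ashr_N) case: %edi tracks how close the
 * next aligned 16-byte load from %edx is to a page boundary.  When it
 * would cross one, L(nibble_ashr_N) first checks the bytes already held in
 * %xmm3 for a NUL terminator (and, for strncmp, the remaining count)
 * before touching the next page.
 */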
328 .p2align 4
329L(loop_ashr_1):
330 add $16, %edi
331 jg L(nibble_ashr_1)
332
333L(gobble_ashr_1):
334 movdqa (%eax, %ecx), %xmm1
335 movdqa (%edx, %ecx), %xmm2
336 movdqa %xmm2, %xmm4
337
338 palignr $1, %xmm3, %xmm2
339
340 pcmpeqb %xmm1, %xmm0
341 pcmpeqb %xmm2, %xmm1
342 psubb %xmm0, %xmm1
343 pmovmskb %xmm1, %esi
344 sub $0xffff, %esi
345 jnz L(exit)
346#ifdef USE_AS_STRNCMP
347 cmp $16, %ebp
348 lea -16(%ebp), %ebp
349 jbe L(more8byteseq)
350#endif
351
352 add $16, %ecx
353 movdqa %xmm4, %xmm3
354
355 add $16, %edi
356 jg L(nibble_ashr_1)
357
358 movdqa (%eax, %ecx), %xmm1
359 movdqa (%edx, %ecx), %xmm2
360 movdqa %xmm2, %xmm4
361
362 palignr $1, %xmm3, %xmm2
363
364 pcmpeqb %xmm1, %xmm0
365 pcmpeqb %xmm2, %xmm1
366 psubb %xmm0, %xmm1
367 pmovmskb %xmm1, %esi
368 sub $0xffff, %esi
369 jnz L(exit)
370
371#ifdef USE_AS_STRNCMP
372 cmp $16, %ebp
373 lea -16(%ebp), %ebp
374 jbe L(more8byteseq)
375#endif
376 add $16, %ecx
377 movdqa %xmm4, %xmm3
378 jmp L(loop_ashr_1)
379
380 .p2align 4
381L(nibble_ashr_1):
382 pcmpeqb %xmm3, %xmm0
383 pmovmskb %xmm0, %esi
384 test $0xfffe, %esi
385 jnz L(ashr_1_exittail)
386
387#ifdef USE_AS_STRNCMP
388 cmp $15, %ebp
389 jbe L(ashr_1_exittail)
390#endif
391 pxor %xmm0, %xmm0
392 sub $0x1000, %edi
393 jmp L(gobble_ashr_1)
394
395 .p2align 4
396L(ashr_1_exittail):
397 movdqa (%eax, %ecx), %xmm1
398 psrldq $1, %xmm0
399 psrldq $1, %xmm3
400 jmp L(aftertail)
401
402/*
403 * The following cases will be handled by ashr_2
404 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
405 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
406 */
407 .p2align 4
408L(ashr_2):
409 mov $0xffff, %esi
410 pxor %xmm0, %xmm0
411 movdqa (%edx), %xmm2
412 movdqa (%eax), %xmm1
413 pcmpeqb %xmm1, %xmm0
414 pslldq $14, %xmm2
415 pcmpeqb %xmm1, %xmm2
416 psubb %xmm0, %xmm2
417 pmovmskb %xmm2, %edi
418 shr %cl, %esi
419 shr %cl, %edi
420 sub %edi, %esi
421 lea -14(%ecx), %edi
422 jnz L(less32bytes)
423
424 UPDATE_STRNCMP_COUNTER
425
426 movdqa (%edx), %xmm3
427 pxor %xmm0, %xmm0
428 mov $16, %ecx
429 or $2, %ebx
430 lea 2(%edx), %edi
431 and $0xfff, %edi
432 sub $0x1000, %edi
433
434 .p2align 4
435L(loop_ashr_2):
436 add $16, %edi
437 jg L(nibble_ashr_2)
438
439L(gobble_ashr_2):
440 movdqa (%eax, %ecx), %xmm1
441 movdqa (%edx, %ecx), %xmm2
442 movdqa %xmm2, %xmm4
443
444 palignr $2, %xmm3, %xmm2
445
446 pcmpeqb %xmm1, %xmm0
447 pcmpeqb %xmm2, %xmm1
448 psubb %xmm0, %xmm1
449 pmovmskb %xmm1, %esi
450 sub $0xffff, %esi
451 jnz L(exit)
452
453#ifdef USE_AS_STRNCMP
454 cmp $16, %ebp
455 lea -16(%ebp), %ebp
456 jbe L(more8byteseq)
457#endif
458 add $16, %ecx
459 movdqa %xmm4, %xmm3
460
461 add $16, %edi
462 jg L(nibble_ashr_2)
463
464 movdqa (%eax, %ecx), %xmm1
465 movdqa (%edx, %ecx), %xmm2
466 movdqa %xmm2, %xmm4
467
468 palignr $2, %xmm3, %xmm2
469
470 pcmpeqb %xmm1, %xmm0
471 pcmpeqb %xmm2, %xmm1
472 psubb %xmm0, %xmm1
473 pmovmskb %xmm1, %esi
474 sub $0xffff, %esi
475 jnz L(exit)
476
477#ifdef USE_AS_STRNCMP
478 cmp $16, %ebp
479 lea -16(%ebp), %ebp
480 jbe L(more8byteseq)
481#endif
482 add $16, %ecx
483 movdqa %xmm4, %xmm3
484 jmp L(loop_ashr_2)
485
486 .p2align 4
487L(nibble_ashr_2):
488 pcmpeqb %xmm3, %xmm0
489 pmovmskb %xmm0, %esi
490 test $0xfffc, %esi
491 jnz L(ashr_2_exittail)
492
493#ifdef USE_AS_STRNCMP
494 cmp $14, %ebp
495 jbe L(ashr_2_exittail)
496#endif
497
498 pxor %xmm0, %xmm0
499 sub $0x1000, %edi
500 jmp L(gobble_ashr_2)
501
502 .p2align 4
503L(ashr_2_exittail):
504 movdqa (%eax, %ecx), %xmm1
505 psrldq $2, %xmm0
506 psrldq $2, %xmm3
507 jmp L(aftertail)
508
509/*
510 * The following cases will be handled by ashr_3
511 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
512 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
513 */
514 .p2align 4
515L(ashr_3):
516 mov $0xffff, %esi
517 pxor %xmm0, %xmm0
518 movdqa (%edx), %xmm2
519 movdqa (%eax), %xmm1
520 pcmpeqb %xmm1, %xmm0
521 pslldq $13, %xmm2
522 pcmpeqb %xmm1, %xmm2
523 psubb %xmm0, %xmm2
524 pmovmskb %xmm2, %edi
525 shr %cl, %esi
526 shr %cl, %edi
527 sub %edi, %esi
528 lea -13(%ecx), %edi
529 jnz L(less32bytes)
530
531 UPDATE_STRNCMP_COUNTER
532
533 movdqa (%edx), %xmm3
534 pxor %xmm0, %xmm0
535 mov $16, %ecx
536 or $3, %ebx
537 lea 3(%edx), %edi
538 and $0xfff, %edi
539 sub $0x1000, %edi
540
541 .p2align 4
542L(loop_ashr_3):
543 add $16, %edi
544 jg L(nibble_ashr_3)
545
546L(gobble_ashr_3):
547 movdqa (%eax, %ecx), %xmm1
548 movdqa (%edx, %ecx), %xmm2
549 movdqa %xmm2, %xmm4
550
551 palignr $3, %xmm3, %xmm2
552
553 pcmpeqb %xmm1, %xmm0
554 pcmpeqb %xmm2, %xmm1
555 psubb %xmm0, %xmm1
556 pmovmskb %xmm1, %esi
557 sub $0xffff, %esi
558 jnz L(exit)
559
560#ifdef USE_AS_STRNCMP
561 cmp $16, %ebp
562 lea -16(%ebp), %ebp
563 jbe L(more8byteseq)
564#endif
565 add $16, %ecx
566 movdqa %xmm4, %xmm3
567
568 add $16, %edi
569 jg L(nibble_ashr_3)
570
571 movdqa (%eax, %ecx), %xmm1
572 movdqa (%edx, %ecx), %xmm2
573 movdqa %xmm2, %xmm4
574
575 palignr $3, %xmm3, %xmm2
576
577 pcmpeqb %xmm1, %xmm0
578 pcmpeqb %xmm2, %xmm1
579 psubb %xmm0, %xmm1
580 pmovmskb %xmm1, %esi
581 sub $0xffff, %esi
582 jnz L(exit)
583
584#ifdef USE_AS_STRNCMP
585 cmp $16, %ebp
586 lea -16(%ebp), %ebp
587 jbe L(more8byteseq)
588#endif
589 add $16, %ecx
590 movdqa %xmm4, %xmm3
591 jmp L(loop_ashr_3)
592
593 .p2align 4
594L(nibble_ashr_3):
595 pcmpeqb %xmm3, %xmm0
596 pmovmskb %xmm0, %esi
597 test $0xfff8, %esi
598 jnz L(ashr_3_exittail)
599
600#ifdef USE_AS_STRNCMP
601 cmp $13, %ebp
602 jbe L(ashr_3_exittail)
603#endif
604 pxor %xmm0, %xmm0
605 sub $0x1000, %edi
606 jmp L(gobble_ashr_3)
607
608 .p2align 4
609L(ashr_3_exittail):
610 movdqa (%eax, %ecx), %xmm1
611 psrldq $3, %xmm0
612 psrldq $3, %xmm3
613 jmp L(aftertail)
614
615/*
616 * The following cases will be handled by ashr_4
617 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
618 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
619 */
620 .p2align 4
621L(ashr_4):
622 mov $0xffff, %esi
623 pxor %xmm0, %xmm0
624 movdqa (%edx), %xmm2
625 movdqa (%eax), %xmm1
626 pcmpeqb %xmm1, %xmm0
627 pslldq $12, %xmm2
628 pcmpeqb %xmm1, %xmm2
629 psubb %xmm0, %xmm2
630 pmovmskb %xmm2, %edi
631 shr %cl, %esi
632 shr %cl, %edi
633 sub %edi, %esi
634 lea -12(%ecx), %edi
635 jnz L(less32bytes)
636
637 UPDATE_STRNCMP_COUNTER
638
639 movdqa (%edx), %xmm3
640 pxor %xmm0, %xmm0
641 mov $16, %ecx
642 or $4, %ebx
643 lea 4(%edx), %edi
644 and $0xfff, %edi
645 sub $0x1000, %edi
646
647 .p2align 4
648L(loop_ashr_4):
649 add $16, %edi
650 jg L(nibble_ashr_4)
651
652L(gobble_ashr_4):
653 movdqa (%eax, %ecx), %xmm1
654 movdqa (%edx, %ecx), %xmm2
655 movdqa %xmm2, %xmm4
656
657 palignr $4, %xmm3, %xmm2
658
659 pcmpeqb %xmm1, %xmm0
660 pcmpeqb %xmm2, %xmm1
661 psubb %xmm0, %xmm1
662 pmovmskb %xmm1, %esi
663 sub $0xffff, %esi
664 jnz L(exit)
665
666#ifdef USE_AS_STRNCMP
667 cmp $16, %ebp
668 lea -16(%ebp), %ebp
669 jbe L(more8byteseq)
670#endif
671
672 add $16, %ecx
673 movdqa %xmm4, %xmm3
674
675 add $16, %edi
676 jg L(nibble_ashr_4)
677
678 movdqa (%eax, %ecx), %xmm1
679 movdqa (%edx, %ecx), %xmm2
680 movdqa %xmm2, %xmm4
681
682 palignr $4, %xmm3, %xmm2
683
684 pcmpeqb %xmm1, %xmm0
685 pcmpeqb %xmm2, %xmm1
686 psubb %xmm0, %xmm1
687 pmovmskb %xmm1, %esi
688 sub $0xffff, %esi
689 jnz L(exit)
690
691#ifdef USE_AS_STRNCMP
692 cmp $16, %ebp
693 lea -16(%ebp), %ebp
694 jbe L(more8byteseq)
695#endif
696
697 add $16, %ecx
698 movdqa %xmm4, %xmm3
699 jmp L(loop_ashr_4)
700
701 .p2align 4
702L(nibble_ashr_4):
703 pcmpeqb %xmm3, %xmm0
704 pmovmskb %xmm0, %esi
705 test $0xfff0, %esi
706 jnz L(ashr_4_exittail)
707
708#ifdef USE_AS_STRNCMP
709 cmp $12, %ebp
710 jbe L(ashr_4_exittail)
711#endif
712
713 pxor %xmm0, %xmm0
714 sub $0x1000, %edi
715 jmp L(gobble_ashr_4)
716
717 .p2align 4
718L(ashr_4_exittail):
719 movdqa (%eax, %ecx), %xmm1
720 psrldq $4, %xmm0
721 psrldq $4, %xmm3
722 jmp L(aftertail)
723
724/*
725 * The following cases will be handled by ashr_5
726 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
727 * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
728 */
729 .p2align 4
730L(ashr_5):
731 mov $0xffff, %esi
732 pxor %xmm0, %xmm0
733 movdqa (%edx), %xmm2
734 movdqa (%eax), %xmm1
735 pcmpeqb %xmm1, %xmm0
736 pslldq $11, %xmm2
737 pcmpeqb %xmm1, %xmm2
738 psubb %xmm0, %xmm2
739 pmovmskb %xmm2, %edi
740 shr %cl, %esi
741 shr %cl, %edi
742 sub %edi, %esi
743 lea -11(%ecx), %edi
744 jnz L(less32bytes)
745
746 UPDATE_STRNCMP_COUNTER
747
748 movdqa (%edx), %xmm3
749 pxor %xmm0, %xmm0
750 mov $16, %ecx
751 or $5, %ebx
752 lea 5(%edx), %edi
753 and $0xfff, %edi
754 sub $0x1000, %edi
755
756 .p2align 4
757L(loop_ashr_5):
758 add $16, %edi
759 jg L(nibble_ashr_5)
760
761L(gobble_ashr_5):
762 movdqa (%eax, %ecx), %xmm1
763 movdqa (%edx, %ecx), %xmm2
764 movdqa %xmm2, %xmm4
765
766 palignr $5, %xmm3, %xmm2
767
768 pcmpeqb %xmm1, %xmm0
769 pcmpeqb %xmm2, %xmm1
770 psubb %xmm0, %xmm1
771 pmovmskb %xmm1, %esi
772 sub $0xffff, %esi
773 jnz L(exit)
774
775#ifdef USE_AS_STRNCMP
776 cmp $16, %ebp
777 lea -16(%ebp), %ebp
778 jbe L(more8byteseq)
779#endif
780 add $16, %ecx
781 movdqa %xmm4, %xmm3
782
783 add $16, %edi
784 jg L(nibble_ashr_5)
785
786 movdqa (%eax, %ecx), %xmm1
787 movdqa (%edx, %ecx), %xmm2
788 movdqa %xmm2, %xmm4
789
790 palignr $5, %xmm3, %xmm2
791
792 pcmpeqb %xmm1, %xmm0
793 pcmpeqb %xmm2, %xmm1
794 psubb %xmm0, %xmm1
795 pmovmskb %xmm1, %esi
796 sub $0xffff, %esi
797 jnz L(exit)
798
799#ifdef USE_AS_STRNCMP
800 cmp $16, %ebp
801 lea -16(%ebp), %ebp
802 jbe L(more8byteseq)
803#endif
804 add $16, %ecx
805 movdqa %xmm4, %xmm3
806 jmp L(loop_ashr_5)
807
808 .p2align 4
809L(nibble_ashr_5):
810 pcmpeqb %xmm3, %xmm0
811 pmovmskb %xmm0, %esi
812 test $0xffe0, %esi
813 jnz L(ashr_5_exittail)
814
815#ifdef USE_AS_STRNCMP
816 cmp $11, %ebp
817 jbe L(ashr_5_exittail)
818#endif
819 pxor %xmm0, %xmm0
820 sub $0x1000, %edi
821 jmp L(gobble_ashr_5)
822
823 .p2align 4
824L(ashr_5_exittail):
825 movdqa (%eax, %ecx), %xmm1
826 psrldq $5, %xmm0
827 psrldq $5, %xmm3
828 jmp L(aftertail)
829
830/*
831 * The following cases will be handled by ashr_6
832 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
833 * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
834 */
835
836 .p2align 4
837L(ashr_6):
838 mov $0xffff, %esi
839 pxor %xmm0, %xmm0
840 movdqa (%edx), %xmm2
841 movdqa (%eax), %xmm1
842 pcmpeqb %xmm1, %xmm0
843 pslldq $10, %xmm2
844 pcmpeqb %xmm1, %xmm2
845 psubb %xmm0, %xmm2
846 pmovmskb %xmm2, %edi
847 shr %cl, %esi
848 shr %cl, %edi
849 sub %edi, %esi
850 lea -10(%ecx), %edi
851 jnz L(less32bytes)
852
853 UPDATE_STRNCMP_COUNTER
854
855 movdqa (%edx), %xmm3
856 pxor %xmm0, %xmm0
857 mov $16, %ecx
858 or $6, %ebx
859 lea 6(%edx), %edi
860 and $0xfff, %edi
861 sub $0x1000, %edi
862
863 .p2align 4
864L(loop_ashr_6):
865 add $16, %edi
866 jg L(nibble_ashr_6)
867
868L(gobble_ashr_6):
869 movdqa (%eax, %ecx), %xmm1
870 movdqa (%edx, %ecx), %xmm2
871 movdqa %xmm2, %xmm4
872
873 palignr $6, %xmm3, %xmm2
874
875 pcmpeqb %xmm1, %xmm0
876 pcmpeqb %xmm2, %xmm1
877 psubb %xmm0, %xmm1
878 pmovmskb %xmm1, %esi
879 sub $0xffff, %esi
880 jnz L(exit)
881
882#ifdef USE_AS_STRNCMP
883 cmp $16, %ebp
884 lea -16(%ebp), %ebp
885 jbe L(more8byteseq)
886#endif
887
888 add $16, %ecx
889 movdqa %xmm4, %xmm3
890
891 add $16, %edi
892 jg L(nibble_ashr_6)
893
894 movdqa (%eax, %ecx), %xmm1
895 movdqa (%edx, %ecx), %xmm2
896 movdqa %xmm2, %xmm4
897
898 palignr $6, %xmm3, %xmm2
899
900 pcmpeqb %xmm1, %xmm0
901 pcmpeqb %xmm2, %xmm1
902 psubb %xmm0, %xmm1
903 pmovmskb %xmm1, %esi
904 sub $0xffff, %esi
905 jnz L(exit)
906#ifdef USE_AS_STRNCMP
907 cmp $16, %ebp
908 lea -16(%ebp), %ebp
909 jbe L(more8byteseq)
910#endif
911
912 add $16, %ecx
913 movdqa %xmm4, %xmm3
914 jmp L(loop_ashr_6)
915
916 .p2align 4
917L(nibble_ashr_6):
918 pcmpeqb %xmm3, %xmm0
919 pmovmskb %xmm0, %esi
920 test $0xffc0, %esi
921 jnz L(ashr_6_exittail)
922
923#ifdef USE_AS_STRNCMP
924 cmp $10, %ebp
925 jbe L(ashr_6_exittail)
926#endif
927 pxor %xmm0, %xmm0
928 sub $0x1000, %edi
929 jmp L(gobble_ashr_6)
930
931 .p2align 4
932L(ashr_6_exittail):
933 movdqa (%eax, %ecx), %xmm1
934 psrldq $6, %xmm0
935 psrldq $6, %xmm3
936 jmp L(aftertail)
937
938/*
939 * The following cases will be handled by ashr_7
940 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
941 * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
942 */
943
944 .p2align 4
945L(ashr_7):
946 mov $0xffff, %esi
947 pxor %xmm0, %xmm0
948 movdqa (%edx), %xmm2
949 movdqa (%eax), %xmm1
950 pcmpeqb %xmm1, %xmm0
951 pslldq $9, %xmm2
952 pcmpeqb %xmm1, %xmm2
953 psubb %xmm0, %xmm2
954 pmovmskb %xmm2, %edi
955 shr %cl, %esi
956 shr %cl, %edi
957 sub %edi, %esi
958 lea -9(%ecx), %edi
959 jnz L(less32bytes)
960
961 UPDATE_STRNCMP_COUNTER
962
963 movdqa (%edx), %xmm3
964 pxor %xmm0, %xmm0
965 mov $16, %ecx
966 or $7, %ebx
967 lea 8(%edx), %edi
968 and $0xfff, %edi
969 sub $0x1000, %edi
970
971 .p2align 4
972L(loop_ashr_7):
973 add $16, %edi
974 jg L(nibble_ashr_7)
975
976L(gobble_ashr_7):
977 movdqa (%eax, %ecx), %xmm1
978 movdqa (%edx, %ecx), %xmm2
979 movdqa %xmm2, %xmm4
980
981 palignr $7, %xmm3, %xmm2
982
983 pcmpeqb %xmm1, %xmm0
984 pcmpeqb %xmm2, %xmm1
985 psubb %xmm0, %xmm1
986 pmovmskb %xmm1, %esi
987 sub $0xffff, %esi
988 jnz L(exit)
989
990#ifdef USE_AS_STRNCMP
991 cmp $16, %ebp
992 lea -16(%ebp), %ebp
993 jbe L(more8byteseq)
994#endif
995
996 add $16, %ecx
997 movdqa %xmm4, %xmm3
998
999 add $16, %edi
1000 jg L(nibble_ashr_7)
1001
1002 movdqa (%eax, %ecx), %xmm1
1003 movdqa (%edx, %ecx), %xmm2
1004 movdqa %xmm2, %xmm4
1005
1006 palignr $7, %xmm3, %xmm2
1007
1008 pcmpeqb %xmm1, %xmm0
1009 pcmpeqb %xmm2, %xmm1
1010 psubb %xmm0, %xmm1
1011 pmovmskb %xmm1, %esi
1012 sub $0xffff, %esi
1013 jnz L(exit)
1014
1015#ifdef USE_AS_STRNCMP
1016 cmp $16, %ebp
1017 lea -16(%ebp), %ebp
1018 jbe L(more8byteseq)
1019#endif
1020
1021 add $16, %ecx
1022 movdqa %xmm4, %xmm3
1023 jmp L(loop_ashr_7)
1024
1025 .p2align 4
1026L(nibble_ashr_7):
1027 pcmpeqb %xmm3, %xmm0
1028 pmovmskb %xmm0, %esi
1029 test $0xff80, %esi
1030 jnz L(ashr_7_exittail)
1031
1032#ifdef USE_AS_STRNCMP
1033 cmp $9, %ebp
1034 jbe L(ashr_7_exittail)
1035#endif
1036	pxor	%xmm0, %xmm0
1038 sub $0x1000, %edi
1039 jmp L(gobble_ashr_7)
1040
1041 .p2align 4
1042L(ashr_7_exittail):
1043 movdqa (%eax, %ecx), %xmm1
1044 psrldq $7, %xmm0
1045 psrldq $7, %xmm3
1046 jmp L(aftertail)
1047
1048/*
1049 * The following cases will be handled by ashr_8
1050 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1051 * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
1052 */
1053 .p2align 4
1054L(ashr_8):
1055 mov $0xffff, %esi
1056 pxor %xmm0, %xmm0
1057 movdqa (%edx), %xmm2
1058 movdqa (%eax), %xmm1
1059 pcmpeqb %xmm1, %xmm0
1060 pslldq $8, %xmm2
1061 pcmpeqb %xmm1, %xmm2
1062 psubb %xmm0, %xmm2
1063 pmovmskb %xmm2, %edi
1064 shr %cl, %esi
1065 shr %cl, %edi
1066 sub %edi, %esi
1067 lea -8(%ecx), %edi
1068 jnz L(less32bytes)
1069
1070 UPDATE_STRNCMP_COUNTER
1071
1072 movdqa (%edx), %xmm3
1073 pxor %xmm0, %xmm0
1074 mov $16, %ecx
1075 or $8, %ebx
1076 lea 8(%edx), %edi
1077 and $0xfff, %edi
1078 sub $0x1000, %edi
1079
1080 .p2align 4
1081L(loop_ashr_8):
1082 add $16, %edi
1083 jg L(nibble_ashr_8)
1084
1085L(gobble_ashr_8):
1086 movdqa (%eax, %ecx), %xmm1
1087 movdqa (%edx, %ecx), %xmm2
1088 movdqa %xmm2, %xmm4
1089
1090 palignr $8, %xmm3, %xmm2
1091
1092 pcmpeqb %xmm1, %xmm0
1093 pcmpeqb %xmm2, %xmm1
1094 psubb %xmm0, %xmm1
1095 pmovmskb %xmm1, %esi
1096 sub $0xffff, %esi
1097 jnz L(exit)
1098
1099#ifdef USE_AS_STRNCMP
1100 cmp $16, %ebp
1101 lea -16(%ebp), %ebp
1102 jbe L(more8byteseq)
1103#endif
1104 add $16, %ecx
1105 movdqa %xmm4, %xmm3
1106
1107 add $16, %edi
1108 jg L(nibble_ashr_8)
1109
1110 movdqa (%eax, %ecx), %xmm1
1111 movdqa (%edx, %ecx), %xmm2
1112 movdqa %xmm2, %xmm4
1113
1114 palignr $8, %xmm3, %xmm2
1115
1116 pcmpeqb %xmm1, %xmm0
1117 pcmpeqb %xmm2, %xmm1
1118 psubb %xmm0, %xmm1
1119 pmovmskb %xmm1, %esi
1120 sub $0xffff, %esi
1121 jnz L(exit)
1122
1123#ifdef USE_AS_STRNCMP
1124 cmp $16, %ebp
1125 lea -16(%ebp), %ebp
1126 jbe L(more8byteseq)
1127#endif
1128 add $16, %ecx
1129 movdqa %xmm4, %xmm3
1130 jmp L(loop_ashr_8)
1131
1132 .p2align 4
1133L(nibble_ashr_8):
1134 pcmpeqb %xmm3, %xmm0
1135 pmovmskb %xmm0, %esi
1136 test $0xff00, %esi
1137 jnz L(ashr_8_exittail)
1138
1139#ifdef USE_AS_STRNCMP
1140 cmp $8, %ebp
1141 jbe L(ashr_8_exittail)
1142#endif
1143	pxor	%xmm0, %xmm0
1145 sub $0x1000, %edi
1146 jmp L(gobble_ashr_8)
1147
1148 .p2align 4
1149L(ashr_8_exittail):
1150 movdqa (%eax, %ecx), %xmm1
1151 psrldq $8, %xmm0
1152 psrldq $8, %xmm3
1153 jmp L(aftertail)
1154
1155/*
1156 * The following cases will be handled by ashr_9
1157 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1158 * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
1159 */
1160 .p2align 4
1161L(ashr_9):
1162 mov $0xffff, %esi
1163 pxor %xmm0, %xmm0
1164 movdqa (%edx), %xmm2
1165 movdqa (%eax), %xmm1
1166 pcmpeqb %xmm1, %xmm0
1167 pslldq $7, %xmm2
1168 pcmpeqb %xmm1, %xmm2
1169 psubb %xmm0, %xmm2
1170 pmovmskb %xmm2, %edi
1171 shr %cl, %esi
1172 shr %cl, %edi
1173 sub %edi, %esi
1174 lea -7(%ecx), %edi
1175 jnz L(less32bytes)
1176
1177 UPDATE_STRNCMP_COUNTER
1178
1179 movdqa (%edx), %xmm3
1180 pxor %xmm0, %xmm0
1181 mov $16, %ecx
1182 or $9, %ebx
1183 lea 9(%edx), %edi
1184 and $0xfff, %edi
1185 sub $0x1000, %edi
1186
1187 .p2align 4
1188L(loop_ashr_9):
1189 add $16, %edi
1190 jg L(nibble_ashr_9)
1191
1192L(gobble_ashr_9):
1193 movdqa (%eax, %ecx), %xmm1
1194 movdqa (%edx, %ecx), %xmm2
1195 movdqa %xmm2, %xmm4
1196
1197 palignr $9, %xmm3, %xmm2
1198
1199 pcmpeqb %xmm1, %xmm0
1200 pcmpeqb %xmm2, %xmm1
1201 psubb %xmm0, %xmm1
1202 pmovmskb %xmm1, %esi
1203 sub $0xffff, %esi
1204 jnz L(exit)
1205
1206#ifdef USE_AS_STRNCMP
1207 cmp $16, %ebp
1208 lea -16(%ebp), %ebp
1209 jbe L(more8byteseq)
1210#endif
1211 add $16, %ecx
1212 movdqa %xmm4, %xmm3
1213
1214 add $16, %edi
1215 jg L(nibble_ashr_9)
1216
1217 movdqa (%eax, %ecx), %xmm1
1218 movdqa (%edx, %ecx), %xmm2
1219 movdqa %xmm2, %xmm4
1220
1221 palignr $9, %xmm3, %xmm2
1222
1223 pcmpeqb %xmm1, %xmm0
1224 pcmpeqb %xmm2, %xmm1
1225 psubb %xmm0, %xmm1
1226 pmovmskb %xmm1, %esi
1227 sub $0xffff, %esi
1228 jnz L(exit)
1229
1230#ifdef USE_AS_STRNCMP
1231 cmp $16, %ebp
1232 lea -16(%ebp), %ebp
1233 jbe L(more8byteseq)
1234#endif
1235 add $16, %ecx
1236 movdqa %xmm4, %xmm3
1237 jmp L(loop_ashr_9)
1238
1239 .p2align 4
1240L(nibble_ashr_9):
1241 pcmpeqb %xmm3, %xmm0
1242 pmovmskb %xmm0, %esi
1243 test $0xfe00, %esi
1244 jnz L(ashr_9_exittail)
1245
1246#ifdef USE_AS_STRNCMP
1247 cmp $7, %ebp
1248 jbe L(ashr_9_exittail)
1249#endif
1250 pxor %xmm0, %xmm0
1251 sub $0x1000, %edi
1252 jmp L(gobble_ashr_9)
1253
1254 .p2align 4
1255L(ashr_9_exittail):
1256 movdqa (%eax, %ecx), %xmm1
1257 psrldq $9, %xmm0
1258 psrldq $9, %xmm3
1259 jmp L(aftertail)
1260
1261/*
1262 * The following cases will be handled by ashr_10
1263 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1264 * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
1265 */
1266 .p2align 4
1267L(ashr_10):
1268 mov $0xffff, %esi
1269 pxor %xmm0, %xmm0
1270 movdqa (%edx), %xmm2
1271 movdqa (%eax), %xmm1
1272 pcmpeqb %xmm1, %xmm0
1273 pslldq $6, %xmm2
1274 pcmpeqb %xmm1, %xmm2
1275 psubb %xmm0, %xmm2
1276 pmovmskb %xmm2, %edi
1277 shr %cl, %esi
1278 shr %cl, %edi
1279 sub %edi, %esi
1280 lea -6(%ecx), %edi
1281 jnz L(less32bytes)
1282
1283 UPDATE_STRNCMP_COUNTER
1284
1285 movdqa (%edx), %xmm3
1286 pxor %xmm0, %xmm0
1287 mov $16, %ecx
1288 or $10, %ebx
1289 lea 10(%edx), %edi
1290 and $0xfff, %edi
1291 sub $0x1000, %edi
1292
1293 .p2align 4
1294L(loop_ashr_10):
1295 add $16, %edi
1296 jg L(nibble_ashr_10)
1297
1298L(gobble_ashr_10):
1299 movdqa (%eax, %ecx), %xmm1
1300 movdqa (%edx, %ecx), %xmm2
1301 movdqa %xmm2, %xmm4
1302
1303 palignr $10, %xmm3, %xmm2
1304
1305 pcmpeqb %xmm1, %xmm0
1306 pcmpeqb %xmm2, %xmm1
1307 psubb %xmm0, %xmm1
1308 pmovmskb %xmm1, %esi
1309 sub $0xffff, %esi
1310 jnz L(exit)
1311
1312#ifdef USE_AS_STRNCMP
1313 cmp $16, %ebp
1314 lea -16(%ebp), %ebp
1315 jbe L(more8byteseq)
1316#endif
1317 add $16, %ecx
1318 movdqa %xmm4, %xmm3
1319
1320 add $16, %edi
1321 jg L(nibble_ashr_10)
1322
1323 movdqa (%eax, %ecx), %xmm1
1324 movdqa (%edx, %ecx), %xmm2
1325 movdqa %xmm2, %xmm4
1326
1327 palignr $10, %xmm3, %xmm2
1328
1329 pcmpeqb %xmm1, %xmm0
1330 pcmpeqb %xmm2, %xmm1
1331 psubb %xmm0, %xmm1
1332 pmovmskb %xmm1, %esi
1333 sub $0xffff, %esi
1334 jnz L(exit)
1335
1336#ifdef USE_AS_STRNCMP
1337 cmp $16, %ebp
1338 lea -16(%ebp), %ebp
1339 jbe L(more8byteseq)
1340#endif
1341 add $16, %ecx
1342 movdqa %xmm4, %xmm3
1343 jmp L(loop_ashr_10)
1344
1345 .p2align 4
1346L(nibble_ashr_10):
1347 pcmpeqb %xmm3, %xmm0
1348 pmovmskb %xmm0, %esi
1349 test $0xfc00, %esi
1350 jnz L(ashr_10_exittail)
1351
1352#ifdef USE_AS_STRNCMP
1353 cmp $6, %ebp
1354 jbe L(ashr_10_exittail)
1355#endif
1356 pxor %xmm0, %xmm0
1357 sub $0x1000, %edi
1358 jmp L(gobble_ashr_10)
1359
1360 .p2align 4
1361L(ashr_10_exittail):
1362 movdqa (%eax, %ecx), %xmm1
1363 psrldq $10, %xmm0
1364 psrldq $10, %xmm3
1365 jmp L(aftertail)
1366
1367/*
1368 * The following cases will be handled by ashr_11
1369 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1370 * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
1371 */
1372 .p2align 4
1373L(ashr_11):
1374 mov $0xffff, %esi
1375 pxor %xmm0, %xmm0
1376 movdqa (%edx), %xmm2
1377 movdqa (%eax), %xmm1
1378 pcmpeqb %xmm1, %xmm0
1379 pslldq $5, %xmm2
1380 pcmpeqb %xmm1, %xmm2
1381 psubb %xmm0, %xmm2
1382 pmovmskb %xmm2, %edi
1383 shr %cl, %esi
1384 shr %cl, %edi
1385 sub %edi, %esi
1386 lea -5(%ecx), %edi
1387 jnz L(less32bytes)
1388
1389 UPDATE_STRNCMP_COUNTER
1390
1391 movdqa (%edx), %xmm3
1392 pxor %xmm0, %xmm0
1393 mov $16, %ecx
1394 or $11, %ebx
1395 lea 11(%edx), %edi
1396 and $0xfff, %edi
1397 sub $0x1000, %edi
1398
1399 .p2align 4
1400L(loop_ashr_11):
1401 add $16, %edi
1402 jg L(nibble_ashr_11)
1403
1404L(gobble_ashr_11):
1405 movdqa (%eax, %ecx), %xmm1
1406 movdqa (%edx, %ecx), %xmm2
1407 movdqa %xmm2, %xmm4
1408
1409 palignr $11, %xmm3, %xmm2
1410
1411 pcmpeqb %xmm1, %xmm0
1412 pcmpeqb %xmm2, %xmm1
1413 psubb %xmm0, %xmm1
1414 pmovmskb %xmm1, %esi
1415 sub $0xffff, %esi
1416 jnz L(exit)
1417
1418#ifdef USE_AS_STRNCMP
1419 cmp $16, %ebp
1420 lea -16(%ebp), %ebp
1421 jbe L(more8byteseq)
1422#endif
1423 add $16, %ecx
1424 movdqa %xmm4, %xmm3
1425
1426 add $16, %edi
1427 jg L(nibble_ashr_11)
1428
1429 movdqa (%eax, %ecx), %xmm1
1430 movdqa (%edx, %ecx), %xmm2
1431 movdqa %xmm2, %xmm4
1432
1433 palignr $11, %xmm3, %xmm2
1434
1435 pcmpeqb %xmm1, %xmm0
1436 pcmpeqb %xmm2, %xmm1
1437 psubb %xmm0, %xmm1
1438 pmovmskb %xmm1, %esi
1439 sub $0xffff, %esi
1440 jnz L(exit)
1441
1442#ifdef USE_AS_STRNCMP
1443 cmp $16, %ebp
1444 lea -16(%ebp), %ebp
1445 jbe L(more8byteseq)
1446#endif
1447 add $16, %ecx
1448 movdqa %xmm4, %xmm3
1449 jmp L(loop_ashr_11)
1450
1451 .p2align 4
1452L(nibble_ashr_11):
1453 pcmpeqb %xmm3, %xmm0
1454 pmovmskb %xmm0, %esi
1455 test $0xf800, %esi
1456 jnz L(ashr_11_exittail)
1457
1458#ifdef USE_AS_STRNCMP
1459 cmp $5, %ebp
1460 jbe L(ashr_11_exittail)
1461#endif
1462 pxor %xmm0, %xmm0
1463 sub $0x1000, %edi
1464 jmp L(gobble_ashr_11)
1465
1466 .p2align 4
1467L(ashr_11_exittail):
1468 movdqa (%eax, %ecx), %xmm1
1469 psrldq $11, %xmm0
1470 psrldq $11, %xmm3
1471 jmp L(aftertail)
1472
1473/*
1474 * The following cases will be handled by ashr_12
1475 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1476 * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
1477 */
1478 .p2align 4
1479L(ashr_12):
1480 mov $0xffff, %esi
1481 pxor %xmm0, %xmm0
1482 movdqa (%edx), %xmm2
1483 movdqa (%eax), %xmm1
1484 pcmpeqb %xmm1, %xmm0
1485 pslldq $4, %xmm2
1486 pcmpeqb %xmm1, %xmm2
1487 psubb %xmm0, %xmm2
1488 pmovmskb %xmm2, %edi
1489 shr %cl, %esi
1490 shr %cl, %edi
1491 sub %edi, %esi
1492 lea -4(%ecx), %edi
1493 jnz L(less32bytes)
1494
1495 UPDATE_STRNCMP_COUNTER
1496
1497 movdqa (%edx), %xmm3
1498 pxor %xmm0, %xmm0
1499 mov $16, %ecx
1500 or $12, %ebx
1501 lea 12(%edx), %edi
1502 and $0xfff, %edi
1503 sub $0x1000, %edi
1504
1505 .p2align 4
1506L(loop_ashr_12):
1507 add $16, %edi
1508 jg L(nibble_ashr_12)
1509
1510L(gobble_ashr_12):
1511 movdqa (%eax, %ecx), %xmm1
1512 movdqa (%edx, %ecx), %xmm2
1513 movdqa %xmm2, %xmm4
1514
1515 palignr $12, %xmm3, %xmm2
1516
1517 pcmpeqb %xmm1, %xmm0
1518 pcmpeqb %xmm2, %xmm1
1519 psubb %xmm0, %xmm1
1520 pmovmskb %xmm1, %esi
1521 sub $0xffff, %esi
1522 jnz L(exit)
1523
1524 add $16, %ecx
1525 movdqa %xmm4, %xmm3
1526
1527 add $16, %edi
1528 jg L(nibble_ashr_12)
1529
1530#ifdef USE_AS_STRNCMP
1531 cmp $16, %ebp
1532 lea -16(%ebp), %ebp
1533 jbe L(more8byteseq)
1534#endif
1535 movdqa (%eax, %ecx), %xmm1
1536 movdqa (%edx, %ecx), %xmm2
1537 movdqa %xmm2, %xmm4
1538
1539 palignr $12, %xmm3, %xmm2
1540
1541 pcmpeqb %xmm1, %xmm0
1542 pcmpeqb %xmm2, %xmm1
1543 psubb %xmm0, %xmm1
1544 pmovmskb %xmm1, %esi
1545 sub $0xffff, %esi
1546 jnz L(exit)
1547
1548#ifdef USE_AS_STRNCMP
1549 cmp $16, %ebp
1550 lea -16(%ebp), %ebp
1551 jbe L(more8byteseq)
1552#endif
1553 add $16, %ecx
1554 movdqa %xmm4, %xmm3
1555 jmp L(loop_ashr_12)
1556
1557 .p2align 4
1558L(nibble_ashr_12):
1559 pcmpeqb %xmm3, %xmm0
1560 pmovmskb %xmm0, %esi
1561 test $0xf000, %esi
1562 jnz L(ashr_12_exittail)
1563
1564#ifdef USE_AS_STRNCMP
1565 cmp $4, %ebp
1566 jbe L(ashr_12_exittail)
1567#endif
1568 pxor %xmm0, %xmm0
1569 sub $0x1000, %edi
1570 jmp L(gobble_ashr_12)
1571
1572 .p2align 4
1573L(ashr_12_exittail):
1574 movdqa (%eax, %ecx), %xmm1
1575 psrldq $12, %xmm0
1576 psrldq $12, %xmm3
1577 jmp L(aftertail)
1578
1579/*
1580 * The following cases will be handled by ashr_13
1581 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1582 * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
1583 */
1584 .p2align 4
1585L(ashr_13):
1586 mov $0xffff, %esi
1587 pxor %xmm0, %xmm0
1588 movdqa (%edx), %xmm2
1589 movdqa (%eax), %xmm1
1590 pcmpeqb %xmm1, %xmm0
1591 pslldq $3, %xmm2
1592 pcmpeqb %xmm1, %xmm2
1593 psubb %xmm0, %xmm2
1594 pmovmskb %xmm2, %edi
1595 shr %cl, %esi
1596 shr %cl, %edi
1597 sub %edi, %esi
1598 lea -3(%ecx), %edi
1599 jnz L(less32bytes)
1600
1601 UPDATE_STRNCMP_COUNTER
1602
1603 movdqa (%edx), %xmm3
1604 pxor %xmm0, %xmm0
1605 mov $16, %ecx
1606 or $13, %ebx
1607 lea 13(%edx), %edi
1608 and $0xfff, %edi
1609 sub $0x1000, %edi
1610
1611 .p2align 4
1612L(loop_ashr_13):
1613 add $16, %edi
1614 jg L(nibble_ashr_13)
1615
1616L(gobble_ashr_13):
1617 movdqa (%eax, %ecx), %xmm1
1618 movdqa (%edx, %ecx), %xmm2
1619 movdqa %xmm2, %xmm4
1620
1621 palignr $13, %xmm3, %xmm2
1622
1623 pcmpeqb %xmm1, %xmm0
1624 pcmpeqb %xmm2, %xmm1
1625 psubb %xmm0, %xmm1
1626 pmovmskb %xmm1, %esi
1627 sub $0xffff, %esi
1628 jnz L(exit)
1629
1630#ifdef USE_AS_STRNCMP
1631 cmp $16, %ebp
1632 lea -16(%ebp), %ebp
1633 jbe L(more8byteseq)
1634#endif
1635 add $16, %ecx
1636 movdqa %xmm4, %xmm3
1637
1638 add $16, %edi
1639 jg L(nibble_ashr_13)
1640
1641 movdqa (%eax, %ecx), %xmm1
1642 movdqa (%edx, %ecx), %xmm2
1643 movdqa %xmm2, %xmm4
1644
1645 palignr $13, %xmm3, %xmm2
1646
1647 pcmpeqb %xmm1, %xmm0
1648 pcmpeqb %xmm2, %xmm1
1649 psubb %xmm0, %xmm1
1650 pmovmskb %xmm1, %esi
1651 sub $0xffff, %esi
1652 jnz L(exit)
1653
1654#ifdef USE_AS_STRNCMP
1655 cmp $16, %ebp
1656 lea -16(%ebp), %ebp
1657 jbe L(more8byteseq)
1658#endif
1659 add $16, %ecx
1660 movdqa %xmm4, %xmm3
1661 jmp L(loop_ashr_13)
1662
1663 .p2align 4
1664L(nibble_ashr_13):
1665 pcmpeqb %xmm3, %xmm0
1666 pmovmskb %xmm0, %esi
1667 test $0xe000, %esi
1668 jnz L(ashr_13_exittail)
1669
1670#ifdef USE_AS_STRNCMP
1671 cmp $3, %ebp
1672 jbe L(ashr_13_exittail)
1673#endif
1674 pxor %xmm0, %xmm0
1675 sub $0x1000, %edi
1676 jmp L(gobble_ashr_13)
1677
1678 .p2align 4
1679L(ashr_13_exittail):
1680 movdqa (%eax, %ecx), %xmm1
1681 psrldq $13, %xmm0
1682 psrldq $13, %xmm3
1683 jmp L(aftertail)
1684
1685/*
1686 * The following cases will be handled by ashr_14
1687 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1688 * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
1689 */
1690 .p2align 4
1691L(ashr_14):
1692 mov $0xffff, %esi
1693 pxor %xmm0, %xmm0
1694 movdqa (%edx), %xmm2
1695 movdqa (%eax), %xmm1
1696 pcmpeqb %xmm1, %xmm0
1697 pslldq $2, %xmm2
1698 pcmpeqb %xmm1, %xmm2
1699 psubb %xmm0, %xmm2
1700 pmovmskb %xmm2, %edi
1701 shr %cl, %esi
1702 shr %cl, %edi
1703 sub %edi, %esi
1704 lea -2(%ecx), %edi
1705 jnz L(less32bytes)
1706
1707 UPDATE_STRNCMP_COUNTER
1708
1709 movdqa (%edx), %xmm3
1710 pxor %xmm0, %xmm0
1711 mov $16, %ecx
1712 or $14, %ebx
1713 lea 14(%edx), %edi
1714 and $0xfff, %edi
1715 sub $0x1000, %edi
1716
1717 .p2align 4
1718L(loop_ashr_14):
1719 add $16, %edi
1720 jg L(nibble_ashr_14)
1721
1722L(gobble_ashr_14):
1723 movdqa (%eax, %ecx), %xmm1
1724 movdqa (%edx, %ecx), %xmm2
1725 movdqa %xmm2, %xmm4
1726
1727 palignr $14, %xmm3, %xmm2
1728
1729 pcmpeqb %xmm1, %xmm0
1730 pcmpeqb %xmm2, %xmm1
1731 psubb %xmm0, %xmm1
1732 pmovmskb %xmm1, %esi
1733 sub $0xffff, %esi
1734 jnz L(exit)
1735
1736#ifdef USE_AS_STRNCMP
1737 cmp $16, %ebp
1738 lea -16(%ebp), %ebp
1739 jbe L(more8byteseq)
1740#endif
1741 add $16, %ecx
1742 movdqa %xmm4, %xmm3
1743
1744 add $16, %edi
1745 jg L(nibble_ashr_14)
1746
1747 movdqa (%eax, %ecx), %xmm1
1748 movdqa (%edx, %ecx), %xmm2
1749 movdqa %xmm2, %xmm4
1750
1751 palignr $14, %xmm3, %xmm2
1752
1753 pcmpeqb %xmm1, %xmm0
1754 pcmpeqb %xmm2, %xmm1
1755 psubb %xmm0, %xmm1
1756 pmovmskb %xmm1, %esi
1757 sub $0xffff, %esi
1758 jnz L(exit)
1759
1760#ifdef USE_AS_STRNCMP
1761 cmp $16, %ebp
1762 lea -16(%ebp), %ebp
1763 jbe L(more8byteseq)
1764#endif
1765 add $16, %ecx
1766 movdqa %xmm4, %xmm3
1767 jmp L(loop_ashr_14)
1768
1769 .p2align 4
1770L(nibble_ashr_14):
1771 pcmpeqb %xmm3, %xmm0
1772 pmovmskb %xmm0, %esi
1773 test $0xc000, %esi
1774 jnz L(ashr_14_exittail)
1775
1776#ifdef USE_AS_STRNCMP
1777 cmp $2, %ebp
1778 jbe L(ashr_14_exittail)
1779#endif
1780 pxor %xmm0, %xmm0
1781 sub $0x1000, %edi
1782 jmp L(gobble_ashr_14)
1783
1784 .p2align 4
1785L(ashr_14_exittail):
1786 movdqa (%eax, %ecx), %xmm1
1787 psrldq $14, %xmm0
1788 psrldq $14, %xmm3
1789 jmp L(aftertail)
1790
1791/*
1792 * The following cases will be handled by ashr_15
1793 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1794 * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
1795 */
1796
1797 .p2align 4
1798L(ashr_15):
1799 mov $0xffff, %esi
1800 pxor %xmm0, %xmm0
1801 movdqa (%edx), %xmm2
1802 movdqa (%eax), %xmm1
1803 pcmpeqb %xmm1, %xmm0
1804 pslldq $1, %xmm2
1805 pcmpeqb %xmm1, %xmm2
1806 psubb %xmm0, %xmm2
1807 pmovmskb %xmm2, %edi
1808 shr %cl, %esi
1809 shr %cl, %edi
1810 sub %edi, %esi
1811 lea -1(%ecx), %edi
1812 jnz L(less32bytes)
1813
1814 UPDATE_STRNCMP_COUNTER
1815
1816 movdqa (%edx), %xmm3
1817 pxor %xmm0, %xmm0
1818 mov $16, %ecx
1819 or $15, %ebx
1820 lea 15(%edx), %edi
1821 and $0xfff, %edi
1822 sub $0x1000, %edi
1823
1824 .p2align 4
1825L(loop_ashr_15):
1826 add $16, %edi
1827 jg L(nibble_ashr_15)
1828
1829L(gobble_ashr_15):
1830 movdqa (%eax, %ecx), %xmm1
1831 movdqa (%edx, %ecx), %xmm2
1832 movdqa %xmm2, %xmm4
1833
1834 palignr $15, %xmm3, %xmm2
1835
1836 pcmpeqb %xmm1, %xmm0
1837 pcmpeqb %xmm2, %xmm1
1838 psubb %xmm0, %xmm1
1839 pmovmskb %xmm1, %esi
1840 sub $0xffff, %esi
1841 jnz L(exit)
1842
1843#ifdef USE_AS_STRNCMP
1844 cmp $16, %ebp
1845 lea -16(%ebp), %ebp
1846 jbe L(more8byteseq)
1847#endif
1848 add $16, %ecx
1849 movdqa %xmm4, %xmm3
1850
1851 add $16, %edi
1852 jg L(nibble_ashr_15)
1853
1854 movdqa (%eax, %ecx), %xmm1
1855 movdqa (%edx, %ecx), %xmm2
1856 movdqa %xmm2, %xmm4
1857
1858 palignr $15, %xmm3, %xmm2
1859
1860 pcmpeqb %xmm1, %xmm0
1861 pcmpeqb %xmm2, %xmm1
1862 psubb %xmm0, %xmm1
1863 pmovmskb %xmm1, %esi
1864 sub $0xffff, %esi
1865 jnz L(exit)
1866
1867#ifdef USE_AS_STRNCMP
1868 cmp $16, %ebp
1869 lea -16(%ebp), %ebp
1870 jbe L(more8byteseq)
1871#endif
1872 add $16, %ecx
1873 movdqa %xmm4, %xmm3
1874 jmp L(loop_ashr_15)
1875
1876 .p2align 4
1877L(nibble_ashr_15):
1878 pcmpeqb %xmm3, %xmm0
1879 pmovmskb %xmm0, %esi
1880 test $0x8000, %esi
1881 jnz L(ashr_15_exittail)
1882
1883#ifdef USE_AS_STRNCMP
1884 cmp $1, %ebp
1885 jbe L(ashr_15_exittail)
1886#endif
1887 pxor %xmm0, %xmm0
1888 sub $0x1000, %edi
1889 jmp L(gobble_ashr_15)
1890
1891 .p2align 4
1892L(ashr_15_exittail):
1893 movdqa (%eax, %ecx), %xmm1
1894 psrldq $15, %xmm0
1895 psrldq $15, %xmm3
1896 jmp L(aftertail)
1897
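/*
 * Common tail.  %esi encodes which byte of the current 16-byte chunk
 * differs or hits the NUL terminator.  L(exit)/L(less32bytes) convert the
 * accumulated offsets back into byte addresses (undoing any swap recorded
 * in %ebx), and L(less16bytes) scans the mask to return the difference of
 * the first mismatching bytes.
 */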
1898 .p2align 4
1899L(aftertail):
1900 pcmpeqb %xmm3, %xmm1
1901 psubb %xmm0, %xmm1
1902 pmovmskb %xmm1, %esi
1903 not %esi
1904L(exit):
1905 mov %ebx, %edi
1906 and $0x1f, %edi
1907 lea -16(%edi, %ecx), %edi
1908L(less32bytes):
1909 add %edi, %edx
1910 add %ecx, %eax
1911 test $0x20, %ebx
1912 jz L(ret2)
1913 xchg %eax, %edx
1914
1915 .p2align 4
1916L(ret2):
1917 mov %esi, %ecx
1918 POP (%esi)
1919 POP (%edi)
1920 POP (%ebx)
1921L(less16bytes):
1922 test %cl, %cl
1923 jz L(2next_8_bytes)
1924
1925 test $0x01, %cl
1926 jnz L(Byte0)
1927
1928 test $0x02, %cl
1929 jnz L(Byte1)
1930
1931 test $0x04, %cl
1932 jnz L(Byte2)
1933
1934 test $0x08, %cl
1935 jnz L(Byte3)
1936
1937 test $0x10, %cl
1938 jnz L(Byte4)
1939
1940 test $0x20, %cl
1941 jnz L(Byte5)
1942
1943 test $0x40, %cl
1944 jnz L(Byte6)
1945#ifdef USE_AS_STRNCMP
1946 cmp $7, %ebp
1947 jbe L(eq)
1948#endif
1949
1950 movzx 7(%eax), %ecx
1951 movzx 7(%edx), %eax
1952
1953 sub %ecx, %eax
1954 RETURN
1955
1956 .p2align 4
1957L(Byte0):
1958#ifdef USE_AS_STRNCMP
1959 cmp $0, %ebp
1960 jbe L(eq)
1961#endif
1962 movzx (%eax), %ecx
1963 movzx (%edx), %eax
1964
1965 sub %ecx, %eax
1966 RETURN
1967
1968 .p2align 4
1969L(Byte1):
1970#ifdef USE_AS_STRNCMP
1971 cmp $1, %ebp
1972 jbe L(eq)
1973#endif
1974 movzx 1(%eax), %ecx
1975 movzx 1(%edx), %eax
1976
1977 sub %ecx, %eax
1978 RETURN
1979
1980 .p2align 4
1981L(Byte2):
1982#ifdef USE_AS_STRNCMP
1983 cmp $2, %ebp
1984 jbe L(eq)
1985#endif
1986 movzx 2(%eax), %ecx
1987 movzx 2(%edx), %eax
1988
1989 sub %ecx, %eax
1990 RETURN
1991
1992 .p2align 4
1993L(Byte3):
1994#ifdef USE_AS_STRNCMP
1995 cmp $3, %ebp
1996 jbe L(eq)
1997#endif
1998 movzx 3(%eax), %ecx
1999 movzx 3(%edx), %eax
2000
2001 sub %ecx, %eax
2002 RETURN
2003
2004 .p2align 4
2005L(Byte4):
2006#ifdef USE_AS_STRNCMP
2007 cmp $4, %ebp
2008 jbe L(eq)
2009#endif
2010 movzx 4(%eax), %ecx
2011 movzx 4(%edx), %eax
2012
2013 sub %ecx, %eax
2014 RETURN
2015
2016 .p2align 4
2017L(Byte5):
2018#ifdef USE_AS_STRNCMP
2019 cmp $5, %ebp
2020 jbe L(eq)
2021#endif
2022 movzx 5(%eax), %ecx
2023 movzx 5(%edx), %eax
2024
2025 sub %ecx, %eax
2026 RETURN
2027
2028 .p2align 4
2029L(Byte6):
2030#ifdef USE_AS_STRNCMP
2031 cmp $6, %ebp
2032 jbe L(eq)
2033#endif
2034 movzx 6(%eax), %ecx
2035 movzx 6(%edx), %eax
2036
2037 sub %ecx, %eax
2038 RETURN
2039
2040 .p2align 4
2041L(2next_8_bytes):
2042 add $8, %eax
2043 add $8, %edx
2044#ifdef USE_AS_STRNCMP
2045 cmp $8, %ebp
2046 lea -8(%ebp), %ebp
2047 jbe L(eq)
2048#endif
2049
2050 test $0x01, %ch
2051 jnz L(Byte0)
2052
2053 test $0x02, %ch
2054 jnz L(Byte1)
2055
2056 test $0x04, %ch
2057 jnz L(Byte2)
2058
2059 test $0x08, %ch
2060 jnz L(Byte3)
2061
2062 test $0x10, %ch
2063 jnz L(Byte4)
2064
2065 test $0x20, %ch
2066 jnz L(Byte5)
2067
2068 test $0x40, %ch
2069 jnz L(Byte6)
2070
2071#ifdef USE_AS_STRNCMP
2072 cmp $7, %ebp
2073 jbe L(eq)
2074#endif
2075 movzx 7(%eax), %ecx
2076 movzx 7(%edx), %eax
2077
2078 sub %ecx, %eax
2079 RETURN
2080
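/*
 * Mismatch: the flags from the byte compare that branched here are still
 * live, so return 1 if the byte from the first string (%edx) is the larger
 * one and -1 otherwise.
 */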
2081 .p2align 4
2082L(neq):
2083 mov $1, %eax
2084 ja L(neq_bigger)
2085 neg %eax
2086L(neq_bigger):
2087 RETURN
2088
2089#ifdef USE_AS_STRNCMP
2090 CFI_PUSH (%ebx)
2091 CFI_PUSH (%edi)
2092 CFI_PUSH (%esi)
2093
2094 .p2align 4
2095L(more8byteseq):
2096 POP (%esi)
2097 POP (%edi)
2098 POP (%ebx)
2099#endif
2100
2101L(eq):
2102
2103#ifdef USE_AS_STRNCMP
2104 POP (%ebp)
2105#endif
2106 xorl %eax, %eax
2107 ret
2108
2109#ifdef USE_AS_STRNCMP
2110 CFI_PUSH (%ebp)
2111
2112 .p2align 4
2113L(less16bytes_sncmp):
2114 test %ebp, %ebp
2115 jz L(eq)
2116
2117 movzbl (%eax), %ecx
2118 cmpb %cl, (%edx)
2119 jne L(neq)
2120 test %cl, %cl
2121 je L(eq)
2122
2123 cmp $1, %ebp
2124 je L(eq)
2125
2126 movzbl 1(%eax), %ecx
2127 cmpb %cl, 1(%edx)
2128 jne L(neq)
2129 test %cl, %cl
2130 je L(eq)
2131
2132 cmp $2, %ebp
2133 je L(eq)
2134
2135 movzbl 2(%eax), %ecx
2136 cmpb %cl, 2(%edx)
2137 jne L(neq)
2138 test %cl, %cl
2139 je L(eq)
2140
2141 cmp $3, %ebp
2142 je L(eq)
2143
2144 movzbl 3(%eax), %ecx
2145 cmpb %cl, 3(%edx)
2146 jne L(neq)
2147 test %cl, %cl
2148 je L(eq)
2149
2150 cmp $4, %ebp
2151 je L(eq)
2152
2153 movzbl 4(%eax), %ecx
2154 cmpb %cl, 4(%edx)
2155 jne L(neq)
2156 test %cl, %cl
2157 je L(eq)
2158
2159 cmp $5, %ebp
2160 je L(eq)
2161
2162 movzbl 5(%eax), %ecx
2163 cmpb %cl, 5(%edx)
2164 jne L(neq)
2165 test %cl, %cl
2166 je L(eq)
2167
2168 cmp $6, %ebp
2169 je L(eq)
2170
2171 movzbl 6(%eax), %ecx
2172 cmpb %cl, 6(%edx)
2173 jne L(neq)
2174 test %cl, %cl
2175 je L(eq)
2176
2177 cmp $7, %ebp
2178 je L(eq)
2179
2180 movzbl 7(%eax), %ecx
2181 cmpb %cl, 7(%edx)
2182 jne L(neq)
2183 test %cl, %cl
2184 je L(eq)
2185
2186
2187 cmp $8, %ebp
2188 je L(eq)
2189
2190 movzbl 8(%eax), %ecx
2191 cmpb %cl, 8(%edx)
2192 jne L(neq)
2193 test %cl, %cl
2194 je L(eq)
2195
2196 cmp $9, %ebp
2197 je L(eq)
2198
2199 movzbl 9(%eax), %ecx
2200 cmpb %cl, 9(%edx)
2201 jne L(neq)
2202 test %cl, %cl
2203 je L(eq)
2204
2205 cmp $10, %ebp
2206 je L(eq)
2207
2208 movzbl 10(%eax), %ecx
2209 cmpb %cl, 10(%edx)
2210 jne L(neq)
2211 test %cl, %cl
2212 je L(eq)
2213
2214 cmp $11, %ebp
2215 je L(eq)
2216
2217 movzbl 11(%eax), %ecx
2218 cmpb %cl, 11(%edx)
2219 jne L(neq)
2220 test %cl, %cl
2221 je L(eq)
2222
2223
2224 cmp $12, %ebp
2225 je L(eq)
2226
2227 movzbl 12(%eax), %ecx
2228 cmpb %cl, 12(%edx)
2229 jne L(neq)
2230 test %cl, %cl
2231 je L(eq)
2232
2233 cmp $13, %ebp
2234 je L(eq)
2235
2236 movzbl 13(%eax), %ecx
2237 cmpb %cl, 13(%edx)
2238 jne L(neq)
2239 test %cl, %cl
2240 je L(eq)
2241
2242 cmp $14, %ebp
2243 je L(eq)
2244
2245 movzbl 14(%eax), %ecx
2246 cmpb %cl, 14(%edx)
2247 jne L(neq)
2248 test %cl, %cl
2249 je L(eq)
2250
2251 cmp $15, %ebp
2252 je L(eq)
2253
2254 movzbl 15(%eax), %ecx
2255 cmpb %cl, 15(%edx)
2256 jne L(neq)
2257 test %cl, %cl
2258 je L(eq)
2259
2260 POP (%ebp)
2261 xor %eax, %eax
2262 ret
2263#endif
2264
2265END (ssse3_strcmp_latest)