blob: 12753790dbe52c877133b37b8805742f0608b5c9 [file] [log] [blame]
Bruce Beare8ff1a272010-03-04 11:03:37 -08001/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
/* L(x): assembler-local label helper — .Lx names never reach the
   symbol table.  */
#ifndef L
# define L(label) .L##label
#endif

/* DWARF call-frame-information wrappers.  Each one is defined only if
   the including environment (e.g. a libc sysdep header) has not already
   supplied it, so this file assembles both inside and outside libc.  */
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif
62
/* ENTRY(name): declare a global function symbol of ELF type @function,
   align its entry to 16 bytes, and open its CFI region.
   END(name): close the CFI region and record the symbol's size so
   tools (objdump, unwinders, profilers) see correct extents.  */
#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif
77
/* CFI bookkeeping for a 4-byte pushl/popl: adjust the CFA by the slot
   size and record (or forget) where the register was saved.  PUSH/POP
   bundle the actual instruction with its CFI annotation so the two can
   never drift apart.  */
#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
88
/* Per-variant glue.

   strcmp variant: arguments sit at 4(%esp) and 8(%esp); RETURN is a
   plain ret and the counter macro is empty.

   strncmp variant: %ebp is pushed on entry (it carries the remaining
   byte count, loaded from CNT), so every argument offset shifts by 4
   and RETURN must pop %ebp first.  The trailing CFI_PUSH in RETURN
   re-asserts the saved-%ebp frame state for code that follows a
   mid-function return path.  */
#ifndef USE_AS_STRNCMP
# define STR1 4
# define STR2 STR1+4
# define RETURN ret

# define UPDATE_STRNCMP_COUNTER
#else
# define STR1 8
# define STR2 STR1+4
# define CNT STR2+4
# define RETURN POP (%ebp); ret; CFI_PUSH (%ebp)

/* %esi = 16 - %ecx = bytes of the current 16-byte chunk still ahead.
   If the length limit in %ebp ends inside this chunk, take the
   byte-by-byte tail; otherwise consume those bytes from %ebp.  */
# define UPDATE_STRNCMP_COUNTER \
	/* calculate left number to compare */ \
	mov	$16, %esi; \
	sub	%ecx, %esi; \
	cmpl	%esi, %ebp; \
	jbe	L(more8byteseq); \
	sub	%esi, %ebp
#endif
109
/* Exported symbol name.  A wrapper file may pre-define STRCMP (together
   with USE_AS_STRNCMP) to reuse this implementation under another name,
   e.g. strncmp.  */
#ifndef STRCMP
# define STRCMP strcmp
#endif
113
Bruce Beare8ff1a272010-03-04 11:03:37 -0800114 .section .text.ssse3,"ax",@progbits
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400115ENTRY (STRCMP)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800116#ifdef USE_AS_STRNCMP
117 PUSH (%ebp)
118#endif
119 movl STR1(%esp), %edx
120 movl STR2(%esp), %eax
121#ifdef USE_AS_STRNCMP
122 movl CNT(%esp), %ebp
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400123 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800124 jb L(less16bytes_sncmp)
125 jmp L(more16bytes)
126#endif
127
128 movzbl (%eax), %ecx
129 cmpb %cl, (%edx)
130 jne L(neq)
131 cmpl $0, %ecx
132 je L(eq)
133
134 movzbl 1(%eax), %ecx
135 cmpb %cl, 1(%edx)
136 jne L(neq)
137 cmpl $0, %ecx
138 je L(eq)
139
140 movzbl 2(%eax), %ecx
141 cmpb %cl, 2(%edx)
142 jne L(neq)
143 cmpl $0, %ecx
144 je L(eq)
145
146 movzbl 3(%eax), %ecx
147 cmpb %cl, 3(%edx)
148 jne L(neq)
149 cmpl $0, %ecx
150 je L(eq)
151
152 movzbl 4(%eax), %ecx
153 cmpb %cl, 4(%edx)
154 jne L(neq)
155 cmpl $0, %ecx
156 je L(eq)
157
158 movzbl 5(%eax), %ecx
159 cmpb %cl, 5(%edx)
160 jne L(neq)
161 cmpl $0, %ecx
162 je L(eq)
163
164 movzbl 6(%eax), %ecx
165 cmpb %cl, 6(%edx)
166 jne L(neq)
167 cmpl $0, %ecx
168 je L(eq)
169
170 movzbl 7(%eax), %ecx
171 cmpb %cl, 7(%edx)
172 jne L(neq)
173 cmpl $0, %ecx
174 je L(eq)
175
176 add $8, %edx
177 add $8, %eax
178#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400179 cmpl $8, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800180 lea -8(%ebp), %ebp
181 je L(eq)
182L(more16bytes):
183#endif
184 movl %edx, %ecx
185 and $0xfff, %ecx
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400186 cmpl $0xff0, %ecx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800187 ja L(crosspage)
188 mov %eax, %ecx
189 and $0xfff, %ecx
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400190 cmpl $0xff0, %ecx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800191 ja L(crosspage)
192 pxor %xmm0, %xmm0
193 movlpd (%eax), %xmm1
194 movlpd (%edx), %xmm2
195 movhpd 8(%eax), %xmm1
196 movhpd 8(%edx), %xmm2
197 pcmpeqb %xmm1, %xmm0
198 pcmpeqb %xmm2, %xmm1
199 psubb %xmm0, %xmm1
200 pmovmskb %xmm1, %ecx
201 sub $0xffff, %ecx
202 jnz L(less16bytes)
203#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400204 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800205 lea -16(%ebp), %ebp
206 jbe L(eq)
207#endif
208 add $16, %eax
209 add $16, %edx
210
211L(crosspage):
212
213 PUSH (%ebx)
214 PUSH (%edi)
215 PUSH (%esi)
Bruce Beare124a5422010-10-11 12:24:41 -0700216#ifdef USE_AS_STRNCMP
217 cfi_remember_state
218#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800219
220 movl %edx, %edi
221 movl %eax, %ecx
222 and $0xf, %ecx
223 and $0xf, %edi
224 xor %ecx, %eax
225 xor %edi, %edx
226 xor %ebx, %ebx
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400227 cmpl %edi, %ecx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800228 je L(ashr_0)
229 ja L(bigger)
230 or $0x20, %ebx
231 xchg %edx, %eax
232 xchg %ecx, %edi
233L(bigger):
234 lea 15(%edi), %edi
235 sub %ecx, %edi
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400236 cmpl $8, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800237 jle L(ashr_less_8)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400238 cmpl $14, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800239 je L(ashr_15)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400240 cmpl $13, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800241 je L(ashr_14)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400242 cmpl $12, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800243 je L(ashr_13)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400244 cmpl $11, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800245 je L(ashr_12)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400246 cmpl $10, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800247 je L(ashr_11)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400248 cmpl $9, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800249 je L(ashr_10)
250L(ashr_less_8):
251 je L(ashr_9)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400252 cmpl $7, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800253 je L(ashr_8)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400254 cmpl $6, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800255 je L(ashr_7)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400256 cmpl $5, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800257 je L(ashr_6)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400258 cmpl $4, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800259 je L(ashr_5)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400260 cmpl $3, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800261 je L(ashr_4)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400262 cmpl $2, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800263 je L(ashr_3)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400264 cmpl $1, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800265 je L(ashr_2)
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400266 cmpl $0, %edi
Bruce Beare8ff1a272010-03-04 11:03:37 -0800267 je L(ashr_1)
268
269/*
270 * The following cases will be handled by ashr_0
271 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
272 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
273 */
274 .p2align 4
275L(ashr_0):
276 mov $0xffff, %esi
277 movdqa (%eax), %xmm1
278 pxor %xmm0, %xmm0
279 pcmpeqb %xmm1, %xmm0
280 pcmpeqb (%edx), %xmm1
281 psubb %xmm0, %xmm1
282 pmovmskb %xmm1, %edi
283 shr %cl, %esi
284 shr %cl, %edi
285 sub %edi, %esi
286 mov %ecx, %edi
287 jne L(less32bytes)
288 UPDATE_STRNCMP_COUNTER
289 mov $0x10, %ebx
290 mov $0x10, %ecx
291 pxor %xmm0, %xmm0
292 .p2align 4
293L(loop_ashr_0):
294 movdqa (%eax, %ecx), %xmm1
295 movdqa (%edx, %ecx), %xmm2
296
297 pcmpeqb %xmm1, %xmm0
298 pcmpeqb %xmm2, %xmm1
299 psubb %xmm0, %xmm1
300 pmovmskb %xmm1, %esi
301 sub $0xffff, %esi
302 jnz L(exit)
303#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400304 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800305 lea -16(%ebp), %ebp
306 jbe L(more8byteseq)
307#endif
308 add $16, %ecx
309 jmp L(loop_ashr_0)
310
311/*
312 * The following cases will be handled by ashr_1
313 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
314 * n(15) n -15 0(15 +(n-15) - n) ashr_1
315 */
316 .p2align 4
317L(ashr_1):
318 mov $0xffff, %esi
319 pxor %xmm0, %xmm0
320 movdqa (%edx), %xmm2
321 movdqa (%eax), %xmm1
322 pcmpeqb %xmm1, %xmm0
323 pslldq $15, %xmm2
324 pcmpeqb %xmm1, %xmm2
325 psubb %xmm0, %xmm2
326 pmovmskb %xmm2, %edi
327 shr %cl, %esi
328 shr %cl, %edi
329 sub %edi, %esi
330 lea -15(%ecx), %edi
331 jnz L(less32bytes)
332
333 UPDATE_STRNCMP_COUNTER
334
335 movdqa (%edx), %xmm3
336 pxor %xmm0, %xmm0
337 mov $16, %ecx
338 or $1, %ebx
339 lea 1(%edx), %edi
340 and $0xfff, %edi
341 sub $0x1000, %edi
342
343 .p2align 4
344L(loop_ashr_1):
345 add $16, %edi
346 jg L(nibble_ashr_1)
347
348L(gobble_ashr_1):
349 movdqa (%eax, %ecx), %xmm1
350 movdqa (%edx, %ecx), %xmm2
351 movdqa %xmm2, %xmm4
352
353 palignr $1, %xmm3, %xmm2
354
355 pcmpeqb %xmm1, %xmm0
356 pcmpeqb %xmm2, %xmm1
357 psubb %xmm0, %xmm1
358 pmovmskb %xmm1, %esi
359 sub $0xffff, %esi
360 jnz L(exit)
361#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400362 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800363 lea -16(%ebp), %ebp
364 jbe L(more8byteseq)
365#endif
366
367 add $16, %ecx
368 movdqa %xmm4, %xmm3
369
370 add $16, %edi
371 jg L(nibble_ashr_1)
372
373 movdqa (%eax, %ecx), %xmm1
374 movdqa (%edx, %ecx), %xmm2
375 movdqa %xmm2, %xmm4
376
377 palignr $1, %xmm3, %xmm2
378
379 pcmpeqb %xmm1, %xmm0
380 pcmpeqb %xmm2, %xmm1
381 psubb %xmm0, %xmm1
382 pmovmskb %xmm1, %esi
383 sub $0xffff, %esi
384 jnz L(exit)
385
386#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400387 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800388 lea -16(%ebp), %ebp
389 jbe L(more8byteseq)
390#endif
391 add $16, %ecx
392 movdqa %xmm4, %xmm3
393 jmp L(loop_ashr_1)
394
395 .p2align 4
396L(nibble_ashr_1):
397 pcmpeqb %xmm3, %xmm0
398 pmovmskb %xmm0, %esi
399 test $0xfffe, %esi
400 jnz L(ashr_1_exittail)
401
402#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400403 cmpl $15, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800404 jbe L(ashr_1_exittail)
405#endif
406 pxor %xmm0, %xmm0
407 sub $0x1000, %edi
408 jmp L(gobble_ashr_1)
409
410 .p2align 4
411L(ashr_1_exittail):
412 movdqa (%eax, %ecx), %xmm1
413 psrldq $1, %xmm0
414 psrldq $1, %xmm3
415 jmp L(aftertail)
416
417/*
418 * The following cases will be handled by ashr_2
419 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
420 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
421 */
422 .p2align 4
423L(ashr_2):
424 mov $0xffff, %esi
425 pxor %xmm0, %xmm0
426 movdqa (%edx), %xmm2
427 movdqa (%eax), %xmm1
428 pcmpeqb %xmm1, %xmm0
429 pslldq $14, %xmm2
430 pcmpeqb %xmm1, %xmm2
431 psubb %xmm0, %xmm2
432 pmovmskb %xmm2, %edi
433 shr %cl, %esi
434 shr %cl, %edi
435 sub %edi, %esi
436 lea -14(%ecx), %edi
437 jnz L(less32bytes)
438
439 UPDATE_STRNCMP_COUNTER
440
441 movdqa (%edx), %xmm3
442 pxor %xmm0, %xmm0
443 mov $16, %ecx
444 or $2, %ebx
445 lea 2(%edx), %edi
446 and $0xfff, %edi
447 sub $0x1000, %edi
448
449 .p2align 4
450L(loop_ashr_2):
451 add $16, %edi
452 jg L(nibble_ashr_2)
453
454L(gobble_ashr_2):
455 movdqa (%eax, %ecx), %xmm1
456 movdqa (%edx, %ecx), %xmm2
457 movdqa %xmm2, %xmm4
458
459 palignr $2, %xmm3, %xmm2
460
461 pcmpeqb %xmm1, %xmm0
462 pcmpeqb %xmm2, %xmm1
463 psubb %xmm0, %xmm1
464 pmovmskb %xmm1, %esi
465 sub $0xffff, %esi
466 jnz L(exit)
467
468#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400469 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800470 lea -16(%ebp), %ebp
471 jbe L(more8byteseq)
472#endif
473 add $16, %ecx
474 movdqa %xmm4, %xmm3
475
476 add $16, %edi
477 jg L(nibble_ashr_2)
478
479 movdqa (%eax, %ecx), %xmm1
480 movdqa (%edx, %ecx), %xmm2
481 movdqa %xmm2, %xmm4
482
483 palignr $2, %xmm3, %xmm2
484
485 pcmpeqb %xmm1, %xmm0
486 pcmpeqb %xmm2, %xmm1
487 psubb %xmm0, %xmm1
488 pmovmskb %xmm1, %esi
489 sub $0xffff, %esi
490 jnz L(exit)
491
492#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400493 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800494 lea -16(%ebp), %ebp
495 jbe L(more8byteseq)
496#endif
497 add $16, %ecx
498 movdqa %xmm4, %xmm3
499 jmp L(loop_ashr_2)
500
501 .p2align 4
502L(nibble_ashr_2):
503 pcmpeqb %xmm3, %xmm0
504 pmovmskb %xmm0, %esi
505 test $0xfffc, %esi
506 jnz L(ashr_2_exittail)
507
508#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400509 cmpl $14, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800510 jbe L(ashr_2_exittail)
511#endif
512
513 pxor %xmm0, %xmm0
514 sub $0x1000, %edi
515 jmp L(gobble_ashr_2)
516
517 .p2align 4
518L(ashr_2_exittail):
519 movdqa (%eax, %ecx), %xmm1
520 psrldq $2, %xmm0
521 psrldq $2, %xmm3
522 jmp L(aftertail)
523
524/*
525 * The following cases will be handled by ashr_3
526 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
527 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
528 */
529 .p2align 4
530L(ashr_3):
531 mov $0xffff, %esi
532 pxor %xmm0, %xmm0
533 movdqa (%edx), %xmm2
534 movdqa (%eax), %xmm1
535 pcmpeqb %xmm1, %xmm0
536 pslldq $13, %xmm2
537 pcmpeqb %xmm1, %xmm2
538 psubb %xmm0, %xmm2
539 pmovmskb %xmm2, %edi
540 shr %cl, %esi
541 shr %cl, %edi
542 sub %edi, %esi
543 lea -13(%ecx), %edi
544 jnz L(less32bytes)
545
546 UPDATE_STRNCMP_COUNTER
547
548 movdqa (%edx), %xmm3
549 pxor %xmm0, %xmm0
550 mov $16, %ecx
551 or $3, %ebx
552 lea 3(%edx), %edi
553 and $0xfff, %edi
554 sub $0x1000, %edi
555
556 .p2align 4
557L(loop_ashr_3):
558 add $16, %edi
559 jg L(nibble_ashr_3)
560
561L(gobble_ashr_3):
562 movdqa (%eax, %ecx), %xmm1
563 movdqa (%edx, %ecx), %xmm2
564 movdqa %xmm2, %xmm4
565
566 palignr $3, %xmm3, %xmm2
567
568 pcmpeqb %xmm1, %xmm0
569 pcmpeqb %xmm2, %xmm1
570 psubb %xmm0, %xmm1
571 pmovmskb %xmm1, %esi
572 sub $0xffff, %esi
573 jnz L(exit)
574
575#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400576 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800577 lea -16(%ebp), %ebp
578 jbe L(more8byteseq)
579#endif
580 add $16, %ecx
581 movdqa %xmm4, %xmm3
582
583 add $16, %edi
584 jg L(nibble_ashr_3)
585
586 movdqa (%eax, %ecx), %xmm1
587 movdqa (%edx, %ecx), %xmm2
588 movdqa %xmm2, %xmm4
589
590 palignr $3, %xmm3, %xmm2
591
592 pcmpeqb %xmm1, %xmm0
593 pcmpeqb %xmm2, %xmm1
594 psubb %xmm0, %xmm1
595 pmovmskb %xmm1, %esi
596 sub $0xffff, %esi
597 jnz L(exit)
598
599#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400600 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800601 lea -16(%ebp), %ebp
602 jbe L(more8byteseq)
603#endif
604 add $16, %ecx
605 movdqa %xmm4, %xmm3
606 jmp L(loop_ashr_3)
607
608 .p2align 4
609L(nibble_ashr_3):
610 pcmpeqb %xmm3, %xmm0
611 pmovmskb %xmm0, %esi
612 test $0xfff8, %esi
613 jnz L(ashr_3_exittail)
614
615#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400616 cmpl $13, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800617 jbe L(ashr_3_exittail)
618#endif
619 pxor %xmm0, %xmm0
620 sub $0x1000, %edi
621 jmp L(gobble_ashr_3)
622
623 .p2align 4
624L(ashr_3_exittail):
625 movdqa (%eax, %ecx), %xmm1
626 psrldq $3, %xmm0
627 psrldq $3, %xmm3
628 jmp L(aftertail)
629
630/*
631 * The following cases will be handled by ashr_4
632 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
633 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
634 */
635 .p2align 4
636L(ashr_4):
637 mov $0xffff, %esi
638 pxor %xmm0, %xmm0
639 movdqa (%edx), %xmm2
640 movdqa (%eax), %xmm1
641 pcmpeqb %xmm1, %xmm0
642 pslldq $12, %xmm2
643 pcmpeqb %xmm1, %xmm2
644 psubb %xmm0, %xmm2
645 pmovmskb %xmm2, %edi
646 shr %cl, %esi
647 shr %cl, %edi
648 sub %edi, %esi
649 lea -12(%ecx), %edi
650 jnz L(less32bytes)
651
652 UPDATE_STRNCMP_COUNTER
653
654 movdqa (%edx), %xmm3
655 pxor %xmm0, %xmm0
656 mov $16, %ecx
657 or $4, %ebx
658 lea 4(%edx), %edi
659 and $0xfff, %edi
660 sub $0x1000, %edi
661
662 .p2align 4
663L(loop_ashr_4):
664 add $16, %edi
665 jg L(nibble_ashr_4)
666
667L(gobble_ashr_4):
668 movdqa (%eax, %ecx), %xmm1
669 movdqa (%edx, %ecx), %xmm2
670 movdqa %xmm2, %xmm4
671
672 palignr $4, %xmm3, %xmm2
673
674 pcmpeqb %xmm1, %xmm0
675 pcmpeqb %xmm2, %xmm1
676 psubb %xmm0, %xmm1
677 pmovmskb %xmm1, %esi
678 sub $0xffff, %esi
679 jnz L(exit)
680
681#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400682 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800683 lea -16(%ebp), %ebp
684 jbe L(more8byteseq)
685#endif
686
687 add $16, %ecx
688 movdqa %xmm4, %xmm3
689
690 add $16, %edi
691 jg L(nibble_ashr_4)
692
693 movdqa (%eax, %ecx), %xmm1
694 movdqa (%edx, %ecx), %xmm2
695 movdqa %xmm2, %xmm4
696
697 palignr $4, %xmm3, %xmm2
698
699 pcmpeqb %xmm1, %xmm0
700 pcmpeqb %xmm2, %xmm1
701 psubb %xmm0, %xmm1
702 pmovmskb %xmm1, %esi
703 sub $0xffff, %esi
704 jnz L(exit)
705
706#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400707 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800708 lea -16(%ebp), %ebp
709 jbe L(more8byteseq)
710#endif
711
712 add $16, %ecx
713 movdqa %xmm4, %xmm3
714 jmp L(loop_ashr_4)
715
716 .p2align 4
717L(nibble_ashr_4):
718 pcmpeqb %xmm3, %xmm0
719 pmovmskb %xmm0, %esi
720 test $0xfff0, %esi
721 jnz L(ashr_4_exittail)
722
723#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400724 cmpl $12, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800725 jbe L(ashr_4_exittail)
726#endif
727
728 pxor %xmm0, %xmm0
729 sub $0x1000, %edi
730 jmp L(gobble_ashr_4)
731
732 .p2align 4
733L(ashr_4_exittail):
734 movdqa (%eax, %ecx), %xmm1
735 psrldq $4, %xmm0
736 psrldq $4, %xmm3
737 jmp L(aftertail)
738
739/*
740 * The following cases will be handled by ashr_5
741 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
742 * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
743 */
744 .p2align 4
745L(ashr_5):
746 mov $0xffff, %esi
747 pxor %xmm0, %xmm0
748 movdqa (%edx), %xmm2
749 movdqa (%eax), %xmm1
750 pcmpeqb %xmm1, %xmm0
751 pslldq $11, %xmm2
752 pcmpeqb %xmm1, %xmm2
753 psubb %xmm0, %xmm2
754 pmovmskb %xmm2, %edi
755 shr %cl, %esi
756 shr %cl, %edi
757 sub %edi, %esi
758 lea -11(%ecx), %edi
759 jnz L(less32bytes)
760
761 UPDATE_STRNCMP_COUNTER
762
763 movdqa (%edx), %xmm3
764 pxor %xmm0, %xmm0
765 mov $16, %ecx
766 or $5, %ebx
767 lea 5(%edx), %edi
768 and $0xfff, %edi
769 sub $0x1000, %edi
770
771 .p2align 4
772L(loop_ashr_5):
773 add $16, %edi
774 jg L(nibble_ashr_5)
775
776L(gobble_ashr_5):
777 movdqa (%eax, %ecx), %xmm1
778 movdqa (%edx, %ecx), %xmm2
779 movdqa %xmm2, %xmm4
780
781 palignr $5, %xmm3, %xmm2
782
783 pcmpeqb %xmm1, %xmm0
784 pcmpeqb %xmm2, %xmm1
785 psubb %xmm0, %xmm1
786 pmovmskb %xmm1, %esi
787 sub $0xffff, %esi
788 jnz L(exit)
789
790#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400791 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800792 lea -16(%ebp), %ebp
793 jbe L(more8byteseq)
794#endif
795 add $16, %ecx
796 movdqa %xmm4, %xmm3
797
798 add $16, %edi
799 jg L(nibble_ashr_5)
800
801 movdqa (%eax, %ecx), %xmm1
802 movdqa (%edx, %ecx), %xmm2
803 movdqa %xmm2, %xmm4
804
805 palignr $5, %xmm3, %xmm2
806
807 pcmpeqb %xmm1, %xmm0
808 pcmpeqb %xmm2, %xmm1
809 psubb %xmm0, %xmm1
810 pmovmskb %xmm1, %esi
811 sub $0xffff, %esi
812 jnz L(exit)
813
814#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400815 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800816 lea -16(%ebp), %ebp
817 jbe L(more8byteseq)
818#endif
819 add $16, %ecx
820 movdqa %xmm4, %xmm3
821 jmp L(loop_ashr_5)
822
823 .p2align 4
824L(nibble_ashr_5):
825 pcmpeqb %xmm3, %xmm0
826 pmovmskb %xmm0, %esi
827 test $0xffe0, %esi
828 jnz L(ashr_5_exittail)
829
830#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400831 cmpl $11, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800832 jbe L(ashr_5_exittail)
833#endif
834 pxor %xmm0, %xmm0
835 sub $0x1000, %edi
836 jmp L(gobble_ashr_5)
837
838 .p2align 4
839L(ashr_5_exittail):
840 movdqa (%eax, %ecx), %xmm1
841 psrldq $5, %xmm0
842 psrldq $5, %xmm3
843 jmp L(aftertail)
844
845/*
846 * The following cases will be handled by ashr_6
847 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
848 * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
849 */
850
851 .p2align 4
852L(ashr_6):
853 mov $0xffff, %esi
854 pxor %xmm0, %xmm0
855 movdqa (%edx), %xmm2
856 movdqa (%eax), %xmm1
857 pcmpeqb %xmm1, %xmm0
858 pslldq $10, %xmm2
859 pcmpeqb %xmm1, %xmm2
860 psubb %xmm0, %xmm2
861 pmovmskb %xmm2, %edi
862 shr %cl, %esi
863 shr %cl, %edi
864 sub %edi, %esi
865 lea -10(%ecx), %edi
866 jnz L(less32bytes)
867
868 UPDATE_STRNCMP_COUNTER
869
870 movdqa (%edx), %xmm3
871 pxor %xmm0, %xmm0
872 mov $16, %ecx
873 or $6, %ebx
874 lea 6(%edx), %edi
875 and $0xfff, %edi
876 sub $0x1000, %edi
877
878 .p2align 4
879L(loop_ashr_6):
880 add $16, %edi
881 jg L(nibble_ashr_6)
882
883L(gobble_ashr_6):
884 movdqa (%eax, %ecx), %xmm1
885 movdqa (%edx, %ecx), %xmm2
886 movdqa %xmm2, %xmm4
887
888 palignr $6, %xmm3, %xmm2
889
890 pcmpeqb %xmm1, %xmm0
891 pcmpeqb %xmm2, %xmm1
892 psubb %xmm0, %xmm1
893 pmovmskb %xmm1, %esi
894 sub $0xffff, %esi
895 jnz L(exit)
896
897#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400898 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800899 lea -16(%ebp), %ebp
900 jbe L(more8byteseq)
901#endif
902
903 add $16, %ecx
904 movdqa %xmm4, %xmm3
905
906 add $16, %edi
907 jg L(nibble_ashr_6)
908
909 movdqa (%eax, %ecx), %xmm1
910 movdqa (%edx, %ecx), %xmm2
911 movdqa %xmm2, %xmm4
912
913 palignr $6, %xmm3, %xmm2
914
915 pcmpeqb %xmm1, %xmm0
916 pcmpeqb %xmm2, %xmm1
917 psubb %xmm0, %xmm1
918 pmovmskb %xmm1, %esi
919 sub $0xffff, %esi
920 jnz L(exit)
921#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400922 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800923 lea -16(%ebp), %ebp
924 jbe L(more8byteseq)
925#endif
926
927 add $16, %ecx
928 movdqa %xmm4, %xmm3
929 jmp L(loop_ashr_6)
930
931 .p2align 4
932L(nibble_ashr_6):
933 pcmpeqb %xmm3, %xmm0
934 pmovmskb %xmm0, %esi
935 test $0xffc0, %esi
936 jnz L(ashr_6_exittail)
937
938#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -0400939 cmpl $10, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -0800940 jbe L(ashr_6_exittail)
941#endif
942 pxor %xmm0, %xmm0
943 sub $0x1000, %edi
944 jmp L(gobble_ashr_6)
945
946 .p2align 4
947L(ashr_6_exittail):
948 movdqa (%eax, %ecx), %xmm1
949 psrldq $6, %xmm0
950 psrldq $6, %xmm3
951 jmp L(aftertail)
952
/*
 * The following cases will be handled by ashr_7
 * ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(9~15)             n - 9         6(15 +(n-9) - n)       ashr_7
 */

	.p2align 4
L(ashr_7):
	/* Head: compare the aligned %eax chunk against the %edx chunk
	   shifted into position; shr %cl on both masks discards the
	   lanes before the true string start.  */
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* xmm0 = zero-byte mask of xmm1 */
	pslldq	$9, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero => mismatch or NUL in head */
	lea	-9(%ecx), %edi		/* lea preserves the flags tested next */
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* xmm3 = previous %edx chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx		/* running byte offset */
	or	$7, %ebx		/* tag the shift amount for the shared exit code */
	/* Page-cross guard: %edi counts up toward zero as %edx+%ecx nears
	   a 4K page boundary.  NOTE(review): sibling cases use
	   lea N(%edx) for ashr_N; the "8" here is off by one on the
	   conservative (early-bail) side — confirm against upstream
	   before changing.  */
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_7):
	add	$16, %edi
	jg	L(nibble_ashr_7)	/* next %edx load may cross a page */

L(gobble_ashr_7):
	/* Main loop, 2x unrolled: rebuild the 7-byte-shifted %edx window
	   with palignr from the previous (xmm3) and current (xmm4) chunks.  */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)			/* difference or terminator found */

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_7)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_7)

	.p2align 4
L(nibble_ashr_7):
	/* Near a page boundary: scan the 9 still-valid bytes (bits 7..15)
	   of the stale chunk for a NUL before reading the next page.  */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xff80, %esi
	jnz	L(ashr_7_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$9, %ebp
	jbe	L(ashr_7_exittail)
#endif
	pxor	%xmm0, %xmm0		/* original had this twice; once suffices */
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_7)

	.p2align 4
L(ashr_7_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$7, %xmm0
	psrldq	$7, %xmm3
	jmp	L(aftertail)
1062
/*
 * The following cases will be handled by ashr_8
 * ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(8~15)             n - 8         7(15 +(n-8) - n)       ashr_8
 */
	.p2align 4
L(ashr_8):
	/* Head: compare the aligned %eax chunk against the %edx chunk
	   shifted into position; shr %cl on both masks discards the
	   lanes before the true string start.  */
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* xmm0 = zero-byte mask of xmm1 */
	pslldq	$8, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero => mismatch or NUL in head */
	lea	-8(%ecx), %edi		/* lea preserves the flags tested next */
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* xmm3 = previous %edx chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx		/* running byte offset */
	or	$8, %ebx		/* tag the shift amount for the shared exit code */
	/* Page-cross guard: %edi counts up toward zero as %edx+%ecx nears
	   a 4K page boundary.  */
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_8):
	add	$16, %edi
	jg	L(nibble_ashr_8)	/* next %edx load may cross a page */

L(gobble_ashr_8):
	/* Main loop, 2x unrolled: rebuild the 8-byte-shifted %edx window
	   with palignr from the previous (xmm3) and current (xmm4) chunks.  */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)			/* difference or terminator found */

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_8)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_8)

	.p2align 4
L(nibble_ashr_8):
	/* Near a page boundary: scan the 8 still-valid bytes (bits 8..15)
	   of the stale chunk for a NUL before reading the next page.  */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xff00, %esi
	jnz	L(ashr_8_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$8, %ebp
	jbe	L(ashr_8_exittail)
#endif
	pxor	%xmm0, %xmm0		/* original had this twice; once suffices */
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_8)

	.p2align 4
L(ashr_8_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$8, %xmm0
	psrldq	$8, %xmm3
	jmp	L(aftertail)
1169
1170/*
1171 * The following cases will be handled by ashr_9
1172 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1173 * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
1174 */
1175 .p2align 4
1176L(ashr_9):
1177 mov $0xffff, %esi
1178 pxor %xmm0, %xmm0
1179 movdqa (%edx), %xmm2
1180 movdqa (%eax), %xmm1
1181 pcmpeqb %xmm1, %xmm0
1182 pslldq $7, %xmm2
1183 pcmpeqb %xmm1, %xmm2
1184 psubb %xmm0, %xmm2
1185 pmovmskb %xmm2, %edi
1186 shr %cl, %esi
1187 shr %cl, %edi
1188 sub %edi, %esi
1189 lea -7(%ecx), %edi
1190 jnz L(less32bytes)
1191
1192 UPDATE_STRNCMP_COUNTER
1193
1194 movdqa (%edx), %xmm3
1195 pxor %xmm0, %xmm0
1196 mov $16, %ecx
1197 or $9, %ebx
1198 lea 9(%edx), %edi
1199 and $0xfff, %edi
1200 sub $0x1000, %edi
1201
1202 .p2align 4
1203L(loop_ashr_9):
1204 add $16, %edi
1205 jg L(nibble_ashr_9)
1206
1207L(gobble_ashr_9):
1208 movdqa (%eax, %ecx), %xmm1
1209 movdqa (%edx, %ecx), %xmm2
1210 movdqa %xmm2, %xmm4
1211
1212 palignr $9, %xmm3, %xmm2
1213
1214 pcmpeqb %xmm1, %xmm0
1215 pcmpeqb %xmm2, %xmm1
1216 psubb %xmm0, %xmm1
1217 pmovmskb %xmm1, %esi
1218 sub $0xffff, %esi
1219 jnz L(exit)
1220
1221#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -04001222 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -08001223 lea -16(%ebp), %ebp
1224 jbe L(more8byteseq)
1225#endif
1226 add $16, %ecx
1227 movdqa %xmm4, %xmm3
1228
1229 add $16, %edi
1230 jg L(nibble_ashr_9)
1231
1232 movdqa (%eax, %ecx), %xmm1
1233 movdqa (%edx, %ecx), %xmm2
1234 movdqa %xmm2, %xmm4
1235
1236 palignr $9, %xmm3, %xmm2
1237
1238 pcmpeqb %xmm1, %xmm0
1239 pcmpeqb %xmm2, %xmm1
1240 psubb %xmm0, %xmm1
1241 pmovmskb %xmm1, %esi
1242 sub $0xffff, %esi
1243 jnz L(exit)
1244
1245#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -04001246 cmpl $16, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -08001247 lea -16(%ebp), %ebp
1248 jbe L(more8byteseq)
1249#endif
1250 add $16, %ecx
1251 movdqa %xmm4, %xmm3
1252 jmp L(loop_ashr_9)
1253
1254 .p2align 4
1255L(nibble_ashr_9):
1256 pcmpeqb %xmm3, %xmm0
1257 pmovmskb %xmm0, %esi
1258 test $0xfe00, %esi
1259 jnz L(ashr_9_exittail)
1260
1261#ifdef USE_AS_STRNCMP
Kevin Schoedelc0b4d182012-06-29 09:23:47 -04001262 cmpl $7, %ebp
Bruce Beare8ff1a272010-03-04 11:03:37 -08001263 jbe L(ashr_9_exittail)
1264#endif
1265 pxor %xmm0, %xmm0
1266 sub $0x1000, %edi
1267 jmp L(gobble_ashr_9)
1268
1269 .p2align 4
1270L(ashr_9_exittail):
1271 movdqa (%eax, %ecx), %xmm1
1272 psrldq $9, %xmm0
1273 psrldq $9, %xmm3
1274 jmp L(aftertail)
1275
/*
 * The following cases will be handled by ashr_10
 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(6~15)             n - 6          9(15 +(n-6) - n)     ashr_10
 *
 * NOTE(review): the table above uses esi/edi register names from the
 * original upstream implementation; in this file the strings appear to
 * live in eax and edx — confirm against the function prologue.
 * Here the second string (edx) is misaligned by 10 bytes relative to the
 * first; each iteration realigns it with palignr $10.
 */
	.p2align 4
L(ashr_10):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* mark NUL bytes in the first chunk */
	pslldq	$6, %xmm2		/* line the head of s2 up under s1 */
	pcmpeqb	%xmm1, %xmm2		/* compare the overlapping head bytes */
	psubb	%xmm0, %xmm2		/* fold NUL hits into the mask */
	pmovmskb %xmm2, %edi
	shr	%cl, %esi		/* discard lanes before the start offset */
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero -> mismatch/NUL in the head */
	lea	-6(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* xmm3 = previous s2 chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx		/* common offset of the next chunk pair */
	or	$10, %ebx		/* record the shift amount in ebx's low bits */
	lea	10(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi		/* edi > 0 when the next load crosses a 4K page */

	.p2align 4
L(loop_ashr_10):
	add	$16, %edi
	jg	L(nibble_ashr_10)	/* avoid reading across the page boundary */

L(gobble_ashr_10):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4		/* save raw chunk for the next iteration */

	palignr	$10, %xmm3, %xmm2	/* realign s2 with s1 */

	pcmpeqb	%xmm1, %xmm0		/* NUL detection */
	pcmpeqb	%xmm2, %xmm1		/* byte equality */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* 0xffff = all equal, no NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp		/* ebp = remaining strncmp count */
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_10)

	/* second unrolled copy */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$10, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_10)

	.p2align 4
L(nibble_ashr_10):
	pcmpeqb	%xmm3, %xmm0		/* scan the stale chunk for a NUL */
	pmovmskb %xmm0, %esi
	test	$0xfc00, %esi		/* NUL among bytes 10..15 (6 pending bytes)? */
	jnz	L(ashr_10_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$6, %ebp
	jbe	L(ashr_10_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi		/* rewind page counter; resume gobbling */
	jmp	L(gobble_ashr_10)

	.p2align 4
L(ashr_10_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$10, %xmm0		/* drop already-compared low bytes */
	psrldq	$10, %xmm3
	jmp	L(aftertail)
1381
/*
 * The following cases will be handled by ashr_11
 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(5~15)             n - 5          10(15 +(n-5) - n)    ashr_11
 *
 * s2 (edx) is misaligned by 11 bytes relative to s1 (eax); same loop
 * structure as the other ashr_N cases, using palignr $11.
 */
	.p2align 4
L(ashr_11):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* mark NUL bytes in the first chunk */
	pslldq	$5, %xmm2		/* line the head of s2 up under s1 */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi		/* discard lanes before the start offset */
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero -> mismatch/NUL in the head */
	lea	-5(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* previous s2 chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$11, %ebx		/* record shift amount in ebx */
	lea	11(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi		/* page-boundary countdown */

	.p2align 4
L(loop_ashr_11):
	add	$16, %edi
	jg	L(nibble_ashr_11)

L(gobble_ashr_11):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2	/* realign s2 with s1 */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* 0xffff = all equal, no NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp		/* remaining strncmp count */
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_11)

	/* second unrolled copy */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_11)

	.p2align 4
L(nibble_ashr_11):
	pcmpeqb	%xmm3, %xmm0		/* scan the stale chunk for a NUL */
	pmovmskb %xmm0, %esi
	test	$0xf800, %esi		/* NUL among bytes 11..15 (5 pending bytes)? */
	jnz	L(ashr_11_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$5, %ebp
	jbe	L(ashr_11_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi		/* rewind page counter */
	jmp	L(gobble_ashr_11)

	.p2align 4
L(ashr_11_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$11, %xmm0
	psrldq	$11, %xmm3
	jmp	L(aftertail)
1487
/*
 * The following cases will be handled by ashr_12
 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(4~15)             n - 4          11(15 +(n-4) - n)    ashr_12
 *
 * s2 (edx) is misaligned by 12 bytes relative to s1 (eax); same loop
 * structure as the other ashr_N cases, using palignr $12.
 */
	.p2align 4
L(ashr_12):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* mark NUL bytes in the first chunk */
	pslldq	$4, %xmm2		/* line the head of s2 up under s1 */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi		/* discard lanes before the start offset */
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero -> mismatch/NUL in the head */
	lea	-4(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* previous s2 chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$12, %ebx		/* record shift amount in ebx */
	lea	12(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi		/* page-boundary countdown */

	.p2align 4
L(loop_ashr_12):
	add	$16, %edi
	jg	L(nibble_ashr_12)

L(gobble_ashr_12):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2	/* realign s2 with s1 */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* 0xffff = all equal, no NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp		/* remaining strncmp count */
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_12)

	/* second unrolled copy */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_12)

	.p2align 4
L(nibble_ashr_12):
	pcmpeqb	%xmm3, %xmm0		/* scan the stale chunk for a NUL */
	pmovmskb %xmm0, %esi
	test	$0xf000, %esi		/* NUL among bytes 12..15 (4 pending bytes)? */
	jnz	L(ashr_12_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$4, %ebp
	jbe	L(ashr_12_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi		/* rewind page counter */
	jmp	L(gobble_ashr_12)

	.p2align 4
L(ashr_12_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$12, %xmm0
	psrldq	$12, %xmm3
	jmp	L(aftertail)
1594
/*
 * The following cases will be handled by ashr_13
 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(3~15)             n - 3          12(15 +(n-3) - n)    ashr_13
 *
 * s2 (edx) is misaligned by 13 bytes relative to s1 (eax); same loop
 * structure as the other ashr_N cases, using palignr $13.
 */
	.p2align 4
L(ashr_13):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* mark NUL bytes in the first chunk */
	pslldq	$3, %xmm2		/* line the head of s2 up under s1 */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi		/* discard lanes before the start offset */
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero -> mismatch/NUL in the head */
	lea	-3(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* previous s2 chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$13, %ebx		/* record shift amount in ebx */
	lea	13(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi		/* page-boundary countdown */

	.p2align 4
L(loop_ashr_13):
	add	$16, %edi
	jg	L(nibble_ashr_13)

L(gobble_ashr_13):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2	/* realign s2 with s1 */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* 0xffff = all equal, no NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp		/* remaining strncmp count */
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_13)

	/* second unrolled copy */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_13)

	.p2align 4
L(nibble_ashr_13):
	pcmpeqb	%xmm3, %xmm0		/* scan the stale chunk for a NUL */
	pmovmskb %xmm0, %esi
	test	$0xe000, %esi		/* NUL among bytes 13..15 (3 pending bytes)? */
	jnz	L(ashr_13_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$3, %ebp
	jbe	L(ashr_13_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi		/* rewind page counter */
	jmp	L(gobble_ashr_13)

	.p2align 4
L(ashr_13_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$13, %xmm0
	psrldq	$13, %xmm3
	jmp	L(aftertail)
1700
/*
 * The following cases will be handled by ashr_14
 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(2~15)             n - 2          13(15 +(n-2) - n)    ashr_14
 *
 * s2 (edx) is misaligned by 14 bytes relative to s1 (eax); same loop
 * structure as the other ashr_N cases, using palignr $14.
 */
	.p2align 4
L(ashr_14):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* mark NUL bytes in the first chunk */
	pslldq	$2, %xmm2		/* line the head of s2 up under s1 */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi		/* discard lanes before the start offset */
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero -> mismatch/NUL in the head */
	lea	-2(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* previous s2 chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$14, %ebx		/* record shift amount in ebx */
	lea	14(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi		/* page-boundary countdown */

	.p2align 4
L(loop_ashr_14):
	add	$16, %edi
	jg	L(nibble_ashr_14)

L(gobble_ashr_14):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2	/* realign s2 with s1 */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* 0xffff = all equal, no NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp		/* remaining strncmp count */
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_14)

	/* second unrolled copy */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_14)

	.p2align 4
L(nibble_ashr_14):
	pcmpeqb	%xmm3, %xmm0		/* scan the stale chunk for a NUL */
	pmovmskb %xmm0, %esi
	test	$0xc000, %esi		/* NUL among bytes 14..15 (2 pending bytes)? */
	jnz	L(ashr_14_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$2, %ebp
	jbe	L(ashr_14_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi		/* rewind page counter */
	jmp	L(gobble_ashr_14)

	.p2align 4
L(ashr_14_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$14, %xmm0
	psrldq	$14, %xmm3
	jmp	L(aftertail)
1806
/*
 * The following cases will be handled by ashr_15
 * (the original comment said "ashr_14" — copy/paste typo)
 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *       n(1~15)             n - 1          14(15 +(n-1) - n)    ashr_15
 *
 * s2 (edx) is misaligned by 15 bytes relative to s1 (eax); same loop
 * structure as the other ashr_N cases, using palignr $15.
 */

	.p2align 4
L(ashr_15):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* mark NUL bytes in the first chunk */
	pslldq	$1, %xmm2		/* line the head of s2 up under s1 */
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi		/* discard lanes before the start offset */
	shr	%cl, %edi
	sub	%edi, %esi		/* nonzero -> mismatch/NUL in the head */
	lea	-1(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3		/* previous s2 chunk for palignr */
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$15, %ebx		/* record shift amount in ebx */
	lea	15(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi		/* page-boundary countdown */

	.p2align 4
L(loop_ashr_15):
	add	$16, %edi
	jg	L(nibble_ashr_15)

L(gobble_ashr_15):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2	/* realign s2 with s1 */

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi		/* 0xffff = all equal, no NUL */
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp		/* remaining strncmp count */
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_15)

	/* second unrolled copy */
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_15)

	.p2align 4
L(nibble_ashr_15):
	pcmpeqb	%xmm3, %xmm0		/* scan the stale chunk for a NUL */
	pmovmskb %xmm0, %esi
	test	$0x8000, %esi		/* NUL in byte 15 (the 1 pending byte)? */
	jnz	L(ashr_15_exittail)

#ifdef USE_AS_STRNCMP
	cmpl	$1, %ebp
	jbe	L(ashr_15_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi		/* rewind page counter */
	jmp	L(gobble_ashr_15)

	.p2align 4
L(ashr_15_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$15, %xmm0
	psrldq	$15, %xmm3
	jmp	L(aftertail)
1913
	.p2align 4
/*
 * Common tail shared by all ashr_N exit paths.
 * On entry: xmm1 = fresh s1 chunk, xmm3 = stale shifted s2 chunk,
 * xmm0 = NUL mask (both already shifted right by the case's amount).
 * Produces in esi the 16-bit mismatch/NUL mask, then rewinds the
 * pointers to the offending chunk and returns the pointers to their
 * caller-visible order before falling into the byte-wise decoder.
 */
L(aftertail):
	pcmpeqb	%xmm3, %xmm1		/* compare s1 chunk with the shifted stale s2 */
	psubb	%xmm0, %xmm1		/* fold in NUL hits */
	pmovmskb %xmm1, %esi
	not	%esi			/* set bits = mismatching/NUL positions */
L(exit):
	mov	%ebx, %edi
	and	$0x1f, %edi		/* recover shift amount stored via "or $N, %ebx" */
	lea	-16(%edi, %ecx), %edi
L(less32bytes):
	add	%edi, %edx		/* point both strings at the offending chunk */
	add	%ecx, %eax
	test	$0x20, %ebx		/* bit 5 set -> strings were swapped earlier */
	jz	L(ret2)
	xchg	%eax, %edx		/* undo the swap so the result sign is right */

	.p2align 4
L(ret2):
	mov	%esi, %ecx		/* ecx = 16-bit mismatch/NUL bitmask */
	POP	(%esi)			/* restore callee-saved registers */
	POP	(%edi)
	POP	(%ebx)
/*
 * Byte-wise decode of the mismatch mask in ecx (bit i = difference or
 * NUL at byte i of the 16-byte window at eax/edx).  Low byte of the
 * mask covers bytes 0..7; fallthrough after testing bit 6 means the
 * hit is at byte 7.
 */
L(less16bytes):
	test	%cl, %cl		/* any hit in the low 8 bytes? */
	jz	L(2next_8_bytes)

	test	$0x01, %cl
	jnz	L(Byte0)

	test	$0x02, %cl
	jnz	L(Byte1)

	test	$0x04, %cl
	jnz	L(Byte2)

	test	$0x08, %cl
	jnz	L(Byte3)

	test	$0x10, %cl
	jnz	L(Byte4)

	test	$0x20, %cl
	jnz	L(Byte5)

	test	$0x40, %cl
	jnz	L(Byte6)
#ifdef USE_AS_STRNCMP
	cmpl	$7, %ebp		/* length limit reached before byte 7 -> equal */
	jbe	L(eq)
#endif

	movzbl	7(%eax), %ecx		/* hit is at byte 7 */
	movzbl	7(%edx), %eax

	sub	%ecx, %eax		/* result = *(edx+7) - *(eax+7); ret2 already
					   restored pointer order so the sign is correct */
	RETURN
1971
	.p2align 4
/*
 * L(ByteN): return the signed difference of byte N of the two strings.
 * Under strncmp, report equality instead when the remaining count (ebp)
 * runs out at or before byte N.  Result sign convention relies on ret2
 * having restored the pointers to caller order.
 */
L(Byte0):
#ifdef USE_AS_STRNCMP
	cmpl	$0, %ebp
	jbe	L(eq)
#endif
	movzbl	(%eax), %ecx
	movzbl	(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte1):
#ifdef USE_AS_STRNCMP
	cmpl	$1, %ebp
	jbe	L(eq)
#endif
	movzbl	1(%eax), %ecx
	movzbl	1(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte2):
#ifdef USE_AS_STRNCMP
	cmpl	$2, %ebp
	jbe	L(eq)
#endif
	movzbl	2(%eax), %ecx
	movzbl	2(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte3):
#ifdef USE_AS_STRNCMP
	cmpl	$3, %ebp
	jbe	L(eq)
#endif
	movzbl	3(%eax), %ecx
	movzbl	3(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte4):
#ifdef USE_AS_STRNCMP
	cmpl	$4, %ebp
	jbe	L(eq)
#endif
	movzbl	4(%eax), %ecx
	movzbl	4(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte5):
#ifdef USE_AS_STRNCMP
	cmpl	$5, %ebp
	jbe	L(eq)
#endif
	movzbl	5(%eax), %ecx
	movzbl	5(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte6):
#ifdef USE_AS_STRNCMP
	cmpl	$6, %ebp
	jbe	L(eq)
#endif
	movzbl	6(%eax), %ecx
	movzbl	6(%edx), %eax

	sub	%ecx, %eax
	RETURN
2055
	.p2align 4
/*
 * The mismatch is in bytes 8..15: advance both pointers by 8 and decode
 * mask bits 8..14 from %ch, reusing the ByteN handlers (which now see
 * the hit at offsets 0..6).  Fallthrough after bit 14 means byte 15.
 */
L(2next_8_bytes):
	add	$8, %eax
	add	$8, %edx
#ifdef USE_AS_STRNCMP
	cmpl	$8, %ebp		/* limit inside the first 8 bytes -> equal */
	lea	-8(%ebp), %ebp		/* consume those 8 bytes from the count */
	jbe	L(eq)
#endif

	test	$0x01, %ch		/* mask bit 8 -> (advanced) byte 0 */
	jnz	L(Byte0)

	test	$0x02, %ch
	jnz	L(Byte1)

	test	$0x04, %ch
	jnz	L(Byte2)

	test	$0x08, %ch
	jnz	L(Byte3)

	test	$0x10, %ch
	jnz	L(Byte4)

	test	$0x20, %ch
	jnz	L(Byte5)

	test	$0x40, %ch
	jnz	L(Byte6)

#ifdef USE_AS_STRNCMP
	cmpl	$7, %ebp
	jbe	L(eq)
#endif
	movzbl	7(%eax), %ecx		/* hit is at original byte 15 */
	movzbl	7(%edx), %eax

	sub	%ecx, %eax
	RETURN
2096
	.p2align 4
/*
 * Mismatch exit for the byte-wise strncmp path.  Entered with the flags
 * of the caller's cmpb still live: "ja" (unsigned above) picks +1,
 * otherwise the result is negated to -1.
 */
L(neq):
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax			/* below -> return -1 */
L(neq_bigger):
	RETURN
2104
#ifdef USE_AS_STRNCMP
	cfi_restore_state		/* unwind state from before esi/edi/ebx were popped */
	.p2align 4
/*
 * strncmp: the length limit was exhausted inside the SIMD loops.
 * Restore the callee-saved registers pushed by the SIMD path, then
 * fall through to the common "equal" return.
 */
L(more8byteseq):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#endif

/* Common "strings are equal" return: result 0. */
L(eq):

#ifdef USE_AS_STRNCMP
	POP	(%ebp)			/* ebp held the remaining count */
#endif
	xorl	%eax, %eax		/* return 0 */
	ret
2121
#ifdef USE_AS_STRNCMP
	CFI_PUSH (%ebp)

	.p2align 4
/*
 * strncmp fallback for short limits: compare up to 16 bytes one at a
 * time.  ebp = byte count; eax/edx = the two strings.  Each step:
 * mismatch -> L(neq) (flags from cmpb select the sign), NUL -> L(eq),
 * count reached -> L(eq).  Falls off the end after byte 15 with a
 * zero return.
 */
L(less16bytes_sncmp):
	test	%ebp, %ebp		/* n == 0 -> equal by definition */
	jz	L(eq)

	movzbl	(%eax), %ecx		/* byte 0 */
	cmpb	%cl, (%edx)
	jne	L(neq)
	test	%cl, %cl		/* NUL terminates both strings */
	je	L(eq)

	cmpl	$1, %ebp
	je	L(eq)

	movzbl	1(%eax), %ecx		/* byte 1 */
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$2, %ebp
	je	L(eq)

	movzbl	2(%eax), %ecx		/* byte 2 */
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$3, %ebp
	je	L(eq)

	movzbl	3(%eax), %ecx		/* byte 3 */
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$4, %ebp
	je	L(eq)

	movzbl	4(%eax), %ecx		/* byte 4 */
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$5, %ebp
	je	L(eq)

	movzbl	5(%eax), %ecx		/* byte 5 */
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$6, %ebp
	je	L(eq)

	movzbl	6(%eax), %ecx		/* byte 6 */
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$7, %ebp
	je	L(eq)

	movzbl	7(%eax), %ecx		/* byte 7 */
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)


	cmpl	$8, %ebp
	je	L(eq)

	movzbl	8(%eax), %ecx		/* byte 8 */
	cmpb	%cl, 8(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$9, %ebp
	je	L(eq)

	movzbl	9(%eax), %ecx		/* byte 9 */
	cmpb	%cl, 9(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$10, %ebp
	je	L(eq)

	movzbl	10(%eax), %ecx		/* byte 10 */
	cmpb	%cl, 10(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$11, %ebp
	je	L(eq)

	movzbl	11(%eax), %ecx		/* byte 11 */
	cmpb	%cl, 11(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)


	cmpl	$12, %ebp
	je	L(eq)

	movzbl	12(%eax), %ecx		/* byte 12 */
	cmpb	%cl, 12(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$13, %ebp
	je	L(eq)

	movzbl	13(%eax), %ecx		/* byte 13 */
	cmpb	%cl, 13(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$14, %ebp
	je	L(eq)

	movzbl	14(%eax), %ecx		/* byte 14 */
	cmpb	%cl, 14(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmpl	$15, %ebp
	je	L(eq)

	movzbl	15(%eax), %ecx		/* byte 15 */
	cmpb	%cl, 15(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	/* all 16 bytes equal within the limit */
	POP	(%ebp)
	xor	%eax, %eax
	ret
#endif

END (STRCMP)