/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/*
 * Preprocessor helpers for this file (GNU as, AT&T syntax, 32-bit x86).
 * Each helper is only defined when the including environment has not
 * already supplied one with the same name.
 */

/* L(name): assembler-local label (".L" prefix). */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers around the assembler's DWARF call-frame directives. */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): open a global, 16-byte aligned function and its CFI region. */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function; 	\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/*
 * CFI_PUSH / CFI_POP emit only the unwind annotations that describe a
 * 4-byte push / pop of REG; they generate no instructions.  They are
 * also used on their own after a jmp or ret to re-establish the unwind
 * state for the code that follows in the file.
 */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* PUSH / POP: the real instruction followed by its unwind annotation. */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/*
 * Stack layout after ENTRANCE has pushed %edi:
 *   0(%esp) saved %edi, 4(%esp) return address,
 *   8(%esp) first argument, 12(%esp) second argument.
 */
#define PARMS  8
#define ENTRANCE PUSH(%edi);
/* RETURN: restore %edi and return; the trailing CFI_PUSH puts the unwind
   state back to "%edi saved" for whatever code follows the ret.  */
#define RETURN  POP (%edi); ret; CFI_PUSH (%edi);

/* Stack offsets of the two arguments (used as STR1(%esp), STR2(%esp)). */
#define STR1  PARMS
#define STR2  STR1+4
87
/*
 * char *strrchr (const char *s, int c)
 *
 * SSE2 implementation for 32-bit x86.  Arguments are read from the
 * stack (STR1, STR2); the result is returned in %eax.
 *
 * Returns the address of the last byte of the NUL-terminated string S
 * that equals the low byte of C, or 0 when there is none.  The
 * terminating NUL itself takes part in the comparison, so C == 0
 * yields the address of the terminator.
 *
 * Register roles:
 *   %edi   scan pointer; after each 16-byte chunk is loaded it already
 *          points one chunk further (saved/restored by ENTRANCE/RETURN)
 *   %xmm1  the searched byte replicated into all 16 lanes
 *   %xmm2  all-zero on entry to every compare; receives the NUL mask
 *   %eax   bitmask of lanes equal to the searched byte
 *   %ecx   bitmask of lanes equal to NUL (%edx in the crosscache path)
 *   %ebx   match bitmask of the latest chunk that held a match
 *          (0 = no match seen yet); pushed only on the looping paths
 *   %esi   value of %edi that belongs to the mask kept in %ebx
 *
 * A match at bit i of a mask lives at address (%edi - 16 + i).
 *
 * Mask immediates are written as plain hex literals so that they do
 * not depend on the assembler's operator precedence.
 */
	.text
ENTRY (strrchr)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Replicate the low byte of C into all 16 lanes of %xmm1
	   (two byte-unpacks here, the dword shuffle a few lines below).  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET. */
	and	$63, %ecx
	pshufd	$0, %xmm1, %xmm1
	/* With an offset above 48 inside a 64-byte block, a 16-byte load
	   from S would run past that block; use the aligned-load path.  */
	cmp	$48, %ecx
	ja	L(crosscache)

/* Unaligned string: examine the first 16 bytes with an unaligned load.  */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %ecx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match1)

	/* No match in the first chunk; if it also ends the string, fail.  */
	test	%ecx, %ecx
	jnz	L(return_null)

	/* Continue from the next 16-byte boundary (some bytes may be
	   examined a second time).  */
	and	$-16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	/* No match remembered yet.  */
	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match1):
	/* Match in the first chunk.  If the chunk also holds the NUL the
	   answer is decided right here.  */
	test	%ecx, %ecx
	jnz	L(prolog_find_zero_1)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Remember this match (mask + its %edi) and go on scanning.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	and	$-16, %edi
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(crosscache):
/* Handle unaligned string: load the enclosing aligned 16-byte chunk and
   discard the lanes that precede the start of the string.  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(prolog_find_zero)

	PUSH	(%esi)
	PUSH	(%ebx)

	mov	%eax, %ebx
	/* The mask was shifted right by %cl, so bias the remembered
	   pointer by the same amount.  */
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  Unrolled four times; every step leaves
   the loop as soon as the chunk holds a match or a NUL.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

L(matches):
	test	%eax, %eax
	jnz	L(match)
	/* Only a NUL in this chunk: fall back to the remembered match.  */
L(return_value):
	test	%ebx, %ebx
	jz	L(return_null_1)
	mov	%ebx, %eax
	mov	%esi, %edi

	POP	(%ebx)
	POP	(%esi)

	jmp	L(match_case1)

	/* Unwind state for the code below: %esi was pushed first, then
	   %ebx, so the annotations are replayed in that order.  */
	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(return_null_1):
	POP	(%ebx)
	POP	(%esi)

	xor	%eax, %eax
	RETURN

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(match):
	/* %ecx was merged with %eax in the loop; reload the NUL mask.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(find_zero)
	/* Match without NUL: remember it and keep scanning.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

/* The chunk holds both a match and the NUL.  Locate the lowest NUL bit
   in %cx and keep only the match bits at or below it.  If none remain,
   the remembered match (if any) is the answer.  */
	.p2align 4
L(find_zero):
	test	%cl, %cl
	jz	L(find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(find_zero_8)
	test	$0x01, %cl
	jnz	L(FindZeroExit1)
	test	$0x02, %cl
	jnz	L(FindZeroExit2)
	test	$0x04, %cl
	jnz	L(FindZeroExit3)
	/* NUL at bit 3.  */
	and	$0x000f, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_8):
	test	$0x10, %cl
	jnz	L(FindZeroExit5)
	test	$0x20, %cl
	jnz	L(FindZeroExit6)
	test	$0x40, %cl
	jnz	L(FindZeroExit7)
	/* NUL at bit 7.  */
	and	$0x00ff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(find_zero_high_8)
	test	$0x01, %ch
	jnz	L(FindZeroExit9)
	test	$0x02, %ch
	jnz	L(FindZeroExit10)
	test	$0x04, %ch
	jnz	L(FindZeroExit11)
	/* NUL at bit 11.  */
	and	$0x0fff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_high_8):
	test	$0x10, %ch
	jnz	L(FindZeroExit13)
	test	$0x20, %ch
	jnz	L(FindZeroExit14)
	test	$0x40, %ch
	jnz	L(FindZeroExit15)
	/* NUL at bit 15.  */
	and	$0xffff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

/* FindZeroExitN: the NUL is lane N (1-based); keep match bits 0..N-1.  */
	.p2align 4
L(FindZeroExit1):
	and	$0x0001, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit2):
	and	$0x0003, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit3):
	and	$0x0007, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit5):
	and	$0x001f, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit6):
	and	$0x003f, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit7):
	and	$0x007f, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit9):
	and	$0x01ff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit10):
	and	$0x03ff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit11):
	and	$0x07ff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit13):
	and	$0x1fff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit14):
	and	$0x3fff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit15):
	and	$0x7fff, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	/* Falls through into L(match_case1).  */

/* %ax holds a non-empty match mask and only %edi is still on the stack.
   Return the address of the highest set bit: %edi - 16 + bit index.  */
	.p2align 4
L(match_case1):
	test	%ah, %ah
	jnz	L(match_case1_high)
	mov	%al, %dl
	and	$0xf0, %dl
	jnz	L(match_case1_8)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x02, %al
	jnz	L(Exit2)
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_8):
	test	$0x80, %al
	jnz	L(Exit8)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x20, %al
	jnz	L(Exit6)
	lea	-12(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_high):
	mov	%ah, %dh
	and	$0xf0, %dh
	jnz	L(match_case1_high_8)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x02, %ah
	jnz	L(Exit10)
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_high_8):
	test	$0x80, %ah
	jnz	L(Exit16)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x20, %ah
	jnz	L(Exit14)
	lea	-4(%edi), %eax
	RETURN

/* ExitN: the result is lane N (1-based) of the chunk ending at %edi.  */
	.p2align 4
L(Exit2):
	lea	-15(%edi), %eax
	RETURN

	.p2align 4
L(Exit3):
	lea	-14(%edi), %eax
	RETURN

	.p2align 4
L(Exit4):
	lea	-13(%edi), %eax
	RETURN

	.p2align 4
L(Exit6):
	lea	-11(%edi), %eax
	RETURN

	.p2align 4
L(Exit7):
	lea	-10(%edi), %eax
	RETURN

	.p2align 4
L(Exit8):
	lea	-9(%edi), %eax
	RETURN

	.p2align 4
L(Exit10):
	lea	-7(%edi), %eax
	RETURN

	.p2align 4
L(Exit11):
	lea	-6(%edi), %eax
	RETURN

	.p2align 4
L(Exit12):
	lea	-5(%edi), %eax
	RETURN

	.p2align 4
L(Exit14):
	lea	-3(%edi), %eax
	RETURN

	.p2align 4
L(Exit15):
	lea	-2(%edi), %eax
	RETURN

	.p2align 4
L(Exit16):
	lea	-1(%edi), %eax
	RETURN

/* Return NULL.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

/* The very first chunk holds both a match and the NUL, and %esi/%ebx
   have not been pushed.  Same masking as L(find_zero), but an empty
   result means there is no match at all, so 0 is returned.  */
	.p2align 4
L(prolog_find_zero):
	/* Crosscache entry: undo the %cl lane shift in the pointer and
	   move the NUL mask into %ecx.  */
	add	%ecx, %edi
	mov	%edx, %ecx
L(prolog_find_zero_1):
	test	%cl, %cl
	jz	L(prolog_find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(prolog_find_zero_8)
	test	$0x01, %cl
	jnz	L(PrologFindZeroExit1)
	test	$0x02, %cl
	jnz	L(PrologFindZeroExit2)
	test	$0x04, %cl
	jnz	L(PrologFindZeroExit3)
	/* NUL at bit 3.  */
	and	$0x000f, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_8):
	test	$0x10, %cl
	jnz	L(PrologFindZeroExit5)
	test	$0x20, %cl
	jnz	L(PrologFindZeroExit6)
	test	$0x40, %cl
	jnz	L(PrologFindZeroExit7)
	/* NUL at bit 7.  */
	and	$0x00ff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(prolog_find_zero_high_8)
	test	$0x01, %ch
	jnz	L(PrologFindZeroExit9)
	test	$0x02, %ch
	jnz	L(PrologFindZeroExit10)
	test	$0x04, %ch
	jnz	L(PrologFindZeroExit11)
	/* NUL at bit 11.  */
	and	$0x0fff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high_8):
	test	$0x10, %ch
	jnz	L(PrologFindZeroExit13)
	test	$0x20, %ch
	jnz	L(PrologFindZeroExit14)
	test	$0x40, %ch
	jnz	L(PrologFindZeroExit15)
	/* NUL at bit 15.  */
	and	$0xffff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

/* PrologFindZeroExitN: the NUL is lane N (1-based); keep bits 0..N-1.  */
	.p2align 4
L(PrologFindZeroExit1):
	and	$0x0001, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit2):
	and	$0x0003, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit3):
	and	$0x0007, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit5):
	and	$0x001f, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit6):
	and	$0x003f, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit7):
	and	$0x007f, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit9):
	and	$0x01ff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit10):
	and	$0x03ff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit11):
	and	$0x07ff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit13):
	and	$0x1fff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit14):
	and	$0x3fff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit15):
	and	$0x7fff, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

END (strrchr)
753END (strrchr)