/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Optimized strlcat with SSSE3 */
32
/* Fallback definitions for the CFI wrapper macros and for the ENTRY/END
   function markers.  Each definition is guarded by #ifndef, so a
   definition made earlier in the translation unit takes precedence.  */

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY (name): declare NAME as a global function symbol, align it to
   16 bytes, emit the label and open its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END (name): close the CFI region and record the symbol size.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif
67
/* CFI bookkeeping for a 4-byte push: the CFA moves by 4 and REG is
   saved at the new top of stack.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI bookkeeping for the matching 4-byte pop.  */
#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl combined with the CFI annotation above.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Local-label helper.  This first definition expands L(x) to
   .LProlog_x; it is redefined further down in the file.  */
#define L(label)	.L##Prolog_##label

/* Stack offsets of the arguments.  DST is the offset of the first
   argument as seen at function entry.  SRC and LEN already account for
   the 4 bytes occupied by the saved %ebx, so they are only valid after
   that push; code running after the push reads the destination as
   DST + 4.  */
#define DST	4
#define SRC	DST+8
#define LEN	SRC+4
83
/* strlcat entry point (i386, arguments on the stack).
   Presumably the C signature is
	size_t strlcat (char *dst, const char *src, size_t size)
   -- TODO confirm against the C header.

   The code below loads dst into %edx and size into %ebx, then pulls in
   the string-length implementation from sse2-strlen-atom.S with the
   USE_AS_STRNLEN / USE_AS_STRCAT / USE_AS_STRLCAT switches set.  While
   that file is included:
     - RETURN is a jump to L(StrcpyStep) instead of a real return, so
       control continues in this file with the length in %eax
       (presumably strnlen (dst, size) -- confirm in the included file);
     - the token "edi" is rewritten to "ebx", so the included code uses
       %ebx wherever it names %edi;
     - L(x) expands to .LProlog_x.
   L(len_less4_prolog) is not defined in this file; it is expected to
   come from the included file.  */
	.text
ENTRY (strlcat)
	mov	DST(%esp), %edx		/* edx = dst (before the push).  */
	PUSH	(%ebx)
	mov	LEN(%esp), %ebx		/* ebx = size.  */
	sub	$4, %ebx
	jbe	L(len_less4_prolog)	/* size <= 4.  */

#define RETURN	jmp L(StrcpyStep)
#define edi	ebx

#define USE_AS_STRNLEN
#define USE_AS_STRCAT
#define USE_AS_STRLCAT

#include "sse2-strlen-atom.S"
100
/* L(StrcpyStep): continuation after the included length code.
   On entry %eax holds the length computed by that code (presumably
   strnlen (dst, size)) and only %ebx is saved on the stack.

   From here on:
     - L(x) expands to .Lx;
     - RETURN  pops %ebx and returns;
     - RETURN1 pops %edi and %ebx and returns.
   The CFI_PUSH annotations that follow "ret" inside those macros
   re-establish the unwind state for the code placed after them.

   Register roles set up here for the copy phase:
     %ecx = src
     %edx = dst + eax	(where the copy starts)
     %ebx = size - eax	(bytes still available in dst)
     %edi = src - eax	(exit paths compute the return value as
			 "address of src's NUL" - %edi)  */
	.p2align 4
L(StrcpyStep):

#undef edi
#undef L
#define L(label)	.L##label
#undef RETURN
#define RETURN	POP (%ebx); ret; CFI_PUSH (%ebx);
#define RETURN1	POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)

	movl	SRC(%esp), %ecx
	movl	LEN(%esp), %ebx

	/* Length equals size: nothing can be appended, only the length
	   of src still has to be added to the result.  */
	cmp	%eax, %ebx
	je	L(CalculateLengthOfSrcProlog)
	/* ebx = remaining room.  sub already sets ZF, so no separate
	   test is needed before the branch.  */
	sub	%eax, %ebx
	jz	L(CalculateLengthOfSrcProlog)

	mov	DST + 4(%esp), %edx

	PUSH	(%edi)
	add	%eax, %edx
	mov	%ecx, %edi
	sub	%eax, %edi

	cmp	$8, %ebx
	jbe	L(StrncpyExit8Bytes)

	/* More than 8 bytes of room: look for the terminator in the
	   first 8 source bytes.  */
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmpb	$0, 7(%ecx)
	jz	L(Exit8)
	cmp	$16, %ebx
	jb	L(StrncpyExit15Bytes)
	/* At least 16 bytes of room: check source bytes 8..15.  */
	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmpb	$0, 14(%ecx)
	jz	L(Exit15)
	cmpb	$0, 15(%ecx)
	jz	L(Exit16)
	cmp	$16, %ebx
	je	L(StrlcpyExit16)

	/* No terminator in the first 16 bytes and more than 16 bytes of
	   room: continue in the included copy code.  */
#define USE_AS_STRNCPY
#include "ssse3-strcpy-atom.S"
170
/* L(CopyFrom1To16Bytes): final 1..16 byte copy when the terminator has
   been located.  Entered with %esi as an offset that is added to both
   %ecx (source) and %edx (destination), and with a bit mask in %ax in
   which bit i set selects the L(Exit<i+1>) path (presumably a pmovmskb
   result from the included copy code -- confirm there).  %esi is popped
   here; the matching push is not in this file.

   Each L(ExitN) copies N bytes from (%ecx) to (%edx), returns
   (%ecx + N - 1) - %edi in %eax and leaves through RETURN1.  */
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)
	test	%al, %al
	jz	L(ExitHigh8)		/* No hit in bytes 0..7.  */

L(CopyFrom1To16BytesLess8):
	/* %ah is used as scratch to test the low nibble of %al.  */
	mov	%al, %ah
	and	$15, %ah
	jz	L(ExitHigh4)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
L(Exit4):
	movl	(%ecx), %eax
	movl	%eax, (%edx)

	lea	3(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHigh4):
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
L(Exit8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)

	lea	7(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHigh8):
	/* %al is zero here and is reused as scratch for the low nibble
	   of %ah.  */
	mov	%ah, %al
	and	$15, %al
	jz	L(ExitHigh12)

	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
L(Exit12):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)

	lea	11(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHigh12):
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
L(Exit16):
	movlpd	(%ecx), %xmm0
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 8(%edx)

	lea	15(%ecx), %eax
	sub	%edi, %eax
	RETURN1
254
/* L(CopyFrom1To16BytesCase2): like L(CopyFrom1To16Bytes), but the
   remaining room in %ebx (after adding 16 back) may run out before the
   terminator flagged in %ax is reached.  The bits of %ax and the value
   of %ebx are therefore examined in lock step: a set bit i selects
   L(Exit<i+1>) (the terminator fits), %ebx == i+1 with the bit clear
   selects L(StrlcpyExit<i+1>) (room exhausted first).

   The CFI_PUSH (%esi) in front re-establishes the unwind state: this
   code is entered with %esi still on the stack and pops it itself.  */
	CFI_PUSH(%esi)

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%esi, %ecx
	add	%esi, %edx

	POP	(%esi)

	test	%al, %al
	jz	L(ExitHighCase2)

	/* Terminator is within bytes 0..7; with more than 8 bytes of
	   room it always fits.  */
	cmp	$8, %ebx
	ja	L(CopyFrom1To16BytesLess8)

	test	$0x01, %al
	jnz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrlcpyExit1)
	test	$0x02, %al
	jnz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrlcpyExit2)
	test	$0x04, %al
	jnz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrlcpyExit3)
	test	$0x08, %al
	jnz	L(Exit4)
	cmp	$4, %ebx
	je	L(StrlcpyExit4)
	test	$0x10, %al
	jnz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrlcpyExit5)
	test	$0x20, %al
	jnz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrlcpyExit6)
	test	$0x40, %al
	jnz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrlcpyExit7)
	test	$0x80, %al
	jnz	L(Exit8)
	jmp	L(StrlcpyExit8)

	.p2align 4
L(ExitHighCase2):
	/* No terminator in bytes 0..7.  With at most 8 bytes of room
	   only the room matters, so use the size-only dispatch.  */
	cmp	$8, %ebx
	jbe	L(CopyFrom1To16BytesLess8Case3)

	test	$0x01, %ah
	jnz	L(Exit9)
	cmp	$9, %ebx
	je	L(StrlcpyExit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrlcpyExit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrlcpyExit11)
	test	$0x8, %ah
	jnz	L(Exit12)
	cmp	$12, %ebx
	je	L(StrlcpyExit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrlcpyExit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrlcpyExit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	cmp	$15, %ebx
	je	L(StrlcpyExit15)
	test	$0x80, %ah
	jnz	L(Exit16)
	jmp	L(StrlcpyExit16)
339
/* L(CopyFrom1To16BytesCase2OrCase3): dispatch on the mask in %eax.
   Non-zero mask -> Case2 (terminator seen, room may run out first).
   Zero mask     -> Case3 (no terminator seen, room runs out).

   Case3 dispatches purely on the remaining room in %ebx (after adding
   16 back) to one of the L(StrlcpyExitN) paths.

   Each L(StrlcpyExitN) truncates: it stores a zero byte at N-1(%edx),
   copies N-1 source bytes, then sets %edx = %ecx + N and %ecx = %edi,
   pops %edi and jumps to L(CalculateLengthOfSrc) so the rest of src is
   still measured for the return value.  The zero byte comes from %bh;
   on the paths visible in this file %ebx has just been compared
   against a value of at most 16, so %bh is 0 there (paths entered from
   the included file are assumed to satisfy the same -- TODO confirm).
   The CFI_PUSH (%edi) after each jump restores the unwind state for
   the code that follows.  */
	CFI_PUSH(%esi)

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)

	.p2align 4
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)

	cmp	$8, %ebx
	ja	L(ExitHigh8Case3)

L(CopyFrom1To16BytesLess8Case3):
	cmp	$4, %ebx
	ja	L(ExitHigh4Case3)

	cmp	$1, %ebx
	je	L(StrlcpyExit1)
	cmp	$2, %ebx
	je	L(StrlcpyExit2)
	cmp	$3, %ebx
	je	L(StrlcpyExit3)
L(StrlcpyExit4):
	movb	%bh, 3(%edx)
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)

	lea	4(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(ExitHigh4Case3):
	cmp	$5, %ebx
	je	L(StrlcpyExit5)
	cmp	$6, %ebx
	je	L(StrlcpyExit6)
	cmp	$7, %ebx
	je	L(StrlcpyExit7)
L(StrlcpyExit8):
	movb	%bh, 7(%edx)
	/* Two overlapping 4-byte moves cover bytes 0..6.  */
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)

	lea	8(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(ExitHigh8Case3):
	cmp	$12, %ebx
	ja	L(ExitHigh12Case3)

	cmp	$9, %ebx
	je	L(StrlcpyExit9)
	cmp	$10, %ebx
	je	L(StrlcpyExit10)
	cmp	$11, %ebx
	je	L(StrlcpyExit11)
L(StrlcpyExit12):
	movb	%bh, 11(%edx)
	/* 8 bytes at 0 plus 4 bytes at 7 cover bytes 0..10.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	7(%ecx), %eax
	movl	%eax, 7(%edx)

	lea	12(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(ExitHigh12Case3):
	cmp	$13, %ebx
	je	L(StrlcpyExit13)
	cmp	$14, %ebx
	je	L(StrlcpyExit14)
	cmp	$15, %ebx
	je	L(StrlcpyExit15)
L(StrlcpyExit16):
	movb	%bh, 15(%edx)
	/* 8 bytes at 0 plus 8 bytes at 7 cover bytes 0..14.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)

	lea	16(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)
446
/* Exit paths for 1..3 bytes.

   L(StrlcpyExitN): room exhausted.  Store a zero byte (%bh) at
   N-1(%edx), copy N-1 bytes, then continue at L(CalculateLengthOfSrc)
   with %edx = %ecx + N and %ecx = %edi to finish measuring src.

   L(ExitN): terminator found at N-1(%ecx).  Copy N bytes including the
   terminator and return (%ecx + N - 1) - %edi through RETURN1.  */
	.p2align 4
L(StrlcpyExit1):
	movb	%bh, (%edx)

	lea	1(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit1):
	movb	(%ecx), %al
	movb	%al, (%edx)

	mov	%ecx, %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit2):
	movb	%bh, 1(%edx)
	movb	(%ecx), %al
	movb	%al, (%edx)

	lea	2(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit2):
	movw	(%ecx), %ax
	movw	%ax, (%edx)

	/* %eax is produced entirely by the lea/sub pair; no prior load
	   of %eax is required.  */
	lea	1(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit3):
	movb	%bh, 2(%edx)
	movw	(%ecx), %ax
	movw	%ax, (%edx)

	lea	3(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit3):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)

	lea	2(%ecx), %eax
	sub	%edi, %eax
	RETURN1
510
/* Exit paths for 5..7 bytes.

   L(StrlcpyExitN): room exhausted.  Store a zero byte (%bh) at
   N-1(%edx), copy N-1 bytes, then continue at L(CalculateLengthOfSrc)
   with %edx = %ecx + N and %ecx = %edi to finish measuring src.

   L(ExitN): terminator found at N-1(%ecx).  Copy N bytes including the
   terminator and return (%ecx + N - 1) - %edi through RETURN1.  */
	.p2align 4
L(StrlcpyExit5):
	movb	%bh, 4(%edx)
	movl	(%ecx), %eax
	movl	%eax, (%edx)

	/* %eax is not an input of L(CalculateLengthOfSrc); every path
	   through it writes %eax before reading it.  */
	lea	5(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit5):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)

	lea	4(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit6):
	movb	%bh, 5(%edx)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)

	lea	6(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit6):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)

	lea	5(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit7):
	movb	%bh, 6(%edx)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)

	lea	7(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit7):
	/* Two overlapping 4-byte moves cover bytes 0..6.  */
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)

	lea	6(%ecx), %eax
	sub	%edi, %eax
	RETURN1
584
/* Exit paths for 9..11 bytes.

   L(StrlcpyExitN): room exhausted.  Store a zero byte (%bh) at
   N-1(%edx), copy N-1 bytes, then continue at L(CalculateLengthOfSrc)
   with %edx = %ecx + N and %ecx = %edi to finish measuring src.

   L(ExitN): terminator found at N-1(%ecx).  Copy N bytes including the
   terminator and return (%ecx + N - 1) - %edi through RETURN1.  */
	.p2align 4
L(StrlcpyExit9):
	movb	%bh, 8(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)

	lea	9(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit9):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	8(%ecx), %al
	movb	%al, 8(%edx)

	lea	8(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit10):
	movb	%bh, 9(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	8(%ecx), %al
	movb	%al, 8(%edx)

	lea	10(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit10):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movw	8(%ecx), %ax
	movw	%ax, 8(%edx)

	lea	9(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit11):
	movb	%bh, 10(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movw	8(%ecx), %ax
	movw	%ax, 8(%edx)

	lea	11(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit11):
	/* 8 bytes at 0 plus 4 bytes at 7 cover bytes 0..10.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	7(%ecx), %eax
	movl	%eax, 7(%edx)

	lea	10(%ecx), %eax
	sub	%edi, %eax
	RETURN1
657
/* Exit paths for 13..15 bytes.

   L(StrlcpyExitN): room exhausted.  Store a zero byte (%bh) at
   N-1(%edx), copy N-1 bytes, then continue at L(CalculateLengthOfSrc)
   with %edx = %ecx + N and %ecx = %edi to finish measuring src.

   L(ExitN): terminator found at N-1(%ecx).  Copy N bytes including the
   terminator and return (%ecx + N - 1) - %edi through RETURN1.

   The second move of each pair overlaps the first so that exactly the
   required byte range is written.  */
	.p2align 4
L(StrlcpyExit13):
	movb	%bh, 12(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)

	lea	13(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit13):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	5(%ecx), %xmm0
	movlpd	%xmm0, 5(%edx)

	lea	12(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit14):
	movb	%bh, 13(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	5(%ecx), %xmm0
	movlpd	%xmm0, 5(%edx)

	lea	14(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit14):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	6(%ecx), %xmm0
	movlpd	%xmm0, 6(%edx)

	lea	13(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit15):
	movb	%bh, 14(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	6(%ecx), %xmm0
	movlpd	%xmm0, 6(%edx)

	lea	15(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH     (%edi)

	.p2align 4
L(Exit15):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)

	lea	14(%ecx), %eax
	sub	%edi, %eax
	RETURN1
732
/* L(StrncpyExit15Bytes): reached from L(StrcpyStep) with 9..15 bytes of
   room in %ebx and no terminator in source bytes 0..7.  Source bytes
   from offset 8 on are checked one at a time; a byte is only read
   while the room still allows copying it.  A zero byte selects
   L(ExitN), running out of room selects L(StrlcpyExitN).

   The first half handles 9..12 bytes of room, the ...Bytes1 half
   handles 13..15, where bytes 8..11 need no room check.  */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	ja	L(StrncpyExit15Bytes1)

	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
	cmp	$9, %ebx
	je	L(StrlcpyExit9)

	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrlcpyExit10)

	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrlcpyExit11)

	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	jmp	L(StrlcpyExit12)

	.p2align 4
L(StrncpyExit15Bytes1):
	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)

	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrlcpyExit13)

	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrlcpyExit14)

	cmpb	$0, 14(%ecx)
	jz	L(Exit15)
	jmp	L(StrlcpyExit15)
781
/* L(StrncpyExit8Bytes): reached from L(StrcpyStep) with at most 8 bytes
   of room in %ebx (the zero case was branched away before).  Source
   bytes are checked one at a time; a zero byte selects L(ExitN),
   running out of room selects L(StrlcpyExitN).

   The first half handles 1..4 bytes of room, the ...Bytes1 half
   handles 5..8, where bytes 0..3 need no room check.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	ja	L(StrncpyExit8Bytes1)

	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrlcpyExit1)

	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrlcpyExit2)

	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrlcpyExit3)

	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	jmp	L(StrlcpyExit4)

	.p2align 4
L(StrncpyExit8Bytes1):
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)

	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrlcpyExit5)

	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrlcpyExit6)

	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrlcpyExit7)

	cmpb	$0, 7(%ecx)
	jz	L(Exit8)
	jmp	L(StrlcpyExit8)
835
/* Length-only tail: nothing more is copied, only the return value is
   still being computed.  From here on only %ebx is on the stack, which
   is what the leading CFI_POP (%edi) records.

   L(Prolog_return_start_len) is the name .LProlog_return_start_len,
   i.e. what L(return_start_len) expanded to while the first included
   file was processed; it is presumably jumped to from there.  It
   reloads size and src and falls into the Prolog variant below.

   L(CalculateLengthOfSrcProlog): %ecx = src, %ebx = size.  Sets
   %edx = src and %ecx = src - size.

   L(CalculateLengthOfSrc): %edx = address to scan from, %ecx = base
   address.  The result returned in %eax is
   (address of the first zero byte at or after %edx) - %ecx.

   The first 16 bytes are checked one by one.  After that the scan
   continues in aligned 16-byte steps: %eax is the aligned cursor
   (advanced by 16 before each test) and %ecx is advanced by 16 once so
   that L(exit) can compute the result as %eax - %ecx + bit index.
   Each xmm register is zero before its first pcmpeqb and is zero again
   whenever its compare found no zero byte, so the four registers can
   be reused in rotation without clearing them again.  */
	CFI_POP	(%edi)


	.p2align 4
L(Prolog_return_start_len):
	movl	LEN(%esp), %ebx
	movl	SRC(%esp), %ecx
L(CalculateLengthOfSrcProlog):
	mov	%ecx, %edx
	sub	%ebx, %ecx

	.p2align 4
L(CalculateLengthOfSrc):
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)

	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)

	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)

	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/* Switch to aligned 16-byte compares.  Rounding %edx + 16 down
	   to a 16-byte boundary may re-scan bytes already checked.  */
	pxor	%xmm0, %xmm0
	lea	16(%edx), %eax
	add	$16, %ecx
	and	$-16, %eax

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* Round the cursor down to a 64-byte boundary for the loop that
	   follows; this may re-scan up to 48 bytes.  */
	and	$-0x40, %eax
990
/* L(aligned_64_loop): scan 64 bytes per iteration from the 64-byte
   aligned cursor in %eax.  The byte-wise minimum of the four 16-byte
   chunks contains a zero byte exactly when one of the chunks does, so
   a single compare against zero decides whether to leave the loop.

   %xmm3 serves as the all-zero comparand.  It is expected to be zero
   on entry (the unrolled scan that precedes this loop leaves it so
   when it falls through) and it stays zero after every compare that
   finds no match.

   After the loop %eax points 64 bytes past the start of the block that
   contains the zero byte.  The chunks are then tested individually,
   and %ecx is adjusted before each test so that L(exit), which
   computes %eax - %ecx + bit index, yields the right result for the
   chunk that matches.  %xmm1 and %xmm6 still hold chunks 1 and 3;
   chunks 0 and 2 are re-read from memory because pminub overwrote
   their registers.  The last test needs no branch: control falls
   through into L(exit).  */
	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqb	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(aligned_64_loop)

	/* Chunk 0.  */
	pcmpeqb	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	48(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* Chunk 1.  */
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* Chunk 2.  */
	pcmpeqb	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* Chunk 3.  */
	pcmpeqb	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
1027
/* L(exit): turn a 16-bit match mask into the final result.
   In:  %edx = mask (bit i set when byte i of the chunk is zero),
	%eax, %ecx as prepared by the scanning code.
   Out: %eax = %eax - %ecx + index of the lowest set bit, returned
	through RETURN (pop %ebx, ret).

   %cl and %ch are used as scratch after the subtraction.  The lowest
   set bit is found by nibble: bits 0..2 of a nibble are tested
   explicitly and bit 3 is the fall-through case, which is why the
   +3, +7, +11 and +15 cases are handled inline.  The labels
   L(exit_3), L(exit_7), L(exit_11) and L(exit_15) are not referenced
   from this file; they are kept in case the included files use them.  */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_more_8)

	mov	%dl, %cl
	and	$15, %cl
	jz	L(exit_more_4)
	test	$0x01, %dl
	jnz	L(exit_0)
	test	$0x02, %dl
	jnz	L(exit_1)
	test	$0x04, %dl
	jnz	L(exit_2)
	add	$3, %eax
	RETURN

	.p2align 4
L(exit_more_4):
	test	$0x10, %dl
	jnz	L(exit_4)
	test	$0x20, %dl
	jnz	L(exit_5)
	test	$0x40, %dl
	jnz	L(exit_6)
	add	$7, %eax
	RETURN

	.p2align 4
L(exit_more_8):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(exit_more_12)
	test	$0x01, %dh
	jnz	L(exit_8)
	test	$0x02, %dh
	jnz	L(exit_9)
	test	$0x04, %dh
	jnz	L(exit_10)
	add	$11, %eax
	RETURN

	.p2align 4
L(exit_more_12):
	test	$0x10, %dh
	jnz	L(exit_12)
	test	$0x20, %dh
	jnz	L(exit_13)
	test	$0x40, %dh
	jnz	L(exit_14)
	add	$15, %eax
	/* Falls through: L(exit_0) adds nothing and returns.  */
L(exit_0):
	RETURN

	.p2align 4
L(exit_1):
	add	$1, %eax
	RETURN

L(exit_2):
	add	$2, %eax
	RETURN

L(exit_3):
	add	$3, %eax
	RETURN

L(exit_4):
	add	$4, %eax
	RETURN

L(exit_5):
	add	$5, %eax
	RETURN

L(exit_6):
	add	$6, %eax
	RETURN

L(exit_7):
	add	$7, %eax
	RETURN

L(exit_8):
	add	$8, %eax
	RETURN

L(exit_9):
	add	$9, %eax
	RETURN

L(exit_10):
	add	$10, %eax
	RETURN

L(exit_11):
	add	$11, %eax
	RETURN

L(exit_12):
	add	$12, %eax
	RETURN

L(exit_13):
	add	$13, %eax
	RETURN

L(exit_14):
	add	$14, %eax
	RETURN

L(exit_15):
	add	$15, %eax
	RETURN
1143
/* L(exit_tailN): exits for the byte-wise part of
   L(CalculateLengthOfSrc), taken when the zero byte is at N(%edx).
   In:  %edx = scan start, %ecx = base address.
   Out: %eax = (%edx + N) - %ecx, returned through RETURN
	(pop %ebx, ret).  */
L(exit_tail0):
	mov	%edx, %eax
	sub	%ecx, %eax
	RETURN

	.p2align 4
L(exit_tail1):
	lea	1(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail2):
	lea	2(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail3):
	lea	3(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail4):
	lea	4(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail5):
	lea	5(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail6):
	lea	6(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail7):
	lea	7(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail8):
	lea	8(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail9):
	lea	9(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail10):
	lea	10(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail11):
	lea	11(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail12):
	lea	12(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail13):
	lea	13(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail14):
	lea	14(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail15):
	lea	15(%edx), %eax
	sub	%ecx, %eax
	RETURN

END (strlcat)
1225END (strlcat)