blob: 3e146bfbcfa5455d3469a7f57dfbebe0198bbc39 [file] [log] [blame]
Varvara Rainchika020a242014-04-29 17:44:56 +04001/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#ifndef USE_AS_STRCAT
32
/* Symbol name of the routine assembled from this file. An includer may
   pre-define STRCPY to build the same code under a different name. */
33# ifndef STRCPY
34# define STRCPY strcpy
35# endif
36
/* L(label) pastes a ".L" prefix onto label; GNU as treats .L-prefixed names
   as assembler-local labels that are not exported. */
37# ifndef L
38# define L(label) .L##label
39# endif
40
/* Fallback spellings for the CFI wrappers: when nothing else has defined
   them, map them straight onto the assembler's .cfi_startproc and
   .cfi_endproc directives. */
41# ifndef cfi_startproc
42# define cfi_startproc .cfi_startproc
43# endif
44
45# ifndef cfi_endproc
46# define cfi_endproc .cfi_endproc
47# endif
48
/* ENTRY(name): open a function. Marks name as a function symbol (.type),
   makes it global, aligns it to 16 bytes (.p2align 4), emits the label and
   starts the CFI region. Only defined here if the includer has not already
   supplied its own ENTRY. */
49# ifndef ENTRY
50# define ENTRY(name) \
51 .type name, @function; \
52 .globl name; \
53 .p2align 4; \
54name: \
55 cfi_startproc
56# endif
57
/* END(name): close a function. Ends the CFI region and records the symbol
   size as the distance from name to the current location. Only defined here
   if the includer has not already supplied its own END. */
58# ifndef END
59# define END(name) \
60 cfi_endproc; \
61 .size name, .-name
62# endif
63
64#endif
65
/* JMPTBL(I, B): the difference I - B, i.e. label I expressed relative to
   base B. Presumably used to build the 32-bit entries of ExitTable and
   ExitStrncpyTable, which are defined outside this view -- TODO confirm. */
66#define JMPTBL(I, B) I - B
/* BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE): indirect jump through a table
   of signed 32-bit offsets that are relative to TABLE itself.
     %r11 = address of TABLE (RIP-relative lea)
     %rcx = sign-extended 32-bit entry read from TABLE + INDEX * SCALE
     %rcx = %r11 + %rcx, then jmp *%rcx
   Clobbers %r11 and %rcx. */
67#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
68 lea TABLE(%rip), %r11; \
69 movslq (%r11, INDEX, SCALE), %rcx; \
70 lea (%r11, %rcx), %rcx; \
71 jmp *%rcx
72
/*
 * STRCPY entry and probe of the first two aligned 16-byte source chunks.
 * Register roles as used below:
 *   %rdi  destination pointer (stores go here)
 *   %rsi  source pointer (loads come from here)
 *   %rdx  byte limit on entry when USE_AS_STRNCPY is defined (copied to %r8)
 *   %rax  return value: the original %rdi unless USE_AS_STPCPY is defined
 * With USE_AS_STRCAT defined, everything between the #ifndef/#endif pair is
 * compiled out, so RETURN, the entry label and the initial %rcx/%rax/%r8
 * values must come from the including file -- not visible here, TODO confirm.
 */
73#ifndef USE_AS_STRCAT
74
75# define RETURN ret
76
77.text
78ENTRY (STRCPY)
79# ifdef USE_AS_STRNCPY
/* Keep the limit in %r8; a zero limit leaves via ExitZero (defined outside
   this view). */
80 mov %rdx, %r8
81 test %r8, %r8
82 jz L(ExitZero)
83# endif
84 mov %rsi, %rcx
85# ifndef USE_AS_STPCPY
86 mov %rdi, %rax /* save result */
87# endif
88
89#endif
/* %rcx = low 6 bits of %rcx (the copy of the source pointer made above).
   A value of at most 32 means source bytes 0..31 lie inside one
   64-byte-aligned block; that case is handled separately. */
90 and $63, %rcx
91 cmp $32, %rcx
92 jbe L(SourceStringAlignmentLess32)
93
/* Otherwise round %rsi down to a 16-byte boundary and keep the misalignment
   (0..15) in %rcx. %xmm0 and %xmm1 are cleared to act as the zero pattern. */
94 and $-16, %rsi
95 and $15, %rcx
96 pxor %xmm0, %xmm0
97 pxor %xmm1, %xmm1
98
/* Byte-wise compare of the aligned 16 bytes against zero. Shifting the mask
   right by %cl drops the bytes that lie before the real start of the source. */
99 pcmpeqb (%rsi), %xmm1
100 pmovmskb %xmm1, %rdx
101 shr %cl, %rdx
102#ifdef USE_AS_STRNCPY
/* %r10 = 16 - misalignment, or 17 - misalignment when neither USE_AS_STPCPY
   nor USE_AS_STRCAT is defined. A limit no larger than %r10 is finished by
   the limit-aware tail code. */
103# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
104 mov $16, %r10
105 sub %rcx, %r10
106 cmp %r10, %r8
107# else
108 mov $17, %r10
109 sub %rcx, %r10
110 cmp %r10, %r8
111# endif
112 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
113#endif
/* Zero byte found in the first aligned chunk. */
114 test %rdx, %rdx
115 jnz L(CopyFrom1To16BytesTail)
116
/* Same test on the next aligned 16 bytes; %xmm0 is still all-zero here. */
117 pcmpeqb 16(%rsi), %xmm0
118 pmovmskb %xmm0, %rdx
119#ifdef USE_AS_STRNCPY
120 add $16, %r10
121 cmp %r10, %r8
122 jbe L(CopyFrom1To32BytesCase2OrCase3)
123#endif
124 test %rdx, %rdx
125 jnz L(CopyFrom1To32Bytes)
126
/* No zero byte in either chunk: copy the first 16 source bytes (unaligned
   load from aligned %rsi + misalignment) and fall through. */
127 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
128 movdqu %xmm1, (%rdi)
129
130/* If source address alignment != destination address alignment */
/*
 * Unalign16Both: unrolled copy of aligned 16-byte source chunks.
 * On entry %rsi is 16-byte aligned, %rcx holds the source misalignment that
 * was rounded off, and %xmm0 is all-zero (the preceding compare matched
 * nothing). Each step stores the chunk checked in the previous step with an
 * unaligned store and tests the next aligned chunk for a zero byte. %rcx is
 * the running offset used for both (%rsi, %rcx) and (%rdi, %rcx).
 * In the USE_AS_STRNCPY builds %r8 is decremented per step; "jbe" after the
 * "sub" leaves through the Case2/Case3 code once the result is zero or the
 * subtraction borrowed.
 */
131 .p2align 4
132L(Unalign16Both):
/* Bias %rdi by the misalignment so that equal offsets from %rsi and %rdi
   address corresponding bytes; %r8 is adjusted by the same amount. */
133 sub %rcx, %rdi
134#ifdef USE_AS_STRNCPY
135 add %rcx, %r8
136#endif
137 mov $16, %rcx
138 movdqa (%rsi, %rcx), %xmm1
139 movaps 16(%rsi, %rcx), %xmm2
140 movdqu %xmm1, (%rdi, %rcx)
141 pcmpeqb %xmm2, %xmm0
142 pmovmskb %xmm0, %rdx
143 add $16, %rcx
144#ifdef USE_AS_STRNCPY
145 sub $48, %r8
146 jbe L(CopyFrom1To16BytesCase2OrCase3)
147#endif
/* Zero byte in the chunk now at offset %rcx (held in %xmm2). */
148 test %rdx, %rdx
149#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
150 jnz L(CopyFrom1To16BytesUnalignedXmm2)
151#else
152 jnz L(CopyFrom1To16Bytes)
153#endif
154
/* Next step: store %xmm2, probe %xmm3. */
155 movaps 16(%rsi, %rcx), %xmm3
156 movdqu %xmm2, (%rdi, %rcx)
157 pcmpeqb %xmm3, %xmm0
158 pmovmskb %xmm0, %rdx
159 add $16, %rcx
160#ifdef USE_AS_STRNCPY
161 sub $16, %r8
162 jbe L(CopyFrom1To16BytesCase2OrCase3)
163#endif
164 test %rdx, %rdx
165#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
166 jnz L(CopyFrom1To16BytesUnalignedXmm3)
167#else
168 jnz L(CopyFrom1To16Bytes)
169#endif
170
/* Next step: store %xmm3, probe %xmm4. */
171 movaps 16(%rsi, %rcx), %xmm4
172 movdqu %xmm3, (%rdi, %rcx)
173 pcmpeqb %xmm4, %xmm0
174 pmovmskb %xmm0, %rdx
175 add $16, %rcx
176#ifdef USE_AS_STRNCPY
177 sub $16, %r8
178 jbe L(CopyFrom1To16BytesCase2OrCase3)
179#endif
180 test %rdx, %rdx
181#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
182 jnz L(CopyFrom1To16BytesUnalignedXmm4)
183#else
184 jnz L(CopyFrom1To16Bytes)
185#endif
186
/* Next step: store %xmm4, probe %xmm1. */
187 movaps 16(%rsi, %rcx), %xmm1
188 movdqu %xmm4, (%rdi, %rcx)
189 pcmpeqb %xmm1, %xmm0
190 pmovmskb %xmm0, %rdx
191 add $16, %rcx
192#ifdef USE_AS_STRNCPY
193 sub $16, %r8
194 jbe L(CopyFrom1To16BytesCase2OrCase3)
195#endif
196 test %rdx, %rdx
197#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
198 jnz L(CopyFrom1To16BytesUnalignedXmm1)
199#else
200 jnz L(CopyFrom1To16Bytes)
201#endif
202
/* Next step: store %xmm1, probe %xmm2. */
203 movaps 16(%rsi, %rcx), %xmm2
204 movdqu %xmm1, (%rdi, %rcx)
205 pcmpeqb %xmm2, %xmm0
206 pmovmskb %xmm0, %rdx
207 add $16, %rcx
208#ifdef USE_AS_STRNCPY
209 sub $16, %r8
210 jbe L(CopyFrom1To16BytesCase2OrCase3)
211#endif
212 test %rdx, %rdx
213#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
214 jnz L(CopyFrom1To16BytesUnalignedXmm2)
215#else
216 jnz L(CopyFrom1To16Bytes)
217#endif
218
/* Next step: store %xmm2, probe %xmm3. */
219 movaps 16(%rsi, %rcx), %xmm3
220 movdqu %xmm2, (%rdi, %rcx)
221 pcmpeqb %xmm3, %xmm0
222 pmovmskb %xmm0, %rdx
223 add $16, %rcx
224#ifdef USE_AS_STRNCPY
225 sub $16, %r8
226 jbe L(CopyFrom1To16BytesCase2OrCase3)
227#endif
228 test %rdx, %rdx
229#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
230 jnz L(CopyFrom1To16BytesUnalignedXmm3)
231#else
232 jnz L(CopyFrom1To16Bytes)
233#endif
234
/* Switch to 64-byte steps. Store the last checked chunk, then round the next
   source position (%rsi + %rcx + 16) down to a 64-byte boundary.
   %rdx = old %rsi - new %rsi; subtracting it from %rdi moves the destination
   by the same distance as the source. In the USE_AS_STRNCPY builds %r8 is
   rebased with the same delta plus 128. */
235 movdqu %xmm3, (%rdi, %rcx)
236 mov %rsi, %rdx
237 lea 16(%rsi, %rcx), %rsi
238 and $-0x40, %rsi
239 sub %rsi, %rdx
240 sub %rdx, %rdi
241#ifdef USE_AS_STRNCPY
242 lea 128(%r8, %rdx), %r8
243#endif
/*
 * Unaligned64Loop: first 64-byte probe (the code just above rounds %rsi to a
 * 64-byte boundary). Loads four aligned 16-byte chunks and keeps them in
 * %xmm4 (bytes 0-15), %xmm5 (16-31), %xmm6 (32-47) and %xmm7 (48-63).
 * pminub folds the four chunks into %xmm3, so a zero byte in any chunk gives
 * a zero byte there. Comparing with the all-zero %xmm0 leaves a non-zero
 * mask in %rdx exactly when the 64 bytes contain a zero byte.
 */
244L(Unaligned64Loop):
245 movaps (%rsi), %xmm2
246 movaps %xmm2, %xmm4
247 movaps 16(%rsi), %xmm5
248 movaps 32(%rsi), %xmm3
249 movaps %xmm3, %xmm6
250 movaps 48(%rsi), %xmm7
251 pminub %xmm5, %xmm2
252 pminub %xmm7, %xmm3
253 pminub %xmm2, %xmm3
254 pcmpeqb %xmm0, %xmm3
255 pmovmskb %xmm3, %rdx
256#ifdef USE_AS_STRNCPY
/* Limit bookkeeping: leave when %r8 reaches zero or the subtraction borrows. */
257 sub $64, %r8
258 jbe L(UnalignedLeaveCase2OrCase3)
259#endif
260 test %rdx, %rdx
261 jnz L(Unaligned64Leave)
262
/*
 * Unaligned64Loop_start: steady-state loop. Both pointers advance by 64. The
 * previous 64 bytes (held in %xmm4..%xmm7, already found free of zero bytes)
 * are stored at -64..-16(%rdi) with unaligned stores, interleaved with
 * loading the next four aligned chunks into the same registers and folding
 * them with pminub. %rdx is non-zero when the new 64 bytes contain a zero
 * byte, which ends the loop by falling through.
 */
263L(Unaligned64Loop_start):
264 add $64, %rdi
265 add $64, %rsi
266 movdqu %xmm4, -64(%rdi)
267 movaps (%rsi), %xmm2
268 movdqa %xmm2, %xmm4
269 movdqu %xmm5, -48(%rdi)
270 movaps 16(%rsi), %xmm5
271 pminub %xmm5, %xmm2
272 movaps 32(%rsi), %xmm3
273 movdqu %xmm6, -32(%rdi)
274 movaps %xmm3, %xmm6
275 movdqu %xmm7, -16(%rdi)
276 movaps 48(%rsi), %xmm7
277 pminub %xmm7, %xmm3
278 pminub %xmm2, %xmm3
279 pcmpeqb %xmm0, %xmm3
280 pmovmskb %xmm3, %rdx
281#ifdef USE_AS_STRNCPY
/* Limit bookkeeping: leave when %r8 reaches zero or the subtraction borrows. */
282 sub $64, %r8
283 jbe L(UnalignedLeaveCase2OrCase3)
284#endif
285 test %rdx, %rdx
286 jz L(Unaligned64Loop_start)
287
/*
 * Unaligned64Leave: one of %xmm4..%xmm7 holds a zero byte. Find the first
 * such chunk and finish. %xmm0 is still all-zero on entry; %xmm1 is cleared
 * to give a second zero pattern so two chunks can be tested per round.
 */
288L(Unaligned64Leave):
289 pxor %xmm1, %xmm1
290
/* Round 1: %rdx = zero-byte mask of %xmm4, %rcx = zero-byte mask of %xmm5. */
291 pcmpeqb %xmm4, %xmm0
292 pcmpeqb %xmm5, %xmm1
293 pmovmskb %xmm0, %rdx
294 pmovmskb %xmm1, %rcx
295 test %rdx, %rdx
296 jnz L(CopyFrom1To16BytesUnaligned_0)
297 test %rcx, %rcx
298 jnz L(CopyFrom1To16BytesUnaligned_16)
299
/* Round 2: neither compare matched, so %xmm0/%xmm1 are all-zero again and
   can be reused for %xmm6 and %xmm7. */
300 pcmpeqb %xmm6, %xmm0
301 pcmpeqb %xmm7, %xmm1
302 pmovmskb %xmm0, %rdx
303 pmovmskb %xmm1, %rcx
304 test %rdx, %rdx
305 jnz L(CopyFrom1To16BytesUnaligned_32)
306
/* The zero byte is in the last chunk (%xmm7); %rdx = its index within that
   chunk. The first three chunks are stored whole. */
307 bsf %rcx, %rdx
308 movdqu %xmm4, (%rdi)
309 movdqu %xmm5, 16(%rdi)
310 movdqu %xmm6, 32(%rdi)
311#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
/* Plain strncpy build: store %xmm7 whole as well, set %rax to the address of
   the zero byte for USE_AS_STPCPY, add 15 - index to %r8, point %rdi one byte
   past the zero byte and continue at StrncpyFillTailWithZero (defined outside
   this view; presumably zero-fills the remaining %r8 bytes -- TODO confirm). */
312# ifdef USE_AS_STPCPY
313 lea 48(%rdi, %rdx), %rax
314# endif
315 movdqu %xmm7, 48(%rdi)
316 add $15, %r8
317 sub %rdx, %r8
318 lea 49(%rdi, %rdx), %rdi
319 jmp L(StrncpyFillTailWithZero)
320#else
/* Other builds: step both pointers to the last chunk and dispatch through
   ExitTable on the index in %rdx. */
321 add $48, %rsi
322 add $48, %rdi
323 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
324#endif
325
326/* Taken when the low 6 bits of %rcx (the source address copy tested at entry) are <= 32 */
327
/*
 * SourceStringAlignmentLess32: taken from the entry test when the low 6 bits
 * of %rcx are <= 32. The first two 16-byte pieces of the source are loaded
 * with unaligned loads and tested for a zero byte in turn.
 */
328L(SourceStringAlignmentLess32):
329 pxor %xmm0, %xmm0
330 movdqu (%rsi), %xmm1
331 movdqu 16(%rsi), %xmm2
332 pcmpeqb %xmm1, %xmm0
333 pmovmskb %xmm0, %rdx
334
335#ifdef USE_AS_STRNCPY
/* A limit of at most 16 (17 when neither USE_AS_STPCPY nor USE_AS_STRCAT is
   defined) is finished by the limit-aware tail code. */
336# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
337 cmp $16, %r8
338# else
339 cmp $17, %r8
340# endif
341 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
342#endif
343 test %rdx, %rdx
344 jnz L(CopyFrom1To16BytesTail1)
345
/* First piece has no zero byte (%xmm0 is all-zero again): store it and test
   the second piece. */
346 pcmpeqb %xmm2, %xmm0
347 movdqu %xmm1, (%rdi)
348 pmovmskb %xmm0, %rdx
349
350#ifdef USE_AS_STRNCPY
351# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
352 cmp $32, %r8
353# else
354 cmp $33, %r8
355# endif
356 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
357#endif
358 test %rdx, %rdx
359 jnz L(CopyFrom1To32Bytes1)
360
/* No zero byte in the first 32 bytes: leave the misalignment (0..15) in %rcx
   and round %rsi down to 16 bytes before joining Unalign16Both. */
361 and $15, %rcx
362 and $-16, %rsi
363
364 jmp L(Unalign16Both)
365
366/*------End of main part with loops---------------------*/
367
368/* Case1 */
369
/* CopyFrom1To16Bytes (built unless USE_AS_STRNCPY is defined without
   USE_AS_STRCAT): a zero byte was found in the chunk at running offset %rcx.
   Advance both pointers by %rcx, turn the mask in %rdx into the index of its
   lowest set bit and dispatch through ExitTable (4-byte entries). */
370#if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
371 .p2align 4
372L(CopyFrom1To16Bytes):
373 add %rcx, %rdi
374 add %rcx, %rsi
375 bsf %rdx, %rdx
376 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
377#endif
/* CopyFrom1To16BytesTail: zero byte in the first aligned chunk. Add the
   misalignment in %rcx back onto the rounded-down %rsi. The mask in %rdx was
   already shifted by the misalignment at the jump site, so its lowest set bit
   is the index counted from that restored %rsi. %rdi is not changed here. */
378 .p2align 4
379L(CopyFrom1To16BytesTail):
380 add %rcx, %rsi
381 bsf %rdx, %rdx
382 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
383
/* CopyFrom1To32Bytes1: zero byte in the second unaligned 16-byte piece. Step
   both pointers past the first piece (and take 16 off %r8 in the plain
   strncpy build), then fall into the shared dispatch below.
   CopyFrom1To16BytesTail1: dispatch through ExitTable on the index of the
   lowest set bit of the mask in %rdx. */
384 .p2align 4
385L(CopyFrom1To32Bytes1):
386 add $16, %rsi
387 add $16, %rdi
388#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
389 sub $16, %r8
390#endif
391L(CopyFrom1To16BytesTail1):
392 bsf %rdx, %rdx
393 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
394
/* CopyFrom1To32Bytes: zero byte in the second aligned chunk. Add the
   misalignment in %rcx back onto %rsi; the table index becomes
   bsf(mask) + 16 - misalignment, i.e. counted from the restored %rsi. */
395 .p2align 4
396L(CopyFrom1To32Bytes):
397 bsf %rdx, %rdx
398 add %rcx, %rsi
399 add $16, %rdx
400 sub %rcx, %rdx
401 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
402
/* CopyFrom1To16BytesUnaligned_0: the mask in %rdx marks a zero byte in %xmm4,
   the first chunk of the current 64-byte group; bsf turns it into an index.
   Plain strncpy build: store %xmm4 whole, set %rax to the address of the zero
   byte for USE_AS_STPCPY, add 63 - index to %r8, point %rdi one byte past the
   zero byte and continue at StrncpyFillTailWithZero (outside this view).
   Other builds: dispatch through ExitTable on the index. */
403 .p2align 4
404L(CopyFrom1To16BytesUnaligned_0):
405 bsf %rdx, %rdx
406#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
407# ifdef USE_AS_STPCPY
408 lea (%rdi, %rdx), %rax
409# endif
410 movdqu %xmm4, (%rdi)
411 add $63, %r8
412 sub %rdx, %r8
413 lea 1(%rdi, %rdx), %rdi
414 jmp L(StrncpyFillTailWithZero)
415#else
416 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
417#endif
418
/* CopyFrom1To16BytesUnaligned_16: the mask in %rcx marks a zero byte in %xmm5,
   the second chunk of the current 64-byte group; %rdx receives its index.
   %xmm4 is stored whole first.
   Plain strncpy build: store %xmm5 whole too, set %rax to the address of the
   zero byte for USE_AS_STPCPY, add 47 - index to %r8, point %rdi one byte past
   the zero byte and continue at StrncpyFillTailWithZero (outside this view).
   Other builds: step both pointers by 16 and dispatch through ExitTable. */
419 .p2align 4
420L(CopyFrom1To16BytesUnaligned_16):
421 bsf %rcx, %rdx
422 movdqu %xmm4, (%rdi)
423#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
424# ifdef USE_AS_STPCPY
425 lea 16(%rdi, %rdx), %rax
426# endif
427 movdqu %xmm5, 16(%rdi)
428 add $47, %r8
429 sub %rdx, %r8
430 lea 17(%rdi, %rdx), %rdi
431 jmp L(StrncpyFillTailWithZero)
432#else
433 add $16, %rsi
434 add $16, %rdi
435 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
436#endif
437
/* CopyFrom1To16BytesUnaligned_32: the mask in %rdx marks a zero byte in %xmm6,
   the third chunk of the current 64-byte group; bsf turns it into an index.
   %xmm4 and %xmm5 are stored whole first.
   Plain strncpy build: store %xmm6 whole too, set %rax to the address of the
   zero byte for USE_AS_STPCPY, add 31 - index to %r8, point %rdi one byte past
   the zero byte and continue at StrncpyFillTailWithZero (outside this view).
   Other builds: step both pointers by 32 and dispatch through ExitTable. */
438 .p2align 4
439L(CopyFrom1To16BytesUnaligned_32):
440 bsf %rdx, %rdx
441 movdqu %xmm4, (%rdi)
442 movdqu %xmm5, 16(%rdi)
443#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
444# ifdef USE_AS_STPCPY
445 lea 32(%rdi, %rdx), %rax
446# endif
447 movdqu %xmm6, 32(%rdi)
448 add $31, %r8
449 sub %rdx, %r8
450 lea 33(%rdi, %rdx), %rdi
451 jmp L(StrncpyFillTailWithZero)
452#else
453 add $32, %rsi
454 add $32, %rdi
455 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
456#endif
457
458#ifdef USE_AS_STRNCPY
/* Built only when USE_AS_STRCAT is not defined (this region also sits inside
   an enclosing USE_AS_STRNCPY conditional). Each stub stores the 16-byte
   chunk held in the register named by its label at (%rdi, %rcx) with an
   unaligned store and joins CopyFrom1To16BytesXmmExit, which is defined
   outside this view. */
459# ifndef USE_AS_STRCAT
460 .p2align 4
461L(CopyFrom1To16BytesUnalignedXmm6):
462 movdqu %xmm6, (%rdi, %rcx)
463 jmp L(CopyFrom1To16BytesXmmExit)
464
465 .p2align 4
466L(CopyFrom1To16BytesUnalignedXmm5):
467 movdqu %xmm5, (%rdi, %rcx)
468 jmp L(CopyFrom1To16BytesXmmExit)
469
470 .p2align 4
471L(CopyFrom1To16BytesUnalignedXmm4):
472 movdqu %xmm4, (%rdi, %rcx)
473 jmp L(CopyFrom1To16BytesXmmExit)
474
475 .p2align 4
476L(CopyFrom1To16BytesUnalignedXmm3):
477 movdqu %xmm3, (%rdi, %rcx)
478 jmp L(CopyFrom1To16BytesXmmExit)
479
480 .p2align 4
481L(CopyFrom1To16BytesUnalignedXmm1):
482 movdqu %xmm1, (%rdi, %rcx)
483 jmp L(CopyFrom1To16BytesXmmExit)
484# endif
485
/* CopyFrom1To16BytesExit: dispatch through ExitTable (4-byte entries) on the
   byte index already held in %rdx. */
486 .p2align 4
487L(CopyFrom1To16BytesExit):
488 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
489
490/* Case2 */
491
/* CopyFrom1To16BytesCase2: the limit ran out while the current chunk holds a
   zero byte. Add 16 back onto %r8, advance both pointers by the running
   offset %rcx and take the index of the zero byte. If that index is below
   %r8 (unsigned) the string ends first and ExitTable is used; otherwise
   ExitStrncpyTable is indexed by %r8. */
492 .p2align 4
493L(CopyFrom1To16BytesCase2):
494 add $16, %r8
495 add %rcx, %rdi
496 add %rcx, %rsi
497 bsf %rdx, %rdx
498 cmp %r8, %rdx
499 jb L(CopyFrom1To16BytesExit)
500 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
501
/* CopyFrom1To32BytesCase2: zero byte in the second aligned chunk with the
   limit nearly used up. Add the misalignment in %rcx back onto %rsi and
   compute index = bsf(mask) + 16 - misalignment. An index below %r8
   (unsigned) goes through ExitTable; otherwise ExitStrncpyTable is indexed
   by %r8. */
502 .p2align 4
503L(CopyFrom1To32BytesCase2):
504 add %rcx, %rsi
505 bsf %rdx, %rdx
506 add $16, %rdx
507 sub %rcx, %rdx
508 cmp %r8, %rdx
509 jb L(CopyFrom1To16BytesExit)
510 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
511
/* CopyFrom1To16BytesTailCase2: zero byte in the first aligned chunk with the
   limit nearly used up. Add the misalignment in %rcx back onto %rsi and take
   the index of the zero byte. An index below %r8 (unsigned) goes through
   ExitTable; otherwise ExitStrncpyTable is indexed by %r8. */
512L(CopyFrom1To16BytesTailCase2):
513 add %rcx, %rsi
514 bsf %rdx, %rdx
515 cmp %r8, %rdx
516 jb L(CopyFrom1To16BytesExit)
517 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
518
/* CopyFrom1To16BytesTail1Case2: same decision without any pointer fix-up.
   Take the index of the zero byte; an index below %r8 (unsigned) goes through
   ExitTable, otherwise ExitStrncpyTable is indexed by %r8. */
519L(CopyFrom1To16BytesTail1Case2):
520 bsf %rdx, %rdx
521 cmp %r8, %rdx
522 jb L(CopyFrom1To16BytesExit)
523 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
524
525/* Case2 or Case3, Case3 */
526
/* CopyFrom1To16BytesCase2OrCase3: reached when the limit bookkeeping in the
   unrolled copy hits zero or borrows. A non-zero mask in %rdx (zero byte in
   the current chunk) is handled by CopyFrom1To16BytesCase2.
   CopyFrom1To16BytesCase3: no zero byte in the chunk. Add 16 back onto %r8,
   advance both pointers by the running offset %rcx and dispatch through
   ExitStrncpyTable indexed by %r8. */
527 .p2align 4
528L(CopyFrom1To16BytesCase2OrCase3):
529 test %rdx, %rdx
530 jnz L(CopyFrom1To16BytesCase2)
531L(CopyFrom1To16BytesCase3):
532 add $16, %r8
533 add %rcx, %rdi
534 add %rcx, %rsi
535 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
536
/* CopyFrom1To32BytesCase2OrCase3: the limit ends within the first two aligned
   chunks. A non-zero mask in %rdx goes to CopyFrom1To32BytesCase2. Otherwise
   add the misalignment in %rcx back onto %rsi and dispatch through
   ExitStrncpyTable indexed by %r8. */
537 .p2align 4
538L(CopyFrom1To32BytesCase2OrCase3):
539 test %rdx, %rdx
540 jnz L(CopyFrom1To32BytesCase2)
541 add %rcx, %rsi
542 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
543
/* CopyFrom1To16BytesTailCase2OrCase3: the limit ends within the first aligned
   chunk. A non-zero mask in %rdx goes to CopyFrom1To16BytesTailCase2.
   Otherwise add the misalignment in %rcx back onto %rsi and dispatch through
   ExitStrncpyTable indexed by %r8. */
544 .p2align 4
545L(CopyFrom1To16BytesTailCase2OrCase3):
546 test %rdx, %rdx
547 jnz L(CopyFrom1To16BytesTailCase2)
548 add %rcx, %rsi
549 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
550
/* CopyFrom1To32Bytes1Case2OrCase3: the limit ends within the second unaligned
   16-byte piece. Step both pointers past the first piece, take 16 off %r8 and
   fall into the shared test below.
   CopyFrom1To16BytesTail1Case2OrCase3: a non-zero mask in %rdx goes to
   CopyFrom1To16BytesTail1Case2; otherwise dispatch through ExitStrncpyTable
   indexed by %r8. */
551 .p2align 4
552L(CopyFrom1To32Bytes1Case2OrCase3):
553 add $16, %rdi
554 add $16, %rsi
555 sub $16, %r8
556L(CopyFrom1To16BytesTail1Case2OrCase3):
557 test %rdx, %rdx
558 jnz L(CopyFrom1To16BytesTail1Case2)
559 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
560
561#endif
562
563/*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/
564
/* Exit1: write one byte to (%rdi), taken from %dh rather than loaded. %dh is
   assumed to be 0 here (true when %rdx holds a byte index below 256, as at
   the dispatch sites in view).
   USE_AS_STPCPY: %rax = %rdi, the byte just written.
   Plain strncpy build: %r8 -= 1, %rdi += 1; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched, so jnz tests the subtraction. */
565 .p2align 4
566L(Exit1):
567 mov %dh, (%rdi)
568#ifdef USE_AS_STPCPY
569 lea (%rdi), %rax
570#endif
571#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
572 sub $1, %r8
573 lea 1(%rdi), %rdi
574 jnz L(StrncpyFillTailWithZero)
575#endif
576 RETURN
577
/* Exit2: copy 2 bytes from (%rsi) to (%rdi) with one 2-byte move.
   USE_AS_STPCPY: %rax = %rdi + 1, the last byte written.
   Plain strncpy build: %r8 -= 2, %rdi += 2; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
578 .p2align 4
579L(Exit2):
580 mov (%rsi), %dx
581 mov %dx, (%rdi)
582#ifdef USE_AS_STPCPY
583 lea 1(%rdi), %rax
584#endif
585#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
586 sub $2, %r8
587 lea 2(%rdi), %rdi
588 jnz L(StrncpyFillTailWithZero)
589#endif
590 RETURN
591
/* Exit3: copy 2 bytes from (%rsi) and write a third byte from %dh at offset 2.
   %dh is assumed to be 0 here (true when %rdx holds a byte index below 256,
   as at the dispatch sites in view).
   USE_AS_STPCPY: %rax = %rdi + 2, the last byte written.
   Plain strncpy build: %r8 -= 3, %rdi += 3; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
592 .p2align 4
593L(Exit3):
594 mov (%rsi), %cx
595 mov %cx, (%rdi)
596 mov %dh, 2(%rdi)
597#ifdef USE_AS_STPCPY
598 lea 2(%rdi), %rax
599#endif
600#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
601 sub $3, %r8
602 lea 3(%rdi), %rdi
603 jnz L(StrncpyFillTailWithZero)
604#endif
605 RETURN
606
/* Exit4: copy 4 bytes from (%rsi) to (%rdi) with one 4-byte move.
   USE_AS_STPCPY: %rax = %rdi + 3, the last byte written.
   Plain strncpy build: %r8 -= 4, %rdi += 4; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
607 .p2align 4
608L(Exit4):
609 mov (%rsi), %edx
610 mov %edx, (%rdi)
611#ifdef USE_AS_STPCPY
612 lea 3(%rdi), %rax
613#endif
614#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
615 sub $4, %r8
616 lea 4(%rdi), %rdi
617 jnz L(StrncpyFillTailWithZero)
618#endif
619 RETURN
620
/* Exit5: copy 4 bytes from (%rsi) and write a fifth byte from %dh at offset 4.
   %dh is assumed to be 0 here (true when %rdx holds a byte index below 256,
   as at the dispatch sites in view).
   USE_AS_STPCPY: %rax = %rdi + 4, the last byte written.
   Plain strncpy build: %r8 -= 5, %rdi += 5; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
621 .p2align 4
622L(Exit5):
623 mov (%rsi), %ecx
624 mov %dh, 4(%rdi)
625 mov %ecx, (%rdi)
626#ifdef USE_AS_STPCPY
627 lea 4(%rdi), %rax
628#endif
629#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
630 sub $5, %r8
631 lea 5(%rdi), %rdi
632 jnz L(StrncpyFillTailWithZero)
633#endif
634 RETURN
635
/* Exit6: copy 6 bytes from (%rsi) to (%rdi): a 4-byte move at offset 0 plus a
   2-byte move at offset 4.
   USE_AS_STPCPY: %rax = %rdi + 5, the last byte written.
   Plain strncpy build: %r8 -= 6, %rdi += 6; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
636 .p2align 4
637L(Exit6):
638 mov (%rsi), %ecx
639 mov 4(%rsi), %dx
640 mov %ecx, (%rdi)
641 mov %dx, 4(%rdi)
642#ifdef USE_AS_STPCPY
643 lea 5(%rdi), %rax
644#endif
645#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
646 sub $6, %r8
647 lea 6(%rdi), %rdi
648 jnz L(StrncpyFillTailWithZero)
649#endif
650 RETURN
651
/* Exit7: copy 7 bytes from (%rsi) to (%rdi) with two overlapping 4-byte moves
   at offsets 0 and 3.
   USE_AS_STPCPY: %rax = %rdi + 6, the last byte written.
   Plain strncpy build: %r8 -= 7, %rdi += 7; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
652 .p2align 4
653L(Exit7):
654 mov (%rsi), %ecx
655 mov 3(%rsi), %edx
656 mov %ecx, (%rdi)
657 mov %edx, 3(%rdi)
658#ifdef USE_AS_STPCPY
659 lea 6(%rdi), %rax
660#endif
661#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
662 sub $7, %r8
663 lea 7(%rdi), %rdi
664 jnz L(StrncpyFillTailWithZero)
665#endif
666 RETURN
667
/* Exit8: copy 8 bytes from (%rsi) to (%rdi) with one 8-byte move.
   USE_AS_STPCPY: %rax = %rdi + 7, the last byte written.
   Plain strncpy build: %r8 -= 8, %rdi += 8; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
668 .p2align 4
669L(Exit8):
670 mov (%rsi), %rdx
671 mov %rdx, (%rdi)
672#ifdef USE_AS_STPCPY
673 lea 7(%rdi), %rax
674#endif
675#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
676 sub $8, %r8
677 lea 8(%rdi), %rdi
678 jnz L(StrncpyFillTailWithZero)
679#endif
680 RETURN
681
/* Exit9: copy 8 bytes from (%rsi) and write a ninth byte from %dh at offset 8.
   %dh is assumed to be 0 here (true when %rdx holds a byte index below 256,
   as at the dispatch sites in view).
   USE_AS_STPCPY: %rax = %rdi + 8, the last byte written.
   Plain strncpy build: %r8 -= 9, %rdi += 9; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
682 .p2align 4
683L(Exit9):
684 mov (%rsi), %rcx
685 mov %dh, 8(%rdi)
686 mov %rcx, (%rdi)
687#ifdef USE_AS_STPCPY
688 lea 8(%rdi), %rax
689#endif
690#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
691 sub $9, %r8
692 lea 9(%rdi), %rdi
693 jnz L(StrncpyFillTailWithZero)
694#endif
695 RETURN
696
/* Exit10: copy 10 bytes from (%rsi) to (%rdi): an 8-byte move at offset 0
   plus a 2-byte move at offset 8.
   USE_AS_STPCPY: %rax = %rdi + 9, the last byte written.
   Plain strncpy build: %r8 -= 10, %rdi += 10; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
697 .p2align 4
698L(Exit10):
699 mov (%rsi), %rcx
700 mov 8(%rsi), %dx
701 mov %rcx, (%rdi)
702 mov %dx, 8(%rdi)
703#ifdef USE_AS_STPCPY
704 lea 9(%rdi), %rax
705#endif
706#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
707 sub $10, %r8
708 lea 10(%rdi), %rdi
709 jnz L(StrncpyFillTailWithZero)
710#endif
711 RETURN
712
/* Exit11: copy 11 bytes from (%rsi) to (%rdi): an 8-byte move at offset 0
   plus an overlapping 4-byte move at offset 7.
   USE_AS_STPCPY: %rax = %rdi + 10, the last byte written.
   Plain strncpy build: %r8 -= 11, %rdi += 11; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
713 .p2align 4
714L(Exit11):
715 mov (%rsi), %rcx
716 mov 7(%rsi), %edx
717 mov %rcx, (%rdi)
718 mov %edx, 7(%rdi)
719#ifdef USE_AS_STPCPY
720 lea 10(%rdi), %rax
721#endif
722#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
723 sub $11, %r8
724 lea 11(%rdi), %rdi
725 jnz L(StrncpyFillTailWithZero)
726#endif
727 RETURN
728
/* Exit12: copy 12 bytes from (%rsi) to (%rdi): an 8-byte move at offset 0
   plus a 4-byte move at offset 8.
   USE_AS_STPCPY: %rax = %rdi + 11, the last byte written.
   Plain strncpy build: %r8 -= 12, %rdi += 12; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
729 .p2align 4
730L(Exit12):
731 mov (%rsi), %rcx
732 mov 8(%rsi), %edx
733 mov %rcx, (%rdi)
734 mov %edx, 8(%rdi)
735#ifdef USE_AS_STPCPY
736 lea 11(%rdi), %rax
737#endif
738#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
739 sub $12, %r8
740 lea 12(%rdi), %rdi
741 jnz L(StrncpyFillTailWithZero)
742#endif
743 RETURN
744
/* Exit13: copy 13 bytes from (%rsi) to (%rdi) with two overlapping 8-byte
   moves at offsets 0 and 5.
   USE_AS_STPCPY: %rax = %rdi + 12, the last byte written.
   Plain strncpy build: %r8 -= 13, %rdi += 13; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
745 .p2align 4
746L(Exit13):
747 mov (%rsi), %rcx
748 mov 5(%rsi), %rdx
749 mov %rcx, (%rdi)
750 mov %rdx, 5(%rdi)
751#ifdef USE_AS_STPCPY
752 lea 12(%rdi), %rax
753#endif
754#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
755 sub $13, %r8
756 lea 13(%rdi), %rdi
757 jnz L(StrncpyFillTailWithZero)
758#endif
759 RETURN
760
/* Exit14: copy 14 bytes from (%rsi) to (%rdi) with two overlapping 8-byte
   moves at offsets 0 and 6.
   USE_AS_STPCPY: %rax = %rdi + 13, the last byte written.
   Plain strncpy build: %r8 -= 14, %rdi += 14; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
761 .p2align 4
762L(Exit14):
763 mov (%rsi), %rcx
764 mov 6(%rsi), %rdx
765 mov %rcx, (%rdi)
766 mov %rdx, 6(%rdi)
767#ifdef USE_AS_STPCPY
768 lea 13(%rdi), %rax
769#endif
770#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
771 sub $14, %r8
772 lea 14(%rdi), %rdi
773 jnz L(StrncpyFillTailWithZero)
774#endif
775 RETURN
776
/* Exit15: copy 15 bytes from (%rsi) to (%rdi) with two overlapping 8-byte
   moves at offsets 0 and 7.
   USE_AS_STPCPY: %rax = %rdi + 14, the last byte written.
   Plain strncpy build: %r8 -= 15, %rdi += 15; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
777 .p2align 4
778L(Exit15):
779 mov (%rsi), %rcx
780 mov 7(%rsi), %rdx
781 mov %rcx, (%rdi)
782 mov %rdx, 7(%rdi)
783#ifdef USE_AS_STPCPY
784 lea 14(%rdi), %rax
785#endif
786#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
787 sub $15, %r8
788 lea 15(%rdi), %rdi
789 jnz L(StrncpyFillTailWithZero)
790#endif
791 RETURN
792
/* Exit16: copy 16 bytes from (%rsi) to (%rdi) with one unaligned 16-byte SSE
   move.
   USE_AS_STPCPY: %rax = %rdi + 15, the last byte written.
   Plain strncpy build: %r8 -= 16, %rdi += 16; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
793 .p2align 4
794L(Exit16):
795 movdqu (%rsi), %xmm0
796 movdqu %xmm0, (%rdi)
797#ifdef USE_AS_STPCPY
798 lea 15(%rdi), %rax
799#endif
800#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
801 sub $16, %r8
802 lea 16(%rdi), %rdi
803 jnz L(StrncpyFillTailWithZero)
804#endif
805 RETURN
806
/* Exit17: copy 16 bytes from (%rsi) with one unaligned SSE move and write a
   17th byte from %dh at offset 16. %dh is assumed to be 0 here (true when
   %rdx holds a byte index below 256, as at the dispatch sites in view).
   USE_AS_STPCPY: %rax = %rdi + 16, the last byte written.
   Plain strncpy build: %r8 -= 17, %rdi += 17; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
807 .p2align 4
808L(Exit17):
809 movdqu (%rsi), %xmm0
810 movdqu %xmm0, (%rdi)
811 mov %dh, 16(%rdi)
812#ifdef USE_AS_STPCPY
813 lea 16(%rdi), %rax
814#endif
815#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
816 sub $17, %r8
817 lea 17(%rdi), %rdi
818 jnz L(StrncpyFillTailWithZero)
819#endif
820 RETURN
821
/* Exit18: copy 18 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset 0
   plus a 2-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 17, the last byte written.
   Plain strncpy build: %r8 -= 18, %rdi += 18; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
822 .p2align 4
823L(Exit18):
824 movdqu (%rsi), %xmm0
825 mov 16(%rsi), %cx
826 movdqu %xmm0, (%rdi)
827 mov %cx, 16(%rdi)
828#ifdef USE_AS_STPCPY
829 lea 17(%rdi), %rax
830#endif
831#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
832 sub $18, %r8
833 lea 18(%rdi), %rdi
834 jnz L(StrncpyFillTailWithZero)
835#endif
836 RETURN
837
/* Exit19: copy 19 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset 0
   plus an overlapping 4-byte move at offset 15.
   USE_AS_STPCPY: %rax = %rdi + 18, the last byte written.
   Plain strncpy build: %r8 -= 19, %rdi += 19; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
838 .p2align 4
839L(Exit19):
840 movdqu (%rsi), %xmm0
841 mov 15(%rsi), %ecx
842 movdqu %xmm0, (%rdi)
843 mov %ecx, 15(%rdi)
844#ifdef USE_AS_STPCPY
845 lea 18(%rdi), %rax
846#endif
847#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
848 sub $19, %r8
849 lea 19(%rdi), %rdi
850 jnz L(StrncpyFillTailWithZero)
851#endif
852 RETURN
853
/* Exit20: copy 20 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset 0
   plus a 4-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 19, the last byte written.
   Plain strncpy build: %r8 -= 20, %rdi += 20; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
854 .p2align 4
855L(Exit20):
856 movdqu (%rsi), %xmm0
857 mov 16(%rsi), %ecx
858 movdqu %xmm0, (%rdi)
859 mov %ecx, 16(%rdi)
860#ifdef USE_AS_STPCPY
861 lea 19(%rdi), %rax
862#endif
863#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
864 sub $20, %r8
865 lea 20(%rdi), %rdi
866 jnz L(StrncpyFillTailWithZero)
867#endif
868 RETURN
869
/* Exit21: copy 20 bytes from (%rsi) (16-byte SSE move plus 4-byte move at
   offset 16) and write a 21st byte from %dh at offset 20. %dh is assumed to
   be 0 here (true when %rdx holds a byte index below 256, as at the dispatch
   sites in view).
   USE_AS_STPCPY: %rax = %rdi + 20, the last byte written.
   Plain strncpy build: %r8 -= 21, %rdi += 21; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
870 .p2align 4
871L(Exit21):
872 movdqu (%rsi), %xmm0
873 mov 16(%rsi), %ecx
874 movdqu %xmm0, (%rdi)
875 mov %ecx, 16(%rdi)
876 mov %dh, 20(%rdi)
877#ifdef USE_AS_STPCPY
878 lea 20(%rdi), %rax
879#endif
880#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
881 sub $21, %r8
882 lea 21(%rdi), %rdi
883 jnz L(StrncpyFillTailWithZero)
884#endif
885 RETURN
886
/* Exit22: copy 22 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset 0
   plus an overlapping 8-byte move at offset 14.
   USE_AS_STPCPY: %rax = %rdi + 21, the last byte written.
   Plain strncpy build: %r8 -= 22, %rdi += 22; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
887 .p2align 4
888L(Exit22):
889 movdqu (%rsi), %xmm0
890 mov 14(%rsi), %rcx
891 movdqu %xmm0, (%rdi)
892 mov %rcx, 14(%rdi)
893#ifdef USE_AS_STPCPY
894 lea 21(%rdi), %rax
895#endif
896#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
897 sub $22, %r8
898 lea 22(%rdi), %rdi
899 jnz L(StrncpyFillTailWithZero)
900#endif
901 RETURN
902
/* Exit23: copy 23 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset 0
   plus an overlapping 8-byte move at offset 15.
   USE_AS_STPCPY: %rax = %rdi + 22, the last byte written.
   Plain strncpy build: %r8 -= 23, %rdi += 23; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
903 .p2align 4
904L(Exit23):
905 movdqu (%rsi), %xmm0
906 mov 15(%rsi), %rcx
907 movdqu %xmm0, (%rdi)
908 mov %rcx, 15(%rdi)
909#ifdef USE_AS_STPCPY
910 lea 22(%rdi), %rax
911#endif
912#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
913 sub $23, %r8
914 lea 23(%rdi), %rdi
915 jnz L(StrncpyFillTailWithZero)
916#endif
917 RETURN
918
/* Exit24: copy 24 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset 0
   plus an 8-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 23, the last byte written.
   Plain strncpy build: %r8 -= 24, %rdi += 24; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
919 .p2align 4
920L(Exit24):
921 movdqu (%rsi), %xmm0
922 mov 16(%rsi), %rcx
923 movdqu %xmm0, (%rdi)
924 mov %rcx, 16(%rdi)
925#ifdef USE_AS_STPCPY
926 lea 23(%rdi), %rax
927#endif
928#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
929 sub $24, %r8
930 lea 24(%rdi), %rdi
931 jnz L(StrncpyFillTailWithZero)
932#endif
933 RETURN
934
/* Exit25: copy 24 bytes from (%rsi) (16-byte SSE move plus 8-byte move at
   offset 16) and write a 25th byte from %dh at offset 24. %dh is assumed to
   be 0 here (true when %rdx holds a byte index below 256, as at the dispatch
   sites in view).
   USE_AS_STPCPY: %rax = %rdi + 24, the last byte written.
   Plain strncpy build: %r8 -= 25, %rdi += 25; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
935 .p2align 4
936L(Exit25):
937 movdqu (%rsi), %xmm0
938 mov 16(%rsi), %rcx
939 movdqu %xmm0, (%rdi)
940 mov %rcx, 16(%rdi)
941 mov %dh, 24(%rdi)
942#ifdef USE_AS_STPCPY
943 lea 24(%rdi), %rax
944#endif
945#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
946 sub $25, %r8
947 lea 25(%rdi), %rdi
948 jnz L(StrncpyFillTailWithZero)
949#endif
950 RETURN
951
/* Exit26: copy 26 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset
   0, an 8-byte move at offset 16 and a 2-byte move at offset 24.
   USE_AS_STPCPY: %rax = %rdi + 25, the last byte written.
   Plain strncpy build: %r8 -= 26, %rdi += 26; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
952 .p2align 4
953L(Exit26):
954 movdqu (%rsi), %xmm0
955 mov 16(%rsi), %rdx
956 mov 24(%rsi), %cx
957 movdqu %xmm0, (%rdi)
958 mov %rdx, 16(%rdi)
959 mov %cx, 24(%rdi)
960#ifdef USE_AS_STPCPY
961 lea 25(%rdi), %rax
962#endif
963#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
964 sub $26, %r8
965 lea 26(%rdi), %rdi
966 jnz L(StrncpyFillTailWithZero)
967#endif
968 RETURN
969
/* Exit27: copy 27 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset
   0, an 8-byte move at offset 16 and an overlapping 4-byte move at offset 23.
   USE_AS_STPCPY: %rax = %rdi + 26, the last byte written.
   Plain strncpy build: %r8 -= 27, %rdi += 27; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
970 .p2align 4
971L(Exit27):
972 movdqu (%rsi), %xmm0
973 mov 16(%rsi), %rdx
974 mov 23(%rsi), %ecx
975 movdqu %xmm0, (%rdi)
976 mov %rdx, 16(%rdi)
977 mov %ecx, 23(%rdi)
978#ifdef USE_AS_STPCPY
979 lea 26(%rdi), %rax
980#endif
981#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
982 sub $27, %r8
983 lea 27(%rdi), %rdi
984 jnz L(StrncpyFillTailWithZero)
985#endif
986 RETURN
987
/* Exit28: copy 28 bytes from (%rsi) to (%rdi): a 16-byte SSE move at offset
   0, an 8-byte move at offset 16 and a 4-byte move at offset 24.
   USE_AS_STPCPY: %rax = %rdi + 27, the last byte written.
   Plain strncpy build: %r8 -= 28, %rdi += 28; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
988 .p2align 4
989L(Exit28):
990 movdqu (%rsi), %xmm0
991 mov 16(%rsi), %rdx
992 mov 24(%rsi), %ecx
993 movdqu %xmm0, (%rdi)
994 mov %rdx, 16(%rdi)
995 mov %ecx, 24(%rdi)
996#ifdef USE_AS_STPCPY
997 lea 27(%rdi), %rax
998#endif
999#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1000 sub $28, %r8
1001 lea 28(%rdi), %rdi
1002 jnz L(StrncpyFillTailWithZero)
1003#endif
1004 RETURN
1005
/* Exit29: copy 29 bytes from (%rsi) to (%rdi) with two overlapping unaligned
   16-byte SSE moves at offsets 0 and 13.
   USE_AS_STPCPY: %rax = %rdi + 28, the last byte written.
   Plain strncpy build: %r8 -= 29, %rdi += 29; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
1006 .p2align 4
1007L(Exit29):
1008 movdqu (%rsi), %xmm0
1009 movdqu 13(%rsi), %xmm2
1010 movdqu %xmm0, (%rdi)
1011 movdqu %xmm2, 13(%rdi)
1012#ifdef USE_AS_STPCPY
1013 lea 28(%rdi), %rax
1014#endif
1015#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1016 sub $29, %r8
1017 lea 29(%rdi), %rdi
1018 jnz L(StrncpyFillTailWithZero)
1019#endif
1020 RETURN
1021
/* Exit30: copy 30 bytes from (%rsi) to (%rdi) with two overlapping unaligned
   16-byte SSE moves at offsets 0 and 14.
   USE_AS_STPCPY: %rax = %rdi + 29, the last byte written.
   Plain strncpy build: %r8 -= 30, %rdi += 30; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
1022 .p2align 4
1023L(Exit30):
1024 movdqu (%rsi), %xmm0
1025 movdqu 14(%rsi), %xmm2
1026 movdqu %xmm0, (%rdi)
1027 movdqu %xmm2, 14(%rdi)
1028#ifdef USE_AS_STPCPY
1029 lea 29(%rdi), %rax
1030#endif
1031#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1032 sub $30, %r8
1033 lea 30(%rdi), %rdi
1034 jnz L(StrncpyFillTailWithZero)
1035#endif
1036 RETURN
1037
/* Exit31: copy 31 bytes from (%rsi) to (%rdi) with two overlapping unaligned
   16-byte SSE moves at offsets 0 and 15.
   USE_AS_STPCPY: %rax = %rdi + 30, the last byte written.
   Plain strncpy build: %r8 -= 31, %rdi += 31; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
1038 .p2align 4
1039L(Exit31):
1040 movdqu (%rsi), %xmm0
1041 movdqu 15(%rsi), %xmm2
1042 movdqu %xmm0, (%rdi)
1043 movdqu %xmm2, 15(%rdi)
1044#ifdef USE_AS_STPCPY
1045 lea 30(%rdi), %rax
1046#endif
1047#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1048 sub $31, %r8
1049 lea 31(%rdi), %rdi
1050 jnz L(StrncpyFillTailWithZero)
1051#endif
1052 RETURN
1053
/* Exit32: copy 32 bytes from (%rsi) to (%rdi) with two unaligned 16-byte SSE
   moves at offsets 0 and 16.
   USE_AS_STPCPY: %rax = %rdi + 31, the last byte written.
   Plain strncpy build: %r8 -= 32, %rdi += 32; a non-zero %r8 continues at
   StrncpyFillTailWithZero (outside this view). lea leaves the flags set by
   sub untouched. */
1054 .p2align 4
1055L(Exit32):
1056 movdqu (%rsi), %xmm0
1057 movdqu 16(%rsi), %xmm2
1058 movdqu %xmm0, (%rdi)
1059 movdqu %xmm2, 16(%rdi)
1060#ifdef USE_AS_STPCPY
1061 lea 31(%rdi), %rax
1062#endif
1063#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1064 sub $32, %r8
1065 lea 32(%rdi), %rdi
1066 jnz L(StrncpyFillTailWithZero)
1067#endif
1068 RETURN
1069
1070#ifdef USE_AS_STRNCPY
1071
/* StrncpyExit0: copies no bytes.
   USE_AS_STPCPY: %rax = %rdi.
   USE_AS_STRCAT: a zero byte is stored at (%rdi), taken from %ch after it is
   cleared with xor. */
1072 .p2align 4
1073L(StrncpyExit0):
1074#ifdef USE_AS_STPCPY
1075 mov %rdi, %rax
1076#endif
1077#ifdef USE_AS_STRCAT
1078 xor %ch, %ch
1079 movb %ch, (%rdi)
1080#endif
1081 RETURN
1082
/* StrncpyExit1: copy exactly 1 byte from (%rsi) to (%rdi).
   USE_AS_STPCPY: %rax = %rdi + 1, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 1(%rdi), taken from %ch after it is
   cleared with xor. */
1083 .p2align 4
1084L(StrncpyExit1):
1085 mov (%rsi), %dl
1086 mov %dl, (%rdi)
1087#ifdef USE_AS_STPCPY
1088 lea 1(%rdi), %rax
1089#endif
1090#ifdef USE_AS_STRCAT
1091 xor %ch, %ch
1092 movb %ch, 1(%rdi)
1093#endif
1094 RETURN
1095
/* StrncpyExit2: copy exactly 2 bytes from (%rsi) to (%rdi) with one 2-byte
   move.
   USE_AS_STPCPY: %rax = %rdi + 2, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 2(%rdi), taken from %ch after it is
   cleared with xor. */
1096 .p2align 4
1097L(StrncpyExit2):
1098 mov (%rsi), %dx
1099 mov %dx, (%rdi)
1100#ifdef USE_AS_STPCPY
1101 lea 2(%rdi), %rax
1102#endif
1103#ifdef USE_AS_STRCAT
1104 xor %ch, %ch
1105 movb %ch, 2(%rdi)
1106#endif
1107 RETURN
1108
/* StrncpyExit3: copy exactly 3 bytes from (%rsi) to (%rdi): a 2-byte move at
   offset 0 plus a 1-byte move at offset 2.
   USE_AS_STPCPY: %rax = %rdi + 3, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 3(%rdi), taken from %ch after it is
   cleared with xor. */
1109 .p2align 4
1110L(StrncpyExit3):
1111 mov (%rsi), %cx
1112 mov 2(%rsi), %dl
1113 mov %cx, (%rdi)
1114 mov %dl, 2(%rdi)
1115#ifdef USE_AS_STPCPY
1116 lea 3(%rdi), %rax
1117#endif
1118#ifdef USE_AS_STRCAT
1119 xor %ch, %ch
1120 movb %ch, 3(%rdi)
1121#endif
1122 RETURN
1123
/* StrncpyExit4: copy exactly 4 bytes from (%rsi) to (%rdi) with one 4-byte
   move.
   USE_AS_STPCPY: %rax = %rdi + 4, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 4(%rdi), taken from %ch after it is
   cleared with xor. */
1124 .p2align 4
1125L(StrncpyExit4):
1126 mov (%rsi), %edx
1127 mov %edx, (%rdi)
1128#ifdef USE_AS_STPCPY
1129 lea 4(%rdi), %rax
1130#endif
1131#ifdef USE_AS_STRCAT
1132 xor %ch, %ch
1133 movb %ch, 4(%rdi)
1134#endif
1135 RETURN
1136
/* StrncpyExit5: copy exactly 5 bytes from (%rsi) to (%rdi): a 4-byte move at
   offset 0 plus a 1-byte move at offset 4.
   USE_AS_STPCPY: %rax = %rdi + 5, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 5(%rdi), taken from %ch after it is
   cleared with xor. */
1137 .p2align 4
1138L(StrncpyExit5):
1139 mov (%rsi), %ecx
1140 mov 4(%rsi), %dl
1141 mov %ecx, (%rdi)
1142 mov %dl, 4(%rdi)
1143#ifdef USE_AS_STPCPY
1144 lea 5(%rdi), %rax
1145#endif
1146#ifdef USE_AS_STRCAT
1147 xor %ch, %ch
1148 movb %ch, 5(%rdi)
1149#endif
1150 RETURN
1151
/* StrncpyExit6: copy exactly 6 bytes from (%rsi) to (%rdi): a 4-byte move at
   offset 0 plus a 2-byte move at offset 4.
   USE_AS_STPCPY: %rax = %rdi + 6, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 6(%rdi), taken from %ch after it is
   cleared with xor. */
1152 .p2align 4
1153L(StrncpyExit6):
1154 mov (%rsi), %ecx
1155 mov 4(%rsi), %dx
1156 mov %ecx, (%rdi)
1157 mov %dx, 4(%rdi)
1158#ifdef USE_AS_STPCPY
1159 lea 6(%rdi), %rax
1160#endif
1161#ifdef USE_AS_STRCAT
1162 xor %ch, %ch
1163 movb %ch, 6(%rdi)
1164#endif
1165 RETURN
1166
/* StrncpyExit7: copy exactly 7 bytes from (%rsi) to (%rdi) with two
   overlapping 4-byte moves at offsets 0 and 3.
   USE_AS_STPCPY: %rax = %rdi + 7, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 7(%rdi), taken from %ch after it is
   cleared with xor. */
1167 .p2align 4
1168L(StrncpyExit7):
1169 mov (%rsi), %ecx
1170 mov 3(%rsi), %edx
1171 mov %ecx, (%rdi)
1172 mov %edx, 3(%rdi)
1173#ifdef USE_AS_STPCPY
1174 lea 7(%rdi), %rax
1175#endif
1176#ifdef USE_AS_STRCAT
1177 xor %ch, %ch
1178 movb %ch, 7(%rdi)
1179#endif
1180 RETURN
1181
/* StrncpyExit8: copy exactly 8 bytes from (%rsi) to (%rdi) with one 8-byte
   move.
   USE_AS_STPCPY: %rax = %rdi + 8, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 8(%rdi), taken from %ch after it is
   cleared with xor. */
1182 .p2align 4
1183L(StrncpyExit8):
1184 mov (%rsi), %rdx
1185 mov %rdx, (%rdi)
1186#ifdef USE_AS_STPCPY
1187 lea 8(%rdi), %rax
1188#endif
1189#ifdef USE_AS_STRCAT
1190 xor %ch, %ch
1191 movb %ch, 8(%rdi)
1192#endif
1193 RETURN
1194
/* StrncpyExit9: copy exactly 9 bytes from (%rsi) to (%rdi): an 8-byte move at
   offset 0 plus a 1-byte move at offset 8.
   USE_AS_STPCPY: %rax = %rdi + 9, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 9(%rdi), taken from %ch after it is
   cleared with xor. */
1195 .p2align 4
1196L(StrncpyExit9):
1197 mov (%rsi), %rcx
1198 mov 8(%rsi), %dl
1199 mov %rcx, (%rdi)
1200 mov %dl, 8(%rdi)
1201#ifdef USE_AS_STPCPY
1202 lea 9(%rdi), %rax
1203#endif
1204#ifdef USE_AS_STRCAT
1205 xor %ch, %ch
1206 movb %ch, 9(%rdi)
1207#endif
1208 RETURN
1209
/* StrncpyExit10: copy exactly 10 bytes from (%rsi) to (%rdi): an 8-byte move
   at offset 0 plus a 2-byte move at offset 8.
   USE_AS_STPCPY: %rax = %rdi + 10, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 10(%rdi), taken from %ch after it
   is cleared with xor. */
1210 .p2align 4
1211L(StrncpyExit10):
1212 mov (%rsi), %rcx
1213 mov 8(%rsi), %dx
1214 mov %rcx, (%rdi)
1215 mov %dx, 8(%rdi)
1216#ifdef USE_AS_STPCPY
1217 lea 10(%rdi), %rax
1218#endif
1219#ifdef USE_AS_STRCAT
1220 xor %ch, %ch
1221 movb %ch, 10(%rdi)
1222#endif
1223 RETURN
1224
/* StrncpyExit11: copy exactly 11 bytes from (%rsi) to (%rdi): an 8-byte move
   at offset 0 plus an overlapping 4-byte move at offset 7.
   USE_AS_STPCPY: %rax = %rdi + 11, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 11(%rdi), taken from %ch after it
   is cleared with xor. */
1225 .p2align 4
1226L(StrncpyExit11):
1227 mov (%rsi), %rcx
1228 mov 7(%rsi), %edx
1229 mov %rcx, (%rdi)
1230 mov %edx, 7(%rdi)
1231#ifdef USE_AS_STPCPY
1232 lea 11(%rdi), %rax
1233#endif
1234#ifdef USE_AS_STRCAT
1235 xor %ch, %ch
1236 movb %ch, 11(%rdi)
1237#endif
1238 RETURN
1239
/* StrncpyExit12: copy exactly 12 bytes from (%rsi) to (%rdi): an 8-byte move
   at offset 0 plus a 4-byte move at offset 8.
   USE_AS_STPCPY: %rax = %rdi + 12, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 12(%rdi), taken from %ch after it
   is cleared with xor. */
1240 .p2align 4
1241L(StrncpyExit12):
1242 mov (%rsi), %rcx
1243 mov 8(%rsi), %edx
1244 mov %rcx, (%rdi)
1245 mov %edx, 8(%rdi)
1246#ifdef USE_AS_STPCPY
1247 lea 12(%rdi), %rax
1248#endif
1249#ifdef USE_AS_STRCAT
1250 xor %ch, %ch
1251 movb %ch, 12(%rdi)
1252#endif
1253 RETURN
1254
/* StrncpyExit13: copy exactly 13 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves at offsets 0 and 5.
   USE_AS_STPCPY: %rax = %rdi + 13, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 13(%rdi), taken from %ch after it
   is cleared with xor. */
1255 .p2align 4
1256L(StrncpyExit13):
1257 mov (%rsi), %rcx
1258 mov 5(%rsi), %rdx
1259 mov %rcx, (%rdi)
1260 mov %rdx, 5(%rdi)
1261#ifdef USE_AS_STPCPY
1262 lea 13(%rdi), %rax
1263#endif
1264#ifdef USE_AS_STRCAT
1265 xor %ch, %ch
1266 movb %ch, 13(%rdi)
1267#endif
1268 RETURN
1269
/* StrncpyExit14: copy exactly 14 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves at offsets 0 and 6.
   USE_AS_STPCPY: %rax = %rdi + 14, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 14(%rdi), taken from %ch after it
   is cleared with xor. */
1270 .p2align 4
1271L(StrncpyExit14):
1272 mov (%rsi), %rcx
1273 mov 6(%rsi), %rdx
1274 mov %rcx, (%rdi)
1275 mov %rdx, 6(%rdi)
1276#ifdef USE_AS_STPCPY
1277 lea 14(%rdi), %rax
1278#endif
1279#ifdef USE_AS_STRCAT
1280 xor %ch, %ch
1281 movb %ch, 14(%rdi)
1282#endif
1283 RETURN
1284
/* StrncpyExit15: copy exactly 15 bytes from (%rsi) to (%rdi) with two
   overlapping 8-byte moves at offsets 0 and 7.
   USE_AS_STPCPY: %rax = %rdi + 15, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 15(%rdi), taken from %ch after it
   is cleared with xor. */
1285 .p2align 4
1286L(StrncpyExit15):
1287 mov (%rsi), %rcx
1288 mov 7(%rsi), %rdx
1289 mov %rcx, (%rdi)
1290 mov %rdx, 7(%rdi)
1291#ifdef USE_AS_STPCPY
1292 lea 15(%rdi), %rax
1293#endif
1294#ifdef USE_AS_STRCAT
1295 xor %ch, %ch
1296 movb %ch, 15(%rdi)
1297#endif
1298 RETURN
1299
/* StrncpyExit16: copy exactly 16 bytes from (%rsi) to (%rdi) with one
   unaligned 16-byte SSE move.
   USE_AS_STPCPY: %rax = %rdi + 16, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 16(%rdi), taken from %ch after it
   is cleared with xor. */
1300 .p2align 4
1301L(StrncpyExit16):
1302 movdqu (%rsi), %xmm0
1303 movdqu %xmm0, (%rdi)
1304#ifdef USE_AS_STPCPY
1305 lea 16(%rdi), %rax
1306#endif
1307#ifdef USE_AS_STRCAT
1308 xor %ch, %ch
1309 movb %ch, 16(%rdi)
1310#endif
1311 RETURN
1312
/* StrncpyExit17: copy exactly 17 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0 plus a 1-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 17, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 17(%rdi), taken from %ch after it
   is cleared with xor. */
1313 .p2align 4
1314L(StrncpyExit17):
1315 movdqu (%rsi), %xmm0
1316 mov 16(%rsi), %cl
1317 movdqu %xmm0, (%rdi)
1318 mov %cl, 16(%rdi)
1319#ifdef USE_AS_STPCPY
1320 lea 17(%rdi), %rax
1321#endif
1322#ifdef USE_AS_STRCAT
1323 xor %ch, %ch
1324 movb %ch, 17(%rdi)
1325#endif
1326 RETURN
1327
/* StrncpyExit18: copy exactly 18 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0 plus a 2-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 18, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 18(%rdi), taken from %ch after it
   is cleared with xor. */
1328 .p2align 4
1329L(StrncpyExit18):
1330 movdqu (%rsi), %xmm0
1331 mov 16(%rsi), %cx
1332 movdqu %xmm0, (%rdi)
1333 mov %cx, 16(%rdi)
1334#ifdef USE_AS_STPCPY
1335 lea 18(%rdi), %rax
1336#endif
1337#ifdef USE_AS_STRCAT
1338 xor %ch, %ch
1339 movb %ch, 18(%rdi)
1340#endif
1341 RETURN
1342
/* StrncpyExit19: copy exactly 19 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0 plus an overlapping 4-byte move at offset 15.
   USE_AS_STPCPY: %rax = %rdi + 19, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 19(%rdi), taken from %ch after it
   is cleared with xor. */
1343 .p2align 4
1344L(StrncpyExit19):
1345 movdqu (%rsi), %xmm0
1346 mov 15(%rsi), %ecx
1347 movdqu %xmm0, (%rdi)
1348 mov %ecx, 15(%rdi)
1349#ifdef USE_AS_STPCPY
1350 lea 19(%rdi), %rax
1351#endif
1352#ifdef USE_AS_STRCAT
1353 xor %ch, %ch
1354 movb %ch, 19(%rdi)
1355#endif
1356 RETURN
1357
/* StrncpyExit20: copy exactly 20 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0 plus a 4-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 20, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 20(%rdi), taken from %ch after it
   is cleared with xor. */
1358 .p2align 4
1359L(StrncpyExit20):
1360 movdqu (%rsi), %xmm0
1361 mov 16(%rsi), %ecx
1362 movdqu %xmm0, (%rdi)
1363 mov %ecx, 16(%rdi)
1364#ifdef USE_AS_STPCPY
1365 lea 20(%rdi), %rax
1366#endif
1367#ifdef USE_AS_STRCAT
1368 xor %ch, %ch
1369 movb %ch, 20(%rdi)
1370#endif
1371 RETURN
1372
/* StrncpyExit21: copy exactly 21 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0, a 4-byte move at offset 16 and a 1-byte move at offset 20.
   USE_AS_STPCPY: %rax = %rdi + 21, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 21(%rdi), taken from %ch after it
   is cleared with xor. */
1373 .p2align 4
1374L(StrncpyExit21):
1375 movdqu (%rsi), %xmm0
1376 mov 16(%rsi), %ecx
1377 mov 20(%rsi), %dl
1378 movdqu %xmm0, (%rdi)
1379 mov %ecx, 16(%rdi)
1380 mov %dl, 20(%rdi)
1381#ifdef USE_AS_STPCPY
1382 lea 21(%rdi), %rax
1383#endif
1384#ifdef USE_AS_STRCAT
1385 xor %ch, %ch
1386 movb %ch, 21(%rdi)
1387#endif
1388 RETURN
1389
/* StrncpyExit22: copy exactly 22 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0 plus an overlapping 8-byte move at offset 14.
   USE_AS_STPCPY: %rax = %rdi + 22, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 22(%rdi), taken from %ch after it
   is cleared with xor. */
1390 .p2align 4
1391L(StrncpyExit22):
1392 movdqu (%rsi), %xmm0
1393 mov 14(%rsi), %rcx
1394 movdqu %xmm0, (%rdi)
1395 mov %rcx, 14(%rdi)
1396#ifdef USE_AS_STPCPY
1397 lea 22(%rdi), %rax
1398#endif
1399#ifdef USE_AS_STRCAT
1400 xor %ch, %ch
1401 movb %ch, 22(%rdi)
1402#endif
1403 RETURN
1404
/* StrncpyExit23: copy exactly 23 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0 plus an overlapping 8-byte move at offset 15.
   USE_AS_STPCPY: %rax = %rdi + 23, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 23(%rdi), taken from %ch after it
   is cleared with xor. */
1405 .p2align 4
1406L(StrncpyExit23):
1407 movdqu (%rsi), %xmm0
1408 mov 15(%rsi), %rcx
1409 movdqu %xmm0, (%rdi)
1410 mov %rcx, 15(%rdi)
1411#ifdef USE_AS_STPCPY
1412 lea 23(%rdi), %rax
1413#endif
1414#ifdef USE_AS_STRCAT
1415 xor %ch, %ch
1416 movb %ch, 23(%rdi)
1417#endif
1418 RETURN
1419
/* StrncpyExit24: copy exactly 24 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0 plus an 8-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 24, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 24(%rdi), taken from %ch after it
   is cleared with xor. */
1420 .p2align 4
1421L(StrncpyExit24):
1422 movdqu (%rsi), %xmm0
1423 mov 16(%rsi), %rcx
1424 movdqu %xmm0, (%rdi)
1425 mov %rcx, 16(%rdi)
1426#ifdef USE_AS_STPCPY
1427 lea 24(%rdi), %rax
1428#endif
1429#ifdef USE_AS_STRCAT
1430 xor %ch, %ch
1431 movb %ch, 24(%rdi)
1432#endif
1433 RETURN
1434
/* StrncpyExit25: copy exactly 25 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0, an 8-byte move at offset 16 and a 1-byte move at offset 24.
   USE_AS_STPCPY: %rax = %rdi + 25, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 25(%rdi), taken from %ch after it
   is cleared with xor. */
1435 .p2align 4
1436L(StrncpyExit25):
1437 movdqu (%rsi), %xmm0
1438 mov 16(%rsi), %rdx
1439 mov 24(%rsi), %cl
1440 movdqu %xmm0, (%rdi)
1441 mov %rdx, 16(%rdi)
1442 mov %cl, 24(%rdi)
1443#ifdef USE_AS_STPCPY
1444 lea 25(%rdi), %rax
1445#endif
1446#ifdef USE_AS_STRCAT
1447 xor %ch, %ch
1448 movb %ch, 25(%rdi)
1449#endif
1450 RETURN
1451
/* StrncpyExit26: copy exactly 26 bytes from (%rsi) to (%rdi): a 16-byte SSE
   move at offset 0, an 8-byte move at offset 16 and a 2-byte move at offset 24.
   USE_AS_STPCPY: %rax = %rdi + 26, one past the last byte copied.
   USE_AS_STRCAT: a zero byte is stored at 26(%rdi), taken from %ch after it
   is cleared with xor. */
1452 .p2align 4
1453L(StrncpyExit26):
1454 movdqu (%rsi), %xmm0
1455 mov 16(%rsi), %rdx
1456 mov 24(%rsi), %cx
1457 movdqu %xmm0, (%rdi)
1458 mov %rdx, 16(%rdi)
1459 mov %cx, 24(%rdi)
1460#ifdef USE_AS_STPCPY
1461 lea 26(%rdi), %rax
1462#endif
1463#ifdef USE_AS_STRCAT
1464 xor %ch, %ch
1465 movb %ch, 26(%rdi)
1466#endif
1467 RETURN
1468
/* L(StrncpyExit27): copy exactly 27 bytes from (%rsi) to (%rdi).
   Bytes 0-15 go through %xmm0, bytes 16-23 through %rdx and bytes 23-26
   through %ecx; the last two moves overlap on byte 23.  All loads precede
   the stores.  Entry 27 of L(ExitStrncpyTable).  */
1469 .p2align 4
1470L(StrncpyExit27):
1471 movdqu (%rsi), %xmm0
1472 mov 16(%rsi), %rdx
1473 mov 23(%rsi), %ecx
1474 movdqu %xmm0, (%rdi)
1475 mov %rdx, 16(%rdi)
1476 mov %ecx, 23(%rdi)
1477#ifdef USE_AS_STPCPY
/* %rax = %rdi + 27, the address just past the copied bytes.  */
1478 lea 27(%rdi), %rax
1479#endif
1480#ifdef USE_AS_STRCAT
/* Zero %ch (after %ecx has been stored) and write it at offset 27.  */
1481 xor %ch, %ch
1482 movb %ch, 27(%rdi)
1483#endif
1484 RETURN
1485
/* L(StrncpyExit28): copy exactly 28 bytes from (%rsi) to (%rdi).
   Bytes 0-15 go through %xmm0, bytes 16-23 through %rdx and bytes 24-27
   through %ecx (no overlap).  All loads precede the stores.
   Entry 28 of L(ExitStrncpyTable).  */
1486 .p2align 4
1487L(StrncpyExit28):
1488 movdqu (%rsi), %xmm0
1489 mov 16(%rsi), %rdx
1490 mov 24(%rsi), %ecx
1491 movdqu %xmm0, (%rdi)
1492 mov %rdx, 16(%rdi)
1493 mov %ecx, 24(%rdi)
1494#ifdef USE_AS_STPCPY
/* %rax = %rdi + 28, the address just past the copied bytes.  */
1495 lea 28(%rdi), %rax
1496#endif
1497#ifdef USE_AS_STRCAT
/* Zero %ch (after %ecx has been stored) and write it at offset 28.  */
1498 xor %ch, %ch
1499 movb %ch, 28(%rdi)
1500#endif
1501 RETURN
1502
/* L(StrncpyExit29): copy exactly 29 bytes from (%rsi) to (%rdi) with two
   unaligned 16-byte moves: bytes 0-15 in %xmm0 and bytes 13-28 in %xmm2
   (overlapping on bytes 13-15).  Both loads precede both stores.
   Entry 29 of L(ExitStrncpyTable).  */
1503 .p2align 4
1504L(StrncpyExit29):
1505 movdqu (%rsi), %xmm0
1506 movdqu 13(%rsi), %xmm2
1507 movdqu %xmm0, (%rdi)
1508 movdqu %xmm2, 13(%rdi)
1509#ifdef USE_AS_STPCPY
/* %rax = %rdi + 29, the address just past the copied bytes.  */
1510 lea 29(%rdi), %rax
1511#endif
1512#ifdef USE_AS_STRCAT
/* Zero %ch and store it as the byte at offset 29.  */
1513 xor %ch, %ch
1514 movb %ch, 29(%rdi)
1515#endif
1516 RETURN
1517
/* L(StrncpyExit30): copy exactly 30 bytes from (%rsi) to (%rdi) with two
   unaligned 16-byte moves: bytes 0-15 in %xmm0 and bytes 14-29 in %xmm2
   (overlapping on bytes 14-15).  Both loads precede both stores.
   Entry 30 of L(ExitStrncpyTable).  */
1518 .p2align 4
1519L(StrncpyExit30):
1520 movdqu (%rsi), %xmm0
1521 movdqu 14(%rsi), %xmm2
1522 movdqu %xmm0, (%rdi)
1523 movdqu %xmm2, 14(%rdi)
1524#ifdef USE_AS_STPCPY
/* %rax = %rdi + 30, the address just past the copied bytes.  */
1525 lea 30(%rdi), %rax
1526#endif
1527#ifdef USE_AS_STRCAT
/* Zero %ch and store it as the byte at offset 30.  */
1528 xor %ch, %ch
1529 movb %ch, 30(%rdi)
1530#endif
1531 RETURN
1532
/* L(StrncpyExit31): copy exactly 31 bytes from (%rsi) to (%rdi) with two
   unaligned 16-byte moves: bytes 0-15 in %xmm0 and bytes 15-30 in %xmm2
   (overlapping on byte 15).  Both loads precede both stores.
   Entry 31 of L(ExitStrncpyTable).  */
1533 .p2align 4
1534L(StrncpyExit31):
1535 movdqu (%rsi), %xmm0
1536 movdqu 15(%rsi), %xmm2
1537 movdqu %xmm0, (%rdi)
1538 movdqu %xmm2, 15(%rdi)
1539#ifdef USE_AS_STPCPY
/* %rax = %rdi + 31, the address just past the copied bytes.  */
1540 lea 31(%rdi), %rax
1541#endif
1542#ifdef USE_AS_STRCAT
/* Zero %ch and store it as the byte at offset 31.  */
1543 xor %ch, %ch
1544 movb %ch, 31(%rdi)
1545#endif
1546 RETURN
1547
/* L(StrncpyExit32): copy exactly 32 bytes from (%rsi) to (%rdi) with two
   unaligned 16-byte moves: bytes 0-15 in %xmm0 and bytes 16-31 in %xmm2.
   Both loads precede both stores.  Entry 32 of L(ExitStrncpyTable).  */
1548 .p2align 4
1549L(StrncpyExit32):
1550 movdqu (%rsi), %xmm0
1551 movdqu 16(%rsi), %xmm2
1552 movdqu %xmm0, (%rdi)
1553 movdqu %xmm2, 16(%rdi)
1554#ifdef USE_AS_STPCPY
/* %rax = %rdi + 32, the address just past the copied bytes.  */
1555 lea 32(%rdi), %rax
1556#endif
1557#ifdef USE_AS_STRCAT
/* Zero %ch and store it as the byte at offset 32.  */
1558 xor %ch, %ch
1559 movb %ch, 32(%rdi)
1560#endif
1561 RETURN
1562
/* L(StrncpyExit33): copy exactly 33 bytes from (%rsi) to (%rdi).
   Bytes 0-15 go through %xmm0, bytes 16-31 through %xmm2 and byte 32
   through %cl.  All loads precede the stores.  Last entry (index 33) of
   L(ExitStrncpyTable).
   NOTE(review): unlike the neighbouring stubs there is no USE_AS_STPCPY
   block here, so this stub does not set %rax in that flavour - presumably
   it is only reached in builds where that does not matter; confirm.  */
1563 .p2align 4
1564L(StrncpyExit33):
1565 movdqu (%rsi), %xmm0
1566 movdqu 16(%rsi), %xmm2
1567 mov 32(%rsi), %cl
1568 movdqu %xmm0, (%rdi)
1569 movdqu %xmm2, 16(%rdi)
1570 mov %cl, 32(%rdi)
1571#ifdef USE_AS_STRCAT
/* Zero %ch and store it as the byte at offset 33.  */
1572 xor %ch, %ch
1573 movb %ch, 33(%rdi)
1574#endif
1575 RETURN
1576
1577#ifndef USE_AS_STRCAT
1578
/* L(Fill0): entry 0 of L(FillTable).  Writes nothing and goes straight to
   RETURN (a macro defined outside this chunk).  */
1579 .p2align 4
1580L(Fill0):
1581 RETURN
1582
/* L(Fill1): entry 1 of L(FillTable).  Stores %dl to the single byte at
   (%rdi).  L(StrncpyFillTailWithZero) clears %rdx before dispatching, so on
   that path the byte written is zero.  */
1583 .p2align 4
1584L(Fill1):
1585 mov %dl, (%rdi)
1586 RETURN
1587
/* L(Fill2): entry 2 of L(FillTable).  Stores %dx to the 2 bytes at (%rdi).
   L(StrncpyFillTailWithZero) clears %rdx before dispatching, so on that
   path the bytes written are zero.  */
1588 .p2align 4
1589L(Fill2):
1590 mov %dx, (%rdi)
1591 RETURN
1592
/* L(Fill3): entry 3 of L(FillTable).  A single 4-byte store of %edx that
   starts at %rdi - 1, covering offsets -1 .. 2.  %rdx is zero on the
   L(StrncpyFillTailWithZero) path.
   NOTE(review): the byte at offset -1 is rewritten as well; presumably it
   is already zero (terminator or earlier fill) - confirm for all callers.  */
1593 .p2align 4
1594L(Fill3):
1595 mov %edx, -1(%rdi)
1596 RETURN
1597
/* L(Fill4): entry 4 of L(FillTable).  Stores %edx to the 4 bytes at
   (%rdi); %rdx is zero on the L(StrncpyFillTailWithZero) path.  */
1598 .p2align 4
1599L(Fill4):
1600 mov %edx, (%rdi)
1601 RETURN
1602
/* L(Fill5): entry 5 of L(FillTable).  Writes 5 bytes at (%rdi): offsets
   0-3 from %edx, offset 4 from %dl.  %rdx is zero on the
   L(StrncpyFillTailWithZero) path.  */
1603 .p2align 4
1604L(Fill5):
1605 mov %edx, (%rdi)
1606 mov %dl, 4(%rdi)
1607 RETURN
1608
/* L(Fill6): entry 6 of L(FillTable).  Writes 6 bytes at (%rdi): offsets
   0-3 from %edx, offsets 4-5 from %dx.  %rdx is zero on the
   L(StrncpyFillTailWithZero) path.  */
1609 .p2align 4
1610L(Fill6):
1611 mov %edx, (%rdi)
1612 mov %dx, 4(%rdi)
1613 RETURN
1614
/* L(Fill7): entry 7 of L(FillTable).  A single 8-byte store of %rdx that
   starts at %rdi - 1, covering offsets -1 .. 6.  %rdx is zero on the
   L(StrncpyFillTailWithZero) path.
   NOTE(review): the byte at offset -1 is rewritten as well; presumably it
   is already zero (terminator or earlier fill) - confirm for all callers.  */
1615 .p2align 4
1616L(Fill7):
1617 mov %rdx, -1(%rdi)
1618 RETURN
1619
/* L(Fill8): entry 8 of L(FillTable).  Stores %rdx to the 8 bytes at
   (%rdi); %rdx is zero on the L(StrncpyFillTailWithZero) path.  */
1620 .p2align 4
1621L(Fill8):
1622 mov %rdx, (%rdi)
1623 RETURN
1624
/* L(Fill9): entry 9 of L(FillTable).  Writes 9 bytes at (%rdi): offsets
   0-7 from %rdx, offset 8 from %dl.  %rdx is zero on the
   L(StrncpyFillTailWithZero) path.  */
1625 .p2align 4
1626L(Fill9):
1627 mov %rdx, (%rdi)
1628 mov %dl, 8(%rdi)
1629 RETURN
1630
/* L(Fill10): entry 10 of L(FillTable).  Writes 10 bytes at (%rdi): offsets
   0-7 from %rdx, offsets 8-9 from %dx.  %rdx is zero on the
   L(StrncpyFillTailWithZero) path.  */
1631 .p2align 4
1632L(Fill10):
1633 mov %rdx, (%rdi)
1634 mov %dx, 8(%rdi)
1635 RETURN
1636
/* L(Fill11): entry 11 of L(FillTable).  Writes 11 bytes at (%rdi): offsets
   0-7 from %rdx and offsets 7-10 from %edx (the stores overlap on offset
   7).  %rdx is zero on the L(StrncpyFillTailWithZero) path.  */
1637 .p2align 4
1638L(Fill11):
1639 mov %rdx, (%rdi)
1640 mov %edx, 7(%rdi)
1641 RETURN
1642
/* L(Fill12): entry 12 of L(FillTable).  Writes 12 bytes at (%rdi): offsets
   0-7 from %rdx, offsets 8-11 from %edx.  %rdx is zero on the
   L(StrncpyFillTailWithZero) path.  */
1643 .p2align 4
1644L(Fill12):
1645 mov %rdx, (%rdi)
1646 mov %edx, 8(%rdi)
1647 RETURN
1648
/* L(Fill13): entry 13 of L(FillTable).  Writes 13 bytes at (%rdi) with two
   8-byte stores of %rdx at offsets 0 and 5 (overlapping on offsets 5-7).
   %rdx is zero on the L(StrncpyFillTailWithZero) path.  */
1649 .p2align 4
1650L(Fill13):
1651 mov %rdx, (%rdi)
1652 mov %rdx, 5(%rdi)
1653 RETURN
1654
/* L(Fill14): entry 14 of L(FillTable).  Writes 14 bytes at (%rdi) with two
   8-byte stores of %rdx at offsets 0 and 6 (overlapping on offsets 6-7).
   %rdx is zero on the L(StrncpyFillTailWithZero) path.  */
1655 .p2align 4
1656L(Fill14):
1657 mov %rdx, (%rdi)
1658 mov %rdx, 6(%rdi)
1659 RETURN
1660
/* L(Fill15): entry 15 of L(FillTable).  A single unaligned 16-byte store
   of %xmm0 that starts at %rdi - 1, covering offsets -1 .. 14.  %xmm0 is
   cleared by L(StrncpyFillTailWithZero) before it dispatches here.
   NOTE(review): the byte at offset -1 is rewritten as well; presumably it
   is already zero (terminator or earlier fill) - confirm for all callers.  */
1661 .p2align 4
1662L(Fill15):
1663 movdqu %xmm0, -1(%rdi)
1664 RETURN
1665
/* L(Fill16): entry 16 (last) of L(FillTable).  One unaligned 16-byte store
   of %xmm0 at (%rdi); %xmm0 is cleared by L(StrncpyFillTailWithZero)
   before it dispatches here.  */
1666 .p2align 4
1667L(Fill16):
1668 movdqu %xmm0, (%rdi)
1669 RETURN
1670
/* L(CopyFrom1To16BytesUnalignedXmm2): store the 16 bytes held in %xmm2 at
   %rdi + %rcx (unaligned store), then fall through - there is no jump or
   RETURN - into L(CopyFrom1To16BytesXmmExit) that follows.  */
1671 .p2align 4
1672L(CopyFrom1To16BytesUnalignedXmm2):
1673 movdqu %xmm2, (%rdi, %rcx)
1674
/* L(CopyFrom1To16BytesXmmExit): bookkeeping before the zero fill.
   In:  %rdx = bit mask (presumably the NUL-byte mask of the current
               16-byte chunk - produced outside this chunk; confirm),
        %rcx = offset added to %rdi, %r8 = running length counter.
   Out: %rdx = index i of the lowest set bit of the mask,
        %r8  = %r8 + 15 - i,
        %rdi = %rdi + %rcx + i + 1.
   Falls through into L(StrncpyFillTailWithZero).  */
1675 .p2align 4
1676L(CopyFrom1To16BytesXmmExit):
1677 bsf %rdx, %rdx
1678 add $15, %r8
1679 add %rcx, %rdi
1680#ifdef USE_AS_STPCPY
/* %rax = address of byte i within the chunk (set before %rdi moves on).  */
1681 lea (%rdi, %rdx), %rax
1682#endif
1683 sub %rdx, %r8
1684 lea 1(%rdi, %rdx), %rdi
1685
/* L(StrncpyFillTailWithZero): write zero bytes starting at %rdi, with the
   amount taken from %r8, finishing through L(FillTable).
   Clears %xmm0 and %rdx (the L(FillN) stubs store from these registers)
   and uses %rsi as scratch.  BRANCH_TO_JMPTBL_ENTRY is a macro defined
   outside this chunk - presumably an indexed jump through the table using
   %r8 with 4-byte entries; confirm.  */
1686 .p2align 4
1687L(StrncpyFillTailWithZero):
1688 pxor %xmm0, %xmm0
1689 xor %rdx, %rdx
/* If %r8 <= 16 (unsigned) leave at once; L(StrncpyFillExit) adds the 16
   back before dispatching.  */
1690 sub $16, %r8
1691 jbe L(StrncpyFillExit)
1692
/* One unaligned 16-byte zero store, then advance past it.  */
1693 movdqu %xmm0, (%rdi)
1694 add $16, %rdi
1695
/* Round %rdi down to a 16-byte boundary and add the bytes stepped back
   over (%rdi & 0xf) to %r8, so the movdqa stores below are aligned.  */
1696 mov %rdi, %rsi
1697 and $0xf, %rsi
1698 sub %rsi, %rdi
1699 add %rsi, %r8
1700 sub $64, %r8
1701 jb L(StrncpyFillLess64)
1702
/* Main loop: 64 zero bytes per iteration using four aligned stores; keeps
   going while the subtraction of 64 from %r8 does not borrow.  */
1703L(StrncpyFillLoopMovdqa):
1704 movdqa %xmm0, (%rdi)
1705 movdqa %xmm0, 16(%rdi)
1706 movdqa %xmm0, 32(%rdi)
1707 movdqa %xmm0, 48(%rdi)
1708 add $64, %rdi
1709 sub $64, %r8
1710 jae L(StrncpyFillLoopMovdqa)
1711
/* %r8 is biased by -64 here.  Adding 32 leaves it negative (signed) when
   fewer than 32 bytes remain; otherwise write 32, and then 16 more if the
   further subtraction of 16 does not go negative.  */
1712L(StrncpyFillLess64):
1713 add $32, %r8
1714 jl L(StrncpyFillLess32)
1715 movdqa %xmm0, (%rdi)
1716 movdqa %xmm0, 16(%rdi)
1717 add $32, %rdi
1718 sub $16, %r8
1719 jl L(StrncpyFillExit)
1720 movdqa %xmm0, (%rdi)
1721 add $16, %rdi
1722 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1723
/* Fewer than 32 bytes remain: write one more aligned 16-byte block unless
   adding 16 still leaves %r8 negative.  */
1724L(StrncpyFillLess32):
1725 add $16, %r8
1726 jl L(StrncpyFillExit)
1727 movdqa %xmm0, (%rdi)
1728 add $16, %rdi
1729 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1730
/* Undo the last 16-byte bias on %r8 and dispatch on the result.  */
1731L(StrncpyFillExit):
1732 add $16, %r8
1733 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1734
1735/* end of ifndef USE_AS_STRCAT */
1736#endif
1737
/* L(UnalignedLeaveCase2OrCase3): choose between the two leave paths.
   A non-zero %rdx goes to L(Unaligned64LeaveCase2); a zero %rdx falls into
   L(Unaligned64LeaveCase3).  %rdx, %r8 and %xmm4-%xmm7 are set up outside
   this chunk - presumably a NUL mask, a biased remaining length and four
   consecutive 16-byte source chunks; confirm against the caller.  */
1738 .p2align 4
1739L(UnalignedLeaveCase2OrCase3):
1740 test %rdx, %rdx
1741 jnz L(Unaligned64LeaveCase2)
/* Case 3: store %xmm4..%xmm7 to (%rdi) in 16-byte steps, leaving for
   L(CopyFrom1To16BytesCase3) as soon as the %r8 bookkeeping says so.
   %rcx = (%r8 + 64) rounded down to a multiple of 16, computed from the
   entry value of %r8 - presumably consumed by that target; confirm.  */
1742L(Unaligned64LeaveCase3):
1743 lea 64(%r8), %rcx
1744 and $-16, %rcx
/* First check is signed (jl); the later ones test the borrow (jb).  */
1745 add $48, %r8
1746 jl L(CopyFrom1To16BytesCase3)
1747 movdqu %xmm4, (%rdi)
1748 sub $16, %r8
1749 jb L(CopyFrom1To16BytesCase3)
1750 movdqu %xmm5, 16(%rdi)
1751 sub $16, %r8
1752 jb L(CopyFrom1To16BytesCase3)
1753 movdqu %xmm6, 32(%rdi)
1754 sub $16, %r8
1755 jb L(CopyFrom1To16BytesCase3)
/* All four checks passed: the fourth chunk completes a 64-byte store.  */
1756 movdqu %xmm7, 48(%rdi)
1757#ifdef USE_AS_STPCPY
/* %rax = %rdi + 64, the address just past the stored bytes.  */
1758 lea 64(%rdi), %rax
1759#endif
1760#ifdef USE_AS_STRCAT
/* Zero %ch and store it as the byte at offset 64.  */
1761 xor %ch, %ch
1762 movb %ch, 64(%rdi)
1763#endif
1764 RETURN
1765
/* L(Unaligned64LeaveCase2): walk the chunks in %xmm4..%xmm7 one at a time.
   For each chunk: compare it byte-wise against %xmm0 and gather the result
   into %rdx, store the previous chunk, update %rcx (byte offset, starts at
   0, +16 per step) and %r8 (-16 per step), then branch out either on the
   %r8 check or on a non-zero mask.
   %xmm0 is presumably all-zero on entry so the mask flags NUL bytes -
   confirm against the caller.  Whenever execution falls through a jnz the
   mask was zero, so %xmm0 (the compare result) is all-zero again for the
   next pcmpeqb.  */
1766 .p2align 4
1767L(Unaligned64LeaveCase2):
1768 xor %rcx, %rcx
1769 pcmpeqb %xmm4, %xmm0
1770 pmovmskb %xmm0, %rdx
1771 add $48, %r8
1772 jle L(CopyFrom1To16BytesCase2OrCase3)
1773 test %rdx, %rdx
/* Mask hit in the first chunk: target depends on the build flavour.  */
1774#ifndef USE_AS_STRCAT
1775 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1776#else
1777 jnz L(CopyFrom1To16Bytes)
1778#endif
/* Second chunk: mask for %xmm5, store %xmm4 at offset 0.  */
1779 pcmpeqb %xmm5, %xmm0
1780 pmovmskb %xmm0, %rdx
1781 movdqu %xmm4, (%rdi)
1782 add $16, %rcx
1783 sub $16, %r8
1784 jbe L(CopyFrom1To16BytesCase2OrCase3)
1785 test %rdx, %rdx
1786#ifndef USE_AS_STRCAT
1787 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1788#else
1789 jnz L(CopyFrom1To16Bytes)
1790#endif
1791
/* Third chunk: mask for %xmm6, store %xmm5 at offset 16.  */
1792 pcmpeqb %xmm6, %xmm0
1793 pmovmskb %xmm0, %rdx
1794 movdqu %xmm5, 16(%rdi)
1795 add $16, %rcx
1796 sub $16, %r8
1797 jbe L(CopyFrom1To16BytesCase2OrCase3)
1798 test %rdx, %rdx
1799#ifndef USE_AS_STRCAT
1800 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1801#else
1802 jnz L(CopyFrom1To16Bytes)
1803#endif
1804
/* Fourth chunk: mask for %xmm7, store %xmm6 at offset 32, then advance
   both pointers by %rcx + 16 (%rcx is 32 here, so by 48).  %rdx becomes the
   index of the lowest set mask bit; if that index is below %r8 (unsigned)
   go to L(CopyFrom1To16BytesExit), otherwise dispatch through
   L(ExitStrncpyTable) on %r8.  */
1805 pcmpeqb %xmm7, %xmm0
1806 pmovmskb %xmm0, %rdx
1807 movdqu %xmm6, 32(%rdi)
1808 lea 16(%rdi, %rcx), %rdi
1809 lea 16(%rsi, %rcx), %rsi
1810 bsf %rdx, %rdx
1811 cmp %r8, %rdx
1812 jb L(CopyFrom1To16BytesExit)
1813 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
1814
/* L(ExitZero): leave without copying anything.  Unless built with
   USE_AS_STRCAT, %rax is set to %rdi first; then RETURN (macro defined
   outside this chunk).  */
1815 .p2align 4
1816L(ExitZero):
1817#ifndef USE_AS_STRCAT
1818 mov %rdi, %rax
1819#endif
1820 RETURN
1821
1822#endif
1823
/* Invoke END with STRCPY, or with STRCAT when USE_AS_STRCAT is defined.
   END is defined outside this chunk - presumably it closes the function
   opened by the matching ENTRY; confirm.  */
1824#ifndef USE_AS_STRCAT
1825END (STRCPY)
1826#else
1827END (STRCAT)
1828#endif
/* L(ExitTable): 32-entry jump table in .rodata, one .int per exit stub in
   the order L(Exit1) .. L(Exit32), so index 0 selects L(Exit1).  Each entry
   is built with JMPTBL(target, table); JMPTBL is defined outside this
   chunk - presumably a table-relative offset; confirm.

   The alignment directive is issued after switching to .rodata so that it
   aligns the table itself.  Placed before .section it only padded the
   section that was current at that point and left the table unaligned.
   The table is 32 * 4 = 128 bytes, so whatever immediately follows it in
   .rodata also starts on a 16-byte boundary.  */
1829 .section .rodata
1830 .p2align 4
1831L(ExitTable):
1832 .int JMPTBL(L(Exit1), L(ExitTable))
1833 .int JMPTBL(L(Exit2), L(ExitTable))
1834 .int JMPTBL(L(Exit3), L(ExitTable))
1835 .int JMPTBL(L(Exit4), L(ExitTable))
1836 .int JMPTBL(L(Exit5), L(ExitTable))
1837 .int JMPTBL(L(Exit6), L(ExitTable))
1838 .int JMPTBL(L(Exit7), L(ExitTable))
1839 .int JMPTBL(L(Exit8), L(ExitTable))
1840 .int JMPTBL(L(Exit9), L(ExitTable))
1841 .int JMPTBL(L(Exit10), L(ExitTable))
1842 .int JMPTBL(L(Exit11), L(ExitTable))
1843 .int JMPTBL(L(Exit12), L(ExitTable))
1844 .int JMPTBL(L(Exit13), L(ExitTable))
1845 .int JMPTBL(L(Exit14), L(ExitTable))
1846 .int JMPTBL(L(Exit15), L(ExitTable))
1847 .int JMPTBL(L(Exit16), L(ExitTable))
1848 .int JMPTBL(L(Exit17), L(ExitTable))
1849 .int JMPTBL(L(Exit18), L(ExitTable))
1850 .int JMPTBL(L(Exit19), L(ExitTable))
1851 .int JMPTBL(L(Exit20), L(ExitTable))
1852 .int JMPTBL(L(Exit21), L(ExitTable))
1853 .int JMPTBL(L(Exit22), L(ExitTable))
1854 .int JMPTBL(L(Exit23), L(ExitTable))
1855 .int JMPTBL(L(Exit24), L(ExitTable))
1856 .int JMPTBL(L(Exit25), L(ExitTable))
1857 .int JMPTBL(L(Exit26), L(ExitTable))
1858 .int JMPTBL(L(Exit27), L(ExitTable))
1859 .int JMPTBL(L(Exit28), L(ExitTable))
1860 .int JMPTBL(L(Exit29), L(ExitTable))
1861 .int JMPTBL(L(Exit30), L(ExitTable))
1862 .int JMPTBL(L(Exit31), L(ExitTable))
1863 .int JMPTBL(L(Exit32), L(ExitTable))
1864#ifdef USE_AS_STRNCPY
/* L(ExitStrncpyTable): 34-entry jump table, one .int per stub in the order
   L(StrncpyExit0) .. L(StrncpyExit33), so index n selects the stub that
   copies n bytes.  Only assembled when USE_AS_STRNCPY is defined.  It has
   no alignment directive of its own and directly follows L(ExitTable).
   JMPTBL is defined outside this chunk - presumably a table-relative
   offset; confirm.  */
1865L(ExitStrncpyTable):
1866 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
1867 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1868 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1869 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1870 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1871 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1872 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1873 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1874 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1875 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1876 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1877 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1878 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1879 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1880 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1881 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1882 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1883 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1884 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1885 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1886 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1887 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1888 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1889 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1890 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1891 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1892 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1893 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1894 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1895 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1896 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1897 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1898 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1899 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
/* L(FillTable): 17-entry jump table, one .int per stub in the order
   L(Fill0) .. L(Fill16), so index n selects L(Filln).  Aligned with
   .p2align 4 and only assembled when USE_AS_STRCAT is not defined (inside
   the enclosing USE_AS_STRNCPY conditional).  JMPTBL is defined outside
   this chunk - presumably a table-relative offset; confirm.  */
1900# ifndef USE_AS_STRCAT
1901 .p2align 4
1902L(FillTable):
1903 .int JMPTBL(L(Fill0), L(FillTable))
1904 .int JMPTBL(L(Fill1), L(FillTable))
1905 .int JMPTBL(L(Fill2), L(FillTable))
1906 .int JMPTBL(L(Fill3), L(FillTable))
1907 .int JMPTBL(L(Fill4), L(FillTable))
1908 .int JMPTBL(L(Fill5), L(FillTable))
1909 .int JMPTBL(L(Fill6), L(FillTable))
1910 .int JMPTBL(L(Fill7), L(FillTable))
1911 .int JMPTBL(L(Fill8), L(FillTable))
1912 .int JMPTBL(L(Fill9), L(FillTable))
1913 .int JMPTBL(L(Fill10), L(FillTable))
1914 .int JMPTBL(L(Fill11), L(FillTable))
1915 .int JMPTBL(L(Fill12), L(FillTable))
1916 .int JMPTBL(L(Fill13), L(FillTable))
1917 .int JMPTBL(L(Fill14), L(FillTable))
1918 .int JMPTBL(L(Fill15), L(FillTable))
1919 .int JMPTBL(L(Fill16), L(FillTable))
1920# endif
1921#endif