/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/* L(label) expands to the assembler-local symbol .L<label>.  */
#ifndef L
# define L(label)	.L##label
#endif

/* CFI wrappers.  Each is defined here only when nothing earlier in the
   translation unit has already defined it.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* ENTRY(name): emit a global function symbol aligned to 16 bytes and open
   its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.globl name;	\
	.type name, @function;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the size of the symbol.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif
57
58
/* Name of the exported routine.  The #ifndef lets an including file supply
   a different name before this point.  */
#ifndef STRLCPY
# define STRLCPY	strlcpy
#endif

/* A jump-table entry is the distance from the table base B to the target I.  */
#define JMPTBL(I, B)	I - B

/* Load the address of TABLE, fetch the sign-extended 32-bit entry at
   INDEX * SCALE, add the table address back and jump to the result.
   Overwrites %r11 and %rcx.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	leaq	TABLE(%rip), %r11;	\
	movslq	(%r11, INDEX, SCALE), %rcx;	\
	leaq	(%r11, %rcx), %rcx;	\
	jmp	*%rcx

/* Common epilogue: return %rax plus the amount accumulated in %r9.  */
#define RETURN	\
	addq	%r9, %rax;	\
	ret
73
/* STRLCPY entry point.
   Register use established here and relied on by the code below:
     %rdi  destination pointer
     %rsi  source pointer
     %rdx  size on entry (copied to %r8)
     %r8   remaining size
     %rax  running byte count; RETURN adds %r9 to it
     %r9   extra amount added to the result by RETURN
   A size of zero skips all copying and only measures the source.  */
.text
ENTRY (STRLCPY)
	xor	%eax, %eax		/* 32-bit form also clears the upper half */
	xor	%r9d, %r9d
	mov	%rdx, %r8
	test	%r8, %r8		/* size == 0?  */
	jz	L(CalculateSrcLen)
81
#ifdef USE_AS_STRLCAT
/* Built only when USE_AS_STRLCAT is defined: look for the NUL that ends the
   string already held in the destination, examining at most %r8 bytes.
   %rax counts the destination bytes scanned; %rcx is zero until the aligned
   loop is entered, after which it holds the low four bits of %rdi.  */
	xorq	%rcx, %rcx
	pxor	%xmm0, %xmm0

	/* First 16 destination bytes.  */
	movdqu	(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmpq	$17, %r8
	jb	L(SizeEndCase1)
	testq	%rdx, %rdx
	jnz	L(StringEndCase1)

	/* Second 16 destination bytes.  */
	addq	$16, %rax
	movdqu	16(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmpq	$33, %r8
	jb	L(SizeEndCase1)
	testq	%rdx, %rdx
	jnz	L(StringEndCase1)

	/* Switch to 16-byte aligned loads: round %rdi down and compensate
	   the remaining size by the rounding amount.  */
	movq	%rdi, %rcx
	andq	$15, %rcx
	andq	$-16, %rdi

	addq	%rcx, %r8
	subq	$16, %r8

L(DstLenLoop):
	movdqa	(%rdi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	subq	$16, %r8
	jbe	L(SizeEndCase2)
	testq	%rdx, %rdx
	jnz	L(StringEndCase2)
	addq	$16, %rax
	jmp	L(DstLenLoop)

/* NUL found inside the aligned loop with size still remaining.  */
L(StringEndCase2):
	addq	$16, %r8
	bsfq	%rdx, %rdx
	subq	%rdx, %r8
	addq	%rdx, %rax
	subq	%rcx, %r9
	addq	%rax, %rdi
	jmp	L(CopySrcString)

/* Size ran out within the first two unaligned blocks.  */
L(SizeEndCase1):
	testq	%rdx, %rdx
	jz	L(SizeEnd)
	bsfq	%rdx, %rdx
	addq	%rdx, %rax
	cmpq	%r8, %rax
	jb	L(StringEnd)
L(SizeEnd):
	movq	%r8, %r9
	jmp	L(CalculateSrcLenCase1)

/* Size ran out inside the aligned loop.  */
L(SizeEndCase2):
	addq	$16, %r8
	testq	%rdx, %rdx
	jz	L(StringEndCase4)
	bsfq	%rdx, %rdx
	cmpq	%r8, %rdx
	jb	L(StringEndCase3)
L(StringEndCase4):
	addq	%r8, %rax
	subq	%rcx, %rax
	movq	%rax, %r9
	jmp	L(CalculateSrcLenCase1)

L(StringEndCase3):
	addq	%rdx, %rax
	subq	%rcx, %r9
	addq	%rax, %rdi
	subq	%rdx, %r8
	jmp	L(CopySrcString)

/* NUL found in one of the first two unaligned blocks (%rcx is still 0).  */
L(StringEndCase1):
	bsfq	%rdx, %rdx
	addq	%rdx, %rax
	subq	%rcx, %rax
L(StringEnd):
	addq	%rax, %rdi
	subq	%rax, %r8
#endif
171
/* Pick the copy strategy from the low six bits of the source address.
   An offset of 32 or less goes to L(CopySrcString).  Otherwise the first
   two blocks are read with 16-byte aligned loads from the rounded-down
   source, and %rcx keeps the low four bits of the original address.  */
	movq	%rsi, %rcx
	andq	$63, %rcx
	cmpq	$32, %rcx
	jbe	L(CopySrcString)

	andq	$-16, %rsi
	andq	$15, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	/* First aligned block: shift out the mask bits that belong to bytes
	   before the real start of the source.  */
	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shrq	%cl, %rdx
	movq	$16, %r10
	subq	%rcx, %r10		/* %r10 = source bytes in this block */
	cmpq	%r10, %r8
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail)

	/* Second aligned block.  */
	pcmpeqb	16(%rsi), %xmm0
	pmovmskb %xmm0, %rdx
	addq	$16, %r10
	cmpq	%r10, %r8
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes)

	/* No NUL so far: copy 16 bytes from the original (unaligned) source
	   address and continue in the aligned loop.  */
	movdqu	(%rsi, %rcx), %xmm1
	movdqu	%xmm1, (%rdi)
#ifdef USE_AS_STRLCAT
	addq	%rax, %r9
#endif
	jmp	L(LoopStart)
206
/* Copy path that starts with two unaligned 16-byte source loads and then
   falls into the aligned loop.  In each step %rdx receives the byte mask
   of NUL positions in the block just loaded.  */
	.p2align 4
L(CopySrcString):
#ifdef USE_AS_STRLCAT
	addq	%rax, %r9
	xorq	%rax, %rax
#endif
	pxor	%xmm0, %xmm0
	movdqu	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmpq	$17, %r8
	jb	L(CopyFrom1To16BytesTail1Case2OrCase3)
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1)

	movdqu	16(%rsi), %xmm2
	pcmpeqb	%xmm2, %xmm0
	movdqu	%xmm1, (%rdi)		/* first block is NUL-free: store it */
	pmovmskb %xmm0, %rdx
	addq	$16, %rax

	cmpq	$33, %r8
	jb	L(CopyFrom1To32Bytes1Case2OrCase3)
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes1)

	/* Round the source down to 16 bytes; %rcx keeps the rounding amount.  */
	movq	%rsi, %rcx
	andq	$15, %rcx
	andq	$-16, %rsi

L(LoopStart):
	/* Shift the destination and the remaining size by the same amount so
	   that one index (%rax) addresses both buffers.  */
	subq	%rcx, %rdi
	addq	%rcx, %r8
	subq	$16, %r8
	movq	$16, %rax

L(16Loop):
	movdqa	(%rsi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	subq	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesXmmExit)
	movdqu	%xmm1, (%rdi, %rax)
	addq	$16, %rax
	jmp	L(16Loop)
255
/*------ End of the main part with loops ------*/

/* Case1: each label turns the NUL mask in %rdx into a byte index, adds it
   to %rax and dispatches through L(ExitStringTailTable) on that index.  */
	.p2align 4
L(CopyFrom1To16Bytes):
	addq	%rcx, %rdi
	addq	%rcx, %rsi
	bsfq	%rdx, %rdx
	addq	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To16BytesTail):
	addq	%rcx, %rsi
	bsfq	%rdx, %rdx
	addq	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	addq	$16, %rsi
	addq	$16, %rdi
	subq	$16, %r8
L(CopyFrom1To16BytesTail1):
	bsfq	%rdx, %rdx
	addq	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	bsfq	%rdx, %rdx
	addq	%rcx, %rsi
	addq	$16, %rdx		/* the mask came from the second block */
	subq	%rcx, %rdx
	addq	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	/* Entered with %rdx already holding the byte index.  */
	.p2align 4
L(CopyFrom1To16BytesExit):
	addq	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
297
/* Case2: a NUL bit is set in %rdx and the size limit falls in the same
   block.  A NUL index below %r8 goes to L(CopyFrom1To16BytesExit);
   otherwise %r8 is added to %rax and used as the index into L(ExitTable).  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	addq	$16, %r8
	addq	%rax, %rdi
	addq	%rax, %rsi
	bsfq	%rdx, %rdx
	subq	%rcx, %rax
	cmpq	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	addq	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	addq	%rcx, %rsi
	bsfq	%rdx, %rdx
	addq	$16, %rdx
	subq	%rcx, %rdx
	cmpq	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	addq	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

L(CopyFrom1To16BytesTailCase2):
	addq	%rcx, %rsi
	bsfq	%rdx, %rdx
	cmpq	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	addq	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTail1Case2):
	bsfq	%rdx, %rdx
	cmpq	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	addq	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
338
/* Case2 or Case3: the size limit falls in the current block.  A non-zero
   NUL mask hands over to the matching Case2 label.  With no NUL bit set
   (Case3) %r8 is added to %rax and used as the index into L(ExitTable).  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesCase2)
	addq	$16, %r8
	addq	%rax, %rdi
	addq	%rax, %rsi
	addq	%r8, %rax
	subq	%rcx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To32BytesCase2)
	addq	%rcx, %rsi
	addq	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTailCase2)
	addq	%rcx, %rsi
	addq	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	addq	$16, %rdi
	addq	$16, %rsi
	subq	$16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
	testq	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	addq	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	/* NUL found inside the aligned loop with size still remaining.  */
	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsfq	%rdx, %rdx
	addq	%rax, %rdi
	addq	%rax, %rsi
	addq	%rdx, %rax
	subq	%rcx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
387
/*------ End of the labels that copy 1-16 and 1-32 bytes ------*/

/* L(ExitN) is entry N of L(ExitTable).  For N >= 1 it copies N-1 bytes from
   (%rsi) to (%rdi), stores a NUL at offset N-1 and jumps to
   L(CalculateSrcLen).  L(Exit0) stores nothing and returns directly.
   Sizes that are not a power of two use two overlapping moves.  */

	.p2align 4
L(Exit0):
	RETURN

	.p2align 4
L(Exit1):
	movb	$0, (%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit2):
	movb	(%rsi), %dl
	movb	%dl, (%rdi)
	movb	$0, 1(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit3):
	movw	(%rsi), %dx
	movw	%dx, (%rdi)
	movb	$0, 2(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit4):
	movw	(%rsi), %cx
	movb	2(%rsi), %dl
	movw	%cx, (%rdi)
	movb	%dl, 2(%rdi)
	movb	$0, 3(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit5):
	movl	(%rsi), %edx
	movl	%edx, (%rdi)
	movb	$0, 4(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit6):
	movl	(%rsi), %ecx
	movb	4(%rsi), %dl
	movl	%ecx, (%rdi)
	movb	%dl, 4(%rdi)
	movb	$0, 5(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit7):
	movl	(%rsi), %ecx
	movw	4(%rsi), %dx
	movl	%ecx, (%rdi)
	movw	%dx, 4(%rdi)
	movb	$0, 6(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit8):
	movl	(%rsi), %ecx
	movl	3(%rsi), %edx
	movl	%ecx, (%rdi)
	movl	%edx, 3(%rdi)
	movb	$0, 7(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit9):
	movq	(%rsi), %rdx
	movq	%rdx, (%rdi)
	movb	$0, 8(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit10):
	movq	(%rsi), %rcx
	movb	8(%rsi), %dl
	movq	%rcx, (%rdi)
	movb	%dl, 8(%rdi)
	movb	$0, 9(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit11):
	movq	(%rsi), %rcx
	movw	8(%rsi), %dx
	movq	%rcx, (%rdi)
	movw	%dx, 8(%rdi)
	movb	$0, 10(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit12):
	movq	(%rsi), %rcx
	movl	7(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 7(%rdi)
	movb	$0, 11(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit13):
	movq	(%rsi), %rcx
	movl	8(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 8(%rdi)
	movb	$0, 12(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit14):
	movq	(%rsi), %rcx
	movq	5(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 5(%rdi)
	movb	$0, 13(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit15):
	movq	(%rsi), %rcx
	movq	6(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 6(%rdi)
	movb	$0, 14(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit16):
	movq	(%rsi), %rcx
	movq	7(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 7(%rdi)
	movb	$0, 15(%rdi)
	jmp	L(CalculateSrcLen)
526
/* L(Exit17) .. L(Exit32): same scheme as the shorter handlers, with the
   first 16 bytes moved through %xmm0.  Each copies N-1 bytes, stores a NUL
   at offset N-1 and jumps to L(CalculateSrcLen).  */
	.p2align 4
L(Exit17):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	movb	$0, 16(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit18):
	movdqu	(%rsi), %xmm0
	movb	16(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	movb	%dl, 16(%rdi)
	movb	$0, 17(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit19):
	movdqu	(%rsi), %xmm0
	movw	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movw	%cx, 16(%rdi)
	movb	$0, 18(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit20):
	movdqu	(%rsi), %xmm0
	movl	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 15(%rdi)
	movb	$0, 19(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit21):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	movb	$0, 20(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit22):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movb	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	movb	%dl, 20(%rdi)
	movb	$0, 21(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit23):
	movdqu	(%rsi), %xmm0
	movq	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 14(%rdi)
	movb	$0, 22(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit24):
	movdqu	(%rsi), %xmm0
	movq	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 15(%rdi)
	movb	$0, 23(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit25):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 16(%rdi)
	movb	$0, 24(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit26):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rcx
	movb	24(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 16(%rdi)
	movb	%dl, 24(%rdi)
	movb	$0, 25(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit27):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movw	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movw	%cx, 24(%rdi)
	movb	$0, 26(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit28):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 23(%rdi)
	movb	$0, 27(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit29):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 24(%rdi)
	movb	$0, 28(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit30):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
	movb	$0, 29(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit31):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
	movb	$0, 30(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit32):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
	movb	$0, 31(%rdi)
	jmp	L(CalculateSrcLen)
678
/* L(StringTailN) is entry N of L(ExitStringTailTable).  It copies N+1 bytes
   from (%rsi) to (%rdi) and returns through RETURN.  Sizes that are not a
   power of two use two overlapping moves.  */
	.p2align 4
L(StringTail0):
	movb	(%rsi), %dl
	movb	%dl, (%rdi)
	RETURN

	.p2align 4
L(StringTail1):
	movw	(%rsi), %dx
	movw	%dx, (%rdi)
	RETURN

	.p2align 4
L(StringTail2):
	movw	(%rsi), %cx
	movb	2(%rsi), %dl
	movw	%cx, (%rdi)
	movb	%dl, 2(%rdi)
	RETURN

	.p2align 4
L(StringTail3):
	movl	(%rsi), %edx
	movl	%edx, (%rdi)
	RETURN

	.p2align 4
L(StringTail4):
	movl	(%rsi), %ecx
	movb	4(%rsi), %dl
	movl	%ecx, (%rdi)
	movb	%dl, 4(%rdi)
	RETURN

	.p2align 4
L(StringTail5):
	movl	(%rsi), %ecx
	movw	4(%rsi), %dx
	movl	%ecx, (%rdi)
	movw	%dx, 4(%rdi)
	RETURN

	.p2align 4
L(StringTail6):
	movl	(%rsi), %ecx
	movl	3(%rsi), %edx
	movl	%ecx, (%rdi)
	movl	%edx, 3(%rdi)
	RETURN

	.p2align 4
L(StringTail7):
	movq	(%rsi), %rdx
	movq	%rdx, (%rdi)
	RETURN

	.p2align 4
L(StringTail8):
	movq	(%rsi), %rcx
	movb	8(%rsi), %dl
	movq	%rcx, (%rdi)
	movb	%dl, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail9):
	movq	(%rsi), %rcx
	movw	8(%rsi), %dx
	movq	%rcx, (%rdi)
	movw	%dx, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail10):
	movq	(%rsi), %rcx
	movl	7(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 7(%rdi)
	RETURN

	.p2align 4
L(StringTail11):
	movq	(%rsi), %rcx
	movl	8(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail12):
	movq	(%rsi), %rcx
	movq	5(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 5(%rdi)
	RETURN

	.p2align 4
L(StringTail13):
	movq	(%rsi), %rcx
	movq	6(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 6(%rdi)
	RETURN

	.p2align 4
L(StringTail14):
	movq	(%rsi), %rcx
	movq	7(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 7(%rdi)
	RETURN

	.p2align 4
L(StringTail15):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	RETURN
796
/* L(StringTail16) .. L(StringTail32): copy N+1 bytes from (%rsi) to (%rdi),
   the first 16 through %xmm0, then return through RETURN.  */
	.p2align 4
L(StringTail16):
	movdqu	(%rsi), %xmm0
	movb	16(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movb	%cl, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail17):
	movdqu	(%rsi), %xmm0
	movw	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movw	%cx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail18):
	movdqu	(%rsi), %xmm0
	movl	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail19):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail20):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movb	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	movb	%dl, 20(%rdi)
	RETURN

	.p2align 4
L(StringTail21):
	movdqu	(%rsi), %xmm0
	movq	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 14(%rdi)
	RETURN

	.p2align 4
L(StringTail22):
	movdqu	(%rsi), %xmm0
	movq	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail23):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail24):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movb	24(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movb	%cl, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail25):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movw	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movw	%cx, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail26):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 23(%rdi)
	RETURN

	.p2align 4
L(StringTail27):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail28):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
	RETURN

	.p2align 4
L(StringTail29):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
	RETURN

	.p2align 4
L(StringTail30):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail31):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movb	32(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	movb	%cl, 32(%rdi)
	RETURN
944
/* L(StringTail33): entry 33 of L(ExitStringTailTable).  Copies 34 bytes
   (offsets 0..33) from (%rsi) to (%rdi) and returns through RETURN.
   The previous body duplicated L(StringTail32) and moved only 33 bytes,
   so the byte at offset 33 was never stored; a 16-bit move now covers
   offsets 32 and 33.
   NOTE(review): no dispatch site in this file appears to produce an index
   above 31, so this entry may be unreachable -- confirm before relying on
   it.  */
	.p2align 4
L(StringTail33):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movw	32(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	movw	%cx, 32(%rdi)
	RETURN
954
/* Measure what is left of the source string and return.

   L(CalculateSrcLenCase1) clears %r8 and %rax first, so the scan starts at
   %rsi itself and only %r9 carries a count in.
   L(CalculateSrcLen) expects:
     %rsi  source pointer, advanced here by %r8 before scanning
     %rax  bytes already counted
     %r9   extra amount to add to the result
   Result: %rax = %r9 + (%rax on entry) + distance from (%rsi + %r8) to the
   first NUL at or after it.

   Every load below is a 16-byte aligned load.  An aligned block never
   straddles a page, so the scan cannot touch a page beyond the one holding
   the NUL (the earlier unaligned first load could).  Mask bits belonging to
   bytes in front of the scan start are cleared with a shift pair.  */
	.p2align 4
L(CalculateSrcLenCase1):
	xor	%r8, %r8
	xor	%rax, %rax
L(CalculateSrcLen):
	pxor	%xmm0, %xmm0
	add	%r8, %rsi
	add	%rax, %r9		/* fold the running count into %r9 */
	mov	%rsi, %rcx
	and	$15, %rcx		/* %rcx = offset of the start in its block */
	and	$-16, %rsi
	movdqa	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	shr	%cl, %rdx		/* drop bits for bytes before the start... */
	shl	%cl, %rdx		/* ...keeping the rest at block positions */
	xor	%eax, %eax		/* block offset 0 for L(SrcLenLoopEnd) */
	test	%rdx, %rdx
	jnz	L(SrcLenLoopEnd)

	/* %xmm0 may hold matches from the bytes that were masked out, so it
	   has to be cleared again before it is reused as the zero operand.  */
	pxor	%xmm0, %xmm0
	mov	$16, %rax
L(SrcLenLoop):
	movdqa	(%rsi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	test	%rdx, %rdx
	jnz	L(SrcLenLoopEnd)
	add	$16, %rax
	jmp	L(SrcLenLoop)

	/* %rax = offset of the block from the aligned base, %rdx = NUL mask
	   of that block, %rcx = offset of the scan start from the base.  */
	.p2align 4
L(SrcLenLoopEnd):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	sub	%rcx, %rax
	RETURN

END (STRLCPY)
991
/* Jump tables used by BRANCH_TO_JMPTBL_ENTRY.  Each entry is the 32-bit
   offset of a handler from the start of its own table.
     L(ExitTable)            33 entries, L(Exit0) .. L(Exit32)
     L(ExitStringTailTable)  34 entries, L(StringTail0) .. L(StringTail33)
   The alignment directive comes after the section switch so that it aligns
   the tables themselves and not the end of .text.  */
	.section .rodata
	.p2align 4
L(ExitTable):
	.int	JMPTBL(L(Exit0), L(ExitTable))
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
L(ExitStringTailTable):
	.int	JMPTBL(L(StringTail0), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail1), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail2), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail3), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail4), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail5), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail6), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail7), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail8), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail9), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail10), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail11), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail12), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail13), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail14), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail15), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail16), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail17), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail18), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail19), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail20), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail21), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail22), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail23), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail24), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail25), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail26), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail27), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail28), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail29), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail30), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail31), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail32), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail33), L(ExitStringTailTable))