blob: 30254caae4f62a2647dcd8e59e1f4be4803a546a [file] [log] [blame]
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001/*
2Copyright (c) 2011, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#ifndef USE_AS_STRCAT
32
/* Fallback definitions: each macro below is only defined when the
   including environment has not already provided one of the same name.  */

/* L(label) builds a label name by pasting the ".L" prefix onto LABEL.  */
33# ifndef L
34# define L(label) .L##label
35# endif
36
/* The cfi_* wrappers expand to the assembler's .cfi_* directive of the
   same name, passing their arguments through unchanged.  */
37# ifndef cfi_startproc
38# define cfi_startproc .cfi_startproc
39# endif
40
41# ifndef cfi_endproc
42# define cfi_endproc .cfi_endproc
43# endif
44
45# ifndef cfi_rel_offset
46# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
47# endif
48
49# ifndef cfi_restore
50# define cfi_restore(reg) .cfi_restore reg
51# endif
52
53# ifndef cfi_adjust_cfa_offset
54# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
55# endif
56
/* ENTRY(name) opens a function: it marks NAME as a global symbol of type
   @function, aligns the location counter to 2^4 = 16 bytes, emits the
   NAME label and starts a CFI region with cfi_startproc.  */
57# ifndef ENTRY
58# define ENTRY(name) \
59 .type name, @function; \
60 .globl name; \
61 .p2align 4; \
62name: \
63 cfi_startproc
64# endif
65
/* END(name) closes what ENTRY opened: it ends the CFI region and sets the
   symbol size of NAME to the distance from NAME to the current location.  */
66# ifndef END
67# define END(name) \
68 cfi_endproc; \
69 .size name, .-name
70# endif
71
/* Unwind annotations for a 4-byte push of REG: the CFA offset grows by 4
   and REG is recorded as saved at offset 0.  Expands to CFI directives
   only, no machine instructions.  */
72# define CFI_PUSH(REG) \
73 cfi_adjust_cfa_offset (4); \
74 cfi_rel_offset (REG, 0)
75
/* Unwind annotations for the matching 4-byte pop: the CFA offset shrinks
   by 4 and REG is marked as restored.  Directives only as well.  */
76# define CFI_POP(REG) \
77 cfi_adjust_cfa_offset (-4); \
78 cfi_restore (REG)
79
/* PUSH/POP pair the real pushl/popl instruction with the corresponding
   CFI annotation above, so stack and unwind info change together.  */
80# define PUSH(REG) pushl REG; CFI_PUSH (REG)
81# define POP(REG) popl REG; CFI_POP (REG)
82
/* Name of the function defined by ENTRY (STRCPY); defaults to strcpy
   unless the includer has already chosen another name.  */
83# ifndef STRCPY
84# define STRCPY strcpy
85# endif
86
/* Entry/exit sequences.
   PARMS is the offset from %esp of the first argument slot after ENTRANCE
   has run: 8 when ENTRANCE pushes %ebx, 4 when ENTRANCE is empty
   (presumably the remaining 4 bytes are the return address -- i386 stack
   calling convention assumed).
   RETURN undoes ENTRANCE and returns.  RETURN1 additionally pops %edi
   first, for paths taken after the body has pushed %edi.
   The CFI_PUSH uses placed after `ret' emit no instructions; they only
   re-apply the unwind annotations, presumably so that code assembled
   after the return point is still described with those registers saved
   (NOTE(review): confirm against the exit paths that use these macros).  */
87# ifdef USE_AS_STRNCPY
88# define PARMS 8
89# define ENTRANCE PUSH (%ebx)
90# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
91# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
92# else
93# define PARMS 4
94# define ENTRANCE
95# define RETURN ret
96# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
97# endif
98
/* SAVE_RESULT(n) / SAVE_RESULT_TAIL(n) load %eax.
   With USE_AS_STPCPY both set %eax = %edx + n.
   Otherwise N is ignored: SAVE_RESULT copies %edi into %eax and
   SAVE_RESULT_TAIL copies %edx into %eax.  */
99# ifdef USE_AS_STPCPY
100# define SAVE_RESULT(n) lea n(%edx), %eax
101# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
102# else
103# define SAVE_RESULT(n) movl %edi, %eax
104# define SAVE_RESULT_TAIL(n) movl %edx, %eax
105# endif
106
/* Offsets from %esp of three consecutive 4-byte argument slots, starting
   at PARMS.  The function entry loads STR1 into %edx, STR2 into %ecx and
   (strncpy variants only) LEN into %ebx.  */
107# define STR1 PARMS
108# define STR2 STR1+4
109# define LEN STR2+4
110
111/* In this code the following instructions are used for copying:
112 movb - 1 byte
113 movw - 2 bytes
114 movl - 4 bytes
115 movlpd - 8 bytes
116 movaps - 16 bytes - requires 16-byte alignment
117 of source and destination addresses.
118*/
119
120.text
121ENTRY (STRCPY)
122 ENTRANCE
123 mov STR1(%esp), %edx
124 mov STR2(%esp), %ecx
125# ifdef USE_AS_STRNCPY
126 movl LEN(%esp), %ebx
127 cmp $8, %ebx
128 jbe L(StrncpyExit8Bytes)
129# endif
130 cmpb $0, (%ecx)
131 jz L(ExitTail1)
132 cmpb $0, 1(%ecx)
133 jz L(ExitTail2)
134 cmpb $0, 2(%ecx)
135 jz L(ExitTail3)
136 cmpb $0, 3(%ecx)
137 jz L(ExitTail4)
138 cmpb $0, 4(%ecx)
139 jz L(ExitTail5)
140 cmpb $0, 5(%ecx)
141 jz L(ExitTail6)
142 cmpb $0, 6(%ecx)
143 jz L(ExitTail7)
144 cmpb $0, 7(%ecx)
145 jz L(ExitTail8)
146# ifdef USE_AS_STRNCPY
147 cmp $16, %ebx
148 jb L(StrncpyExit15Bytes)
149# endif
150 cmpb $0, 8(%ecx)
151 jz L(ExitTail9)
152 cmpb $0, 9(%ecx)
153 jz L(ExitTail10)
154 cmpb $0, 10(%ecx)
155 jz L(ExitTail11)
156 cmpb $0, 11(%ecx)
157 jz L(ExitTail12)
158 cmpb $0, 12(%ecx)
159 jz L(ExitTail13)
160 cmpb $0, 13(%ecx)
161 jz L(ExitTail14)
162 cmpb $0, 14(%ecx)
163 jz L(ExitTail15)
164# if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY
165 cmp $16, %ebx
166 je L(ExitTail16)
167# endif
168 cmpb $0, 15(%ecx)
169 jz L(ExitTail16)
170
171# if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY
172 cmp $16, %ebx
173 je L(StrlcpyExitTail16)
174# endif
175
176 PUSH (%edi)
177# ifndef USE_AS_STRLCPY
178 mov %edx, %edi
179# else
180 mov %ecx, %edi
181# endif
182#endif
183 PUSH (%esi)
184#ifdef USE_AS_STRNCPY
185 mov %ecx, %esi
186 sub $16, %ebx
187 and $0xf, %esi
188
189/* add 16 bytes ecx_offset to ebx */
190
191 add %esi, %ebx
192#endif
193 lea 16(%ecx), %esi
194 and $-16, %esi
195 pxor %xmm0, %xmm0
196 movlpd (%ecx), %xmm1
197 movlpd %xmm1, (%edx)
198
199 pcmpeqb (%esi), %xmm0
200 movlpd 8(%ecx), %xmm1
201 movlpd %xmm1, 8(%edx)
202
203 pmovmskb %xmm0, %eax
204 sub %ecx, %esi
205
206#ifdef USE_AS_STRNCPY
207 sub $16, %ebx
208 jbe L(CopyFrom1To16BytesCase2OrCase3)
209#endif
210 test %eax, %eax
211 jnz L(CopyFrom1To16Bytes)
212
213 mov %edx, %eax
214 lea 16(%edx), %edx
215 and $-16, %edx
216 sub %edx, %eax
217
218#ifdef USE_AS_STRNCPY
219 add %eax, %esi
220 lea -1(%esi), %esi
221 and $1<<31, %esi
222 test %esi, %esi
223 jnz L(ContinueCopy)
224 lea 16(%ebx), %ebx
225
226L(ContinueCopy):
227#endif
228 sub %eax, %ecx
229 mov %ecx, %eax
230 and $0xf, %eax
231 mov $0, %esi
232
233/* case: ecx_offset == edx_offset */
234
235 jz L(Align16Both)
236
237 cmp $8, %eax
238 jae L(ShlHigh8)
239 cmp $1, %eax
240 je L(Shl1)
241 cmp $2, %eax
242 je L(Shl2)
243 cmp $3, %eax
244 je L(Shl3)
245 cmp $4, %eax
246 je L(Shl4)
247 cmp $5, %eax
248 je L(Shl5)
249 cmp $6, %eax
250 je L(Shl6)
251 jmp L(Shl7)
252
253L(ShlHigh8):
254 je L(Shl8)
255 cmp $9, %eax
256 je L(Shl9)
257 cmp $10, %eax
258 je L(Shl10)
259 cmp $11, %eax
260 je L(Shl11)
261 cmp $12, %eax
262 je L(Shl12)
263 cmp $13, %eax
264 je L(Shl13)
265 cmp $14, %eax
266 je L(Shl14)
267 jmp L(Shl15)
268
269L(Align16Both):
270 movaps (%ecx), %xmm1
271 movaps 16(%ecx), %xmm2
272 movaps %xmm1, (%edx)
273 pcmpeqb %xmm2, %xmm0
274 pmovmskb %xmm0, %eax
275 lea 16(%esi), %esi
276#ifdef USE_AS_STRNCPY
277 sub $16, %ebx
278 jbe L(CopyFrom1To16BytesCase2OrCase3)
279#endif
280 test %eax, %eax
281 jnz L(CopyFrom1To16Bytes)
282
283 movaps 16(%ecx, %esi), %xmm3
284 movaps %xmm2, (%edx, %esi)
285 pcmpeqb %xmm3, %xmm0
286 pmovmskb %xmm0, %eax
287 lea 16(%esi), %esi
288#ifdef USE_AS_STRNCPY
289 sub $16, %ebx
290 jbe L(CopyFrom1To16BytesCase2OrCase3)
291#endif
292 test %eax, %eax
293 jnz L(CopyFrom1To16Bytes)
294
295 movaps 16(%ecx, %esi), %xmm4
296 movaps %xmm3, (%edx, %esi)
297 pcmpeqb %xmm4, %xmm0
298 pmovmskb %xmm0, %eax
299 lea 16(%esi), %esi
300#ifdef USE_AS_STRNCPY
301 sub $16, %ebx
302 jbe L(CopyFrom1To16BytesCase2OrCase3)
303#endif
304 test %eax, %eax
305 jnz L(CopyFrom1To16Bytes)
306
307 movaps 16(%ecx, %esi), %xmm1
308 movaps %xmm4, (%edx, %esi)
309 pcmpeqb %xmm1, %xmm0
310 pmovmskb %xmm0, %eax
311 lea 16(%esi), %esi
312#ifdef USE_AS_STRNCPY
313 sub $16, %ebx
314 jbe L(CopyFrom1To16BytesCase2OrCase3)
315#endif
316 test %eax, %eax
317 jnz L(CopyFrom1To16Bytes)
318
319 movaps 16(%ecx, %esi), %xmm2
320 movaps %xmm1, (%edx, %esi)
321 pcmpeqb %xmm2, %xmm0
322 pmovmskb %xmm0, %eax
323 lea 16(%esi), %esi
324#ifdef USE_AS_STRNCPY
325 sub $16, %ebx
326 jbe L(CopyFrom1To16BytesCase2OrCase3)
327#endif
328 test %eax, %eax
329 jnz L(CopyFrom1To16Bytes)
330
331 movaps 16(%ecx, %esi), %xmm3
332 movaps %xmm2, (%edx, %esi)
333 pcmpeqb %xmm3, %xmm0
334 pmovmskb %xmm0, %eax
335 lea 16(%esi), %esi
336#ifdef USE_AS_STRNCPY
337 sub $16, %ebx
338 jbe L(CopyFrom1To16BytesCase2OrCase3)
339#endif
340 test %eax, %eax
341 jnz L(CopyFrom1To16Bytes)
342
343 movaps %xmm3, (%edx, %esi)
344 mov %ecx, %eax
345 lea 16(%ecx, %esi), %ecx
346 and $-0x40, %ecx
347 sub %ecx, %eax
348 sub %eax, %edx
349#ifdef USE_AS_STRNCPY
350 lea 112(%ebx, %eax), %ebx
351#endif
352 mov $-0x40, %esi
353
354L(Aligned64Loop):
355 movaps (%ecx), %xmm2
356 movaps 32(%ecx), %xmm3
357 movaps %xmm2, %xmm4
358 movaps 16(%ecx), %xmm5
359 movaps %xmm3, %xmm6
360 movaps 48(%ecx), %xmm7
361 pminub %xmm5, %xmm2
362 pminub %xmm7, %xmm3
363 pminub %xmm2, %xmm3
364 lea 64(%edx), %edx
365 pcmpeqb %xmm0, %xmm3
366 lea 64(%ecx), %ecx
367 pmovmskb %xmm3, %eax
368#ifdef USE_AS_STRNCPY
369 sub $64, %ebx
370 jbe L(StrncpyLeaveCase2OrCase3)
371#endif
372 test %eax, %eax
373 jnz L(Aligned64Leave)
374 movaps %xmm4, -64(%edx)
375 movaps %xmm5, -48(%edx)
376 movaps %xmm6, -32(%edx)
377 movaps %xmm7, -16(%edx)
378 jmp L(Aligned64Loop)
379
380L(Aligned64Leave):
381#ifdef USE_AS_STRNCPY
382 lea 48(%ebx), %ebx
383#endif
384 pcmpeqb %xmm4, %xmm0
385 pmovmskb %xmm0, %eax
386 test %eax, %eax
387 jnz L(CopyFrom1To16Bytes)
388
389 pcmpeqb %xmm5, %xmm0
390#ifdef USE_AS_STRNCPY
391 lea -16(%ebx), %ebx
392#endif
393 pmovmskb %xmm0, %eax
394 movaps %xmm4, -64(%edx)
395 lea 16(%esi), %esi
396 test %eax, %eax
397 jnz L(CopyFrom1To16Bytes)
398
399 pcmpeqb %xmm6, %xmm0
400#ifdef USE_AS_STRNCPY
401 lea -16(%ebx), %ebx
402#endif
403 pmovmskb %xmm0, %eax
404 movaps %xmm5, -48(%edx)
405 lea 16(%esi), %esi
406 test %eax, %eax
407 jnz L(CopyFrom1To16Bytes)
408
409 movaps %xmm6, -32(%edx)
410 pcmpeqb %xmm7, %xmm0
411#ifdef USE_AS_STRNCPY
412 lea -16(%ebx), %ebx
413#endif
414 pmovmskb %xmm0, %eax
415 lea 16(%esi), %esi
416 jmp L(CopyFrom1To16Bytes)
417
418 .p2align 4
419L(Shl1):
420 movaps -1(%ecx), %xmm1
421 movaps 15(%ecx), %xmm2
422L(Shl1Start):
423 pcmpeqb %xmm2, %xmm0
424 pmovmskb %xmm0, %eax
425 movaps %xmm2, %xmm3
426#ifdef USE_AS_STRNCPY
427 sub $16, %ebx
428 jbe L(StrncpyExit1Case2OrCase3)
429#endif
430 test %eax, %eax
431 jnz L(Shl1LoopExit)
432
433 palignr $1, %xmm1, %xmm2
434 movaps %xmm3, %xmm1
435 movaps %xmm2, (%edx)
436 movaps 31(%ecx), %xmm2
437
438 pcmpeqb %xmm2, %xmm0
439 lea 16(%edx), %edx
440 pmovmskb %xmm0, %eax
441 lea 16(%ecx), %ecx
442 movaps %xmm2, %xmm3
443#ifdef USE_AS_STRNCPY
444 sub $16, %ebx
445 jbe L(StrncpyExit1Case2OrCase3)
446#endif
447 test %eax, %eax
448 jnz L(Shl1LoopExit)
449
450 palignr $1, %xmm1, %xmm2
451 movaps %xmm2, (%edx)
452 movaps 31(%ecx), %xmm2
453 movaps %xmm3, %xmm1
454
455 pcmpeqb %xmm2, %xmm0
456 lea 16(%edx), %edx
457 pmovmskb %xmm0, %eax
458 lea 16(%ecx), %ecx
459 movaps %xmm2, %xmm3
460#ifdef USE_AS_STRNCPY
461 sub $16, %ebx
462 jbe L(StrncpyExit1Case2OrCase3)
463#endif
464 test %eax, %eax
465 jnz L(Shl1LoopExit)
466
467 palignr $1, %xmm1, %xmm2
468 movaps %xmm2, (%edx)
469 movaps 31(%ecx), %xmm2
470
471 pcmpeqb %xmm2, %xmm0
472 lea 16(%edx), %edx
473 pmovmskb %xmm0, %eax
474 lea 16(%ecx), %ecx
475#ifdef USE_AS_STRNCPY
476 sub $16, %ebx
477 jbe L(StrncpyExit1Case2OrCase3)
478#endif
479 test %eax, %eax
480 jnz L(Shl1LoopExit)
481
482 palignr $1, %xmm3, %xmm2
483 movaps %xmm2, (%edx)
484 lea 31(%ecx), %ecx
485 lea 16(%edx), %edx
486
487 mov %ecx, %eax
488 and $-0x40, %ecx
489 sub %ecx, %eax
490 lea -15(%ecx), %ecx
491 sub %eax, %edx
492#ifdef USE_AS_STRNCPY
493 add %eax, %ebx
494#endif
495 movaps -1(%ecx), %xmm1
496
497L(Shl1LoopStart):
498 movaps 15(%ecx), %xmm2
499 movaps 31(%ecx), %xmm3
500 movaps %xmm3, %xmm6
501 movaps 47(%ecx), %xmm4
502 movaps %xmm4, %xmm7
503 movaps 63(%ecx), %xmm5
504 pminub %xmm2, %xmm6
505 pminub %xmm5, %xmm7
506 pminub %xmm6, %xmm7
507 pcmpeqb %xmm0, %xmm7
508 pmovmskb %xmm7, %eax
509 movaps %xmm5, %xmm7
510 palignr $1, %xmm4, %xmm5
511 palignr $1, %xmm3, %xmm4
512 test %eax, %eax
513 jnz L(Shl1Start)
514#ifdef USE_AS_STRNCPY
515 sub $64, %ebx
516 jbe L(StrncpyLeave1)
517#endif
518 palignr $1, %xmm2, %xmm3
519 lea 64(%ecx), %ecx
520 palignr $1, %xmm1, %xmm2
521 movaps %xmm7, %xmm1
522 movaps %xmm5, 48(%edx)
523 movaps %xmm4, 32(%edx)
524 movaps %xmm3, 16(%edx)
525 movaps %xmm2, (%edx)
526 lea 64(%edx), %edx
527 jmp L(Shl1LoopStart)
528
529L(Shl1LoopExit):
530 movlpd (%ecx), %xmm0
531 movlpd %xmm0, (%edx)
532 movlpd 7(%ecx), %xmm0
533 movlpd %xmm0, 7(%edx)
534 mov $15, %esi
535 jmp L(CopyFrom1To16Bytes)
536
537 .p2align 4
538L(Shl2):
539 movaps -2(%ecx), %xmm1
540 movaps 14(%ecx), %xmm2
541L(Shl2Start):
542 pcmpeqb %xmm2, %xmm0
543 pmovmskb %xmm0, %eax
544 movaps %xmm2, %xmm3
545#ifdef USE_AS_STRNCPY
546 sub $16, %ebx
547 jbe L(StrncpyExit2Case2OrCase3)
548#endif
549 test %eax, %eax
550 jnz L(Shl2LoopExit)
551
552 palignr $2, %xmm1, %xmm2
553 movaps %xmm3, %xmm1
554 movaps %xmm2, (%edx)
555 movaps 30(%ecx), %xmm2
556
557 pcmpeqb %xmm2, %xmm0
558 lea 16(%edx), %edx
559 pmovmskb %xmm0, %eax
560 lea 16(%ecx), %ecx
561 movaps %xmm2, %xmm3
562#ifdef USE_AS_STRNCPY
563 sub $16, %ebx
564 jbe L(StrncpyExit2Case2OrCase3)
565#endif
566 test %eax, %eax
567 jnz L(Shl2LoopExit)
568
569 palignr $2, %xmm1, %xmm2
570 movaps %xmm2, (%edx)
571 movaps 30(%ecx), %xmm2
572 movaps %xmm3, %xmm1
573
574 pcmpeqb %xmm2, %xmm0
575 lea 16(%edx), %edx
576 pmovmskb %xmm0, %eax
577 lea 16(%ecx), %ecx
578 movaps %xmm2, %xmm3
579#ifdef USE_AS_STRNCPY
580 sub $16, %ebx
581 jbe L(StrncpyExit2Case2OrCase3)
582#endif
583 test %eax, %eax
584 jnz L(Shl2LoopExit)
585
586 palignr $2, %xmm1, %xmm2
587 movaps %xmm2, (%edx)
588 movaps 30(%ecx), %xmm2
589
590 pcmpeqb %xmm2, %xmm0
591 lea 16(%edx), %edx
592 pmovmskb %xmm0, %eax
593 lea 16(%ecx), %ecx
594#ifdef USE_AS_STRNCPY
595 sub $16, %ebx
596 jbe L(StrncpyExit2Case2OrCase3)
597#endif
598 test %eax, %eax
599 jnz L(Shl2LoopExit)
600
601 palignr $2, %xmm3, %xmm2
602 movaps %xmm2, (%edx)
603 lea 30(%ecx), %ecx
604 lea 16(%edx), %edx
605
606 mov %ecx, %eax
607 and $-0x40, %ecx
608 sub %ecx, %eax
609 lea -14(%ecx), %ecx
610 sub %eax, %edx
611#ifdef USE_AS_STRNCPY
612 add %eax, %ebx
613#endif
614 movaps -2(%ecx), %xmm1
615
616L(Shl2LoopStart):
617 movaps 14(%ecx), %xmm2
618 movaps 30(%ecx), %xmm3
619 movaps %xmm3, %xmm6
620 movaps 46(%ecx), %xmm4
621 movaps %xmm4, %xmm7
622 movaps 62(%ecx), %xmm5
623 pminub %xmm2, %xmm6
624 pminub %xmm5, %xmm7
625 pminub %xmm6, %xmm7
626 pcmpeqb %xmm0, %xmm7
627 pmovmskb %xmm7, %eax
628 movaps %xmm5, %xmm7
629 palignr $2, %xmm4, %xmm5
630 palignr $2, %xmm3, %xmm4
631 test %eax, %eax
632 jnz L(Shl2Start)
633#ifdef USE_AS_STRNCPY
634 sub $64, %ebx
635 jbe L(StrncpyLeave2)
636#endif
637 palignr $2, %xmm2, %xmm3
638 lea 64(%ecx), %ecx
639 palignr $2, %xmm1, %xmm2
640 movaps %xmm7, %xmm1
641 movaps %xmm5, 48(%edx)
642 movaps %xmm4, 32(%edx)
643 movaps %xmm3, 16(%edx)
644 movaps %xmm2, (%edx)
645 lea 64(%edx), %edx
646 jmp L(Shl2LoopStart)
647
648L(Shl2LoopExit):
649 movlpd (%ecx), %xmm0
650 movlpd 6(%ecx), %xmm1
651 movlpd %xmm0, (%edx)
652 movlpd %xmm1, 6(%edx)
653 mov $14, %esi
654 jmp L(CopyFrom1To16Bytes)
655
656 .p2align 4
657L(Shl3):
658 movaps -3(%ecx), %xmm1
659 movaps 13(%ecx), %xmm2
660L(Shl3Start):
661 pcmpeqb %xmm2, %xmm0
662 pmovmskb %xmm0, %eax
663 movaps %xmm2, %xmm3
664#ifdef USE_AS_STRNCPY
665 sub $16, %ebx
666 jbe L(StrncpyExit3Case2OrCase3)
667#endif
668 test %eax, %eax
669 jnz L(Shl3LoopExit)
670
671 palignr $3, %xmm1, %xmm2
672 movaps %xmm3, %xmm1
673 movaps %xmm2, (%edx)
674 movaps 29(%ecx), %xmm2
675
676 pcmpeqb %xmm2, %xmm0
677 lea 16(%edx), %edx
678 pmovmskb %xmm0, %eax
679 lea 16(%ecx), %ecx
680 movaps %xmm2, %xmm3
681#ifdef USE_AS_STRNCPY
682 sub $16, %ebx
683 jbe L(StrncpyExit3Case2OrCase3)
684#endif
685 test %eax, %eax
686 jnz L(Shl3LoopExit)
687
688 palignr $3, %xmm1, %xmm2
689 movaps %xmm2, (%edx)
690 movaps 29(%ecx), %xmm2
691 movaps %xmm3, %xmm1
692
693 pcmpeqb %xmm2, %xmm0
694 lea 16(%edx), %edx
695 pmovmskb %xmm0, %eax
696 lea 16(%ecx), %ecx
697 movaps %xmm2, %xmm3
698#ifdef USE_AS_STRNCPY
699 sub $16, %ebx
700 jbe L(StrncpyExit3Case2OrCase3)
701#endif
702 test %eax, %eax
703 jnz L(Shl3LoopExit)
704
705 palignr $3, %xmm1, %xmm2
706 movaps %xmm2, (%edx)
707 movaps 29(%ecx), %xmm2
708
709 pcmpeqb %xmm2, %xmm0
710 lea 16(%edx), %edx
711 pmovmskb %xmm0, %eax
712 lea 16(%ecx), %ecx
713#ifdef USE_AS_STRNCPY
714 sub $16, %ebx
715 jbe L(StrncpyExit3Case2OrCase3)
716#endif
717 test %eax, %eax
718 jnz L(Shl3LoopExit)
719
720 palignr $3, %xmm3, %xmm2
721 movaps %xmm2, (%edx)
722 lea 29(%ecx), %ecx
723 lea 16(%edx), %edx
724
725 mov %ecx, %eax
726 and $-0x40, %ecx
727 sub %ecx, %eax
728 lea -13(%ecx), %ecx
729 sub %eax, %edx
730#ifdef USE_AS_STRNCPY
731 add %eax, %ebx
732#endif
733 movaps -3(%ecx), %xmm1
734
735L(Shl3LoopStart):
736 movaps 13(%ecx), %xmm2
737 movaps 29(%ecx), %xmm3
738 movaps %xmm3, %xmm6
739 movaps 45(%ecx), %xmm4
740 movaps %xmm4, %xmm7
741 movaps 61(%ecx), %xmm5
742 pminub %xmm2, %xmm6
743 pminub %xmm5, %xmm7
744 pminub %xmm6, %xmm7
745 pcmpeqb %xmm0, %xmm7
746 pmovmskb %xmm7, %eax
747 movaps %xmm5, %xmm7
748 palignr $3, %xmm4, %xmm5
749 palignr $3, %xmm3, %xmm4
750 test %eax, %eax
751 jnz L(Shl3Start)
752#ifdef USE_AS_STRNCPY
753 sub $64, %ebx
754 jbe L(StrncpyLeave3)
755#endif
756 palignr $3, %xmm2, %xmm3
757 lea 64(%ecx), %ecx
758 palignr $3, %xmm1, %xmm2
759 movaps %xmm7, %xmm1
760 movaps %xmm5, 48(%edx)
761 movaps %xmm4, 32(%edx)
762 movaps %xmm3, 16(%edx)
763 movaps %xmm2, (%edx)
764 lea 64(%edx), %edx
765 jmp L(Shl3LoopStart)
766
767L(Shl3LoopExit):
768 movlpd (%ecx), %xmm0
769 movlpd 5(%ecx), %xmm1
770 movlpd %xmm0, (%edx)
771 movlpd %xmm1, 5(%edx)
772 mov $13, %esi
773 jmp L(CopyFrom1To16Bytes)
774
775 .p2align 4
776L(Shl4):
777 movaps -4(%ecx), %xmm1
778 movaps 12(%ecx), %xmm2
779L(Shl4Start):
780 pcmpeqb %xmm2, %xmm0
781 pmovmskb %xmm0, %eax
782 movaps %xmm2, %xmm3
783#ifdef USE_AS_STRNCPY
784 sub $16, %ebx
785 jbe L(StrncpyExit4Case2OrCase3)
786#endif
787 test %eax, %eax
788 jnz L(Shl4LoopExit)
789
790 palignr $4, %xmm1, %xmm2
791 movaps %xmm3, %xmm1
792 movaps %xmm2, (%edx)
793 movaps 28(%ecx), %xmm2
794
795 pcmpeqb %xmm2, %xmm0
796 lea 16(%edx), %edx
797 pmovmskb %xmm0, %eax
798 lea 16(%ecx), %ecx
799 movaps %xmm2, %xmm3
800#ifdef USE_AS_STRNCPY
801 sub $16, %ebx
802 jbe L(StrncpyExit4Case2OrCase3)
803#endif
804 test %eax, %eax
805 jnz L(Shl4LoopExit)
806
807 palignr $4, %xmm1, %xmm2
808 movaps %xmm2, (%edx)
809 movaps 28(%ecx), %xmm2
810 movaps %xmm3, %xmm1
811
812 pcmpeqb %xmm2, %xmm0
813 lea 16(%edx), %edx
814 pmovmskb %xmm0, %eax
815 lea 16(%ecx), %ecx
816 movaps %xmm2, %xmm3
817#ifdef USE_AS_STRNCPY
818 sub $16, %ebx
819 jbe L(StrncpyExit4Case2OrCase3)
820#endif
821 test %eax, %eax
822 jnz L(Shl4LoopExit)
823
824 palignr $4, %xmm1, %xmm2
825 movaps %xmm2, (%edx)
826 movaps 28(%ecx), %xmm2
827
828 pcmpeqb %xmm2, %xmm0
829 lea 16(%edx), %edx
830 pmovmskb %xmm0, %eax
831 lea 16(%ecx), %ecx
832#ifdef USE_AS_STRNCPY
833 sub $16, %ebx
834 jbe L(StrncpyExit4Case2OrCase3)
835#endif
836 test %eax, %eax
837 jnz L(Shl4LoopExit)
838
839 palignr $4, %xmm3, %xmm2
840 movaps %xmm2, (%edx)
841 lea 28(%ecx), %ecx
842 lea 16(%edx), %edx
843
844 mov %ecx, %eax
845 and $-0x40, %ecx
846 sub %ecx, %eax
847 lea -12(%ecx), %ecx
848 sub %eax, %edx
849#ifdef USE_AS_STRNCPY
850 add %eax, %ebx
851#endif
852 movaps -4(%ecx), %xmm1
853
854L(Shl4LoopStart):
855 movaps 12(%ecx), %xmm2
856 movaps 28(%ecx), %xmm3
857 movaps %xmm3, %xmm6
858 movaps 44(%ecx), %xmm4
859 movaps %xmm4, %xmm7
860 movaps 60(%ecx), %xmm5
861 pminub %xmm2, %xmm6
862 pminub %xmm5, %xmm7
863 pminub %xmm6, %xmm7
864 pcmpeqb %xmm0, %xmm7
865 pmovmskb %xmm7, %eax
866 movaps %xmm5, %xmm7
867 palignr $4, %xmm4, %xmm5
868 palignr $4, %xmm3, %xmm4
869 test %eax, %eax
870 jnz L(Shl4Start)
871#ifdef USE_AS_STRNCPY
872 sub $64, %ebx
873 jbe L(StrncpyLeave4)
874#endif
875 palignr $4, %xmm2, %xmm3
876 lea 64(%ecx), %ecx
877 palignr $4, %xmm1, %xmm2
878 movaps %xmm7, %xmm1
879 movaps %xmm5, 48(%edx)
880 movaps %xmm4, 32(%edx)
881 movaps %xmm3, 16(%edx)
882 movaps %xmm2, (%edx)
883 lea 64(%edx), %edx
884 jmp L(Shl4LoopStart)
885
886L(Shl4LoopExit):
887 movlpd (%ecx), %xmm0
888 movl 8(%ecx), %esi
889 movlpd %xmm0, (%edx)
890 movl %esi, 8(%edx)
891 mov $12, %esi
892 jmp L(CopyFrom1To16Bytes)
893
894 .p2align 4
895L(Shl5):
896 movaps -5(%ecx), %xmm1
897 movaps 11(%ecx), %xmm2
898L(Shl5Start):
899 pcmpeqb %xmm2, %xmm0
900 pmovmskb %xmm0, %eax
901 movaps %xmm2, %xmm3
902#ifdef USE_AS_STRNCPY
903 sub $16, %ebx
904 jbe L(StrncpyExit5Case2OrCase3)
905#endif
906 test %eax, %eax
907 jnz L(Shl5LoopExit)
908
909 palignr $5, %xmm1, %xmm2
910 movaps %xmm3, %xmm1
911 movaps %xmm2, (%edx)
912 movaps 27(%ecx), %xmm2
913
914 pcmpeqb %xmm2, %xmm0
915 lea 16(%edx), %edx
916 pmovmskb %xmm0, %eax
917 lea 16(%ecx), %ecx
918 movaps %xmm2, %xmm3
919#ifdef USE_AS_STRNCPY
920 sub $16, %ebx
921 jbe L(StrncpyExit5Case2OrCase3)
922#endif
923 test %eax, %eax
924 jnz L(Shl5LoopExit)
925
926 palignr $5, %xmm1, %xmm2
927 movaps %xmm2, (%edx)
928 movaps 27(%ecx), %xmm2
929 movaps %xmm3, %xmm1
930
931 pcmpeqb %xmm2, %xmm0
932 lea 16(%edx), %edx
933 pmovmskb %xmm0, %eax
934 lea 16(%ecx), %ecx
935 movaps %xmm2, %xmm3
936#ifdef USE_AS_STRNCPY
937 sub $16, %ebx
938 jbe L(StrncpyExit5Case2OrCase3)
939#endif
940 test %eax, %eax
941 jnz L(Shl5LoopExit)
942
943 palignr $5, %xmm1, %xmm2
944 movaps %xmm2, (%edx)
945 movaps 27(%ecx), %xmm2
946
947 pcmpeqb %xmm2, %xmm0
948 lea 16(%edx), %edx
949 pmovmskb %xmm0, %eax
950 lea 16(%ecx), %ecx
951#ifdef USE_AS_STRNCPY
952 sub $16, %ebx
953 jbe L(StrncpyExit5Case2OrCase3)
954#endif
955 test %eax, %eax
956 jnz L(Shl5LoopExit)
957
958 palignr $5, %xmm3, %xmm2
959 movaps %xmm2, (%edx)
960 lea 27(%ecx), %ecx
961 lea 16(%edx), %edx
962
963 mov %ecx, %eax
964 and $-0x40, %ecx
965 sub %ecx, %eax
966 lea -11(%ecx), %ecx
967 sub %eax, %edx
968#ifdef USE_AS_STRNCPY
969 add %eax, %ebx
970#endif
971 movaps -5(%ecx), %xmm1
972
973L(Shl5LoopStart):
974 movaps 11(%ecx), %xmm2
975 movaps 27(%ecx), %xmm3
976 movaps %xmm3, %xmm6
977 movaps 43(%ecx), %xmm4
978 movaps %xmm4, %xmm7
979 movaps 59(%ecx), %xmm5
980 pminub %xmm2, %xmm6
981 pminub %xmm5, %xmm7
982 pminub %xmm6, %xmm7
983 pcmpeqb %xmm0, %xmm7
984 pmovmskb %xmm7, %eax
985 movaps %xmm5, %xmm7
986 palignr $5, %xmm4, %xmm5
987 palignr $5, %xmm3, %xmm4
988 test %eax, %eax
989 jnz L(Shl5Start)
990#ifdef USE_AS_STRNCPY
991 sub $64, %ebx
992 jbe L(StrncpyLeave5)
993#endif
994 palignr $5, %xmm2, %xmm3
995 lea 64(%ecx), %ecx
996 palignr $5, %xmm1, %xmm2
997 movaps %xmm7, %xmm1
998 movaps %xmm5, 48(%edx)
999 movaps %xmm4, 32(%edx)
1000 movaps %xmm3, 16(%edx)
1001 movaps %xmm2, (%edx)
1002 lea 64(%edx), %edx
1003 jmp L(Shl5LoopStart)
1004
1005L(Shl5LoopExit):
1006 movlpd (%ecx), %xmm0
1007 movl 7(%ecx), %esi
1008 movlpd %xmm0, (%edx)
1009 movl %esi, 7(%edx)
1010 mov $11, %esi
1011 jmp L(CopyFrom1To16Bytes)
1012
1013 .p2align 4
1014L(Shl6):
1015 movaps -6(%ecx), %xmm1
1016 movaps 10(%ecx), %xmm2
1017L(Shl6Start):
1018 pcmpeqb %xmm2, %xmm0
1019 pmovmskb %xmm0, %eax
1020 movaps %xmm2, %xmm3
1021#ifdef USE_AS_STRNCPY
1022 sub $16, %ebx
1023 jbe L(StrncpyExit6Case2OrCase3)
1024#endif
1025 test %eax, %eax
1026 jnz L(Shl6LoopExit)
1027
1028 palignr $6, %xmm1, %xmm2
1029 movaps %xmm3, %xmm1
1030 movaps %xmm2, (%edx)
1031 movaps 26(%ecx), %xmm2
1032
1033 pcmpeqb %xmm2, %xmm0
1034 lea 16(%edx), %edx
1035 pmovmskb %xmm0, %eax
1036 lea 16(%ecx), %ecx
1037 movaps %xmm2, %xmm3
1038#ifdef USE_AS_STRNCPY
1039 sub $16, %ebx
1040 jbe L(StrncpyExit6Case2OrCase3)
1041#endif
1042 test %eax, %eax
1043 jnz L(Shl6LoopExit)
1044
1045 palignr $6, %xmm1, %xmm2
1046 movaps %xmm2, (%edx)
1047 movaps 26(%ecx), %xmm2
1048 movaps %xmm3, %xmm1
1049
1050 pcmpeqb %xmm2, %xmm0
1051 lea 16(%edx), %edx
1052 pmovmskb %xmm0, %eax
1053 lea 16(%ecx), %ecx
1054 movaps %xmm2, %xmm3
1055#ifdef USE_AS_STRNCPY
1056 sub $16, %ebx
1057 jbe L(StrncpyExit6Case2OrCase3)
1058#endif
1059 test %eax, %eax
1060 jnz L(Shl6LoopExit)
1061
1062 palignr $6, %xmm1, %xmm2
1063 movaps %xmm2, (%edx)
1064 movaps 26(%ecx), %xmm2
1065
1066 pcmpeqb %xmm2, %xmm0
1067 lea 16(%edx), %edx
1068 pmovmskb %xmm0, %eax
1069 lea 16(%ecx), %ecx
1070#ifdef USE_AS_STRNCPY
1071 sub $16, %ebx
1072 jbe L(StrncpyExit6Case2OrCase3)
1073#endif
1074 test %eax, %eax
1075 jnz L(Shl6LoopExit)
1076
1077 palignr $6, %xmm3, %xmm2
1078 movaps %xmm2, (%edx)
1079 lea 26(%ecx), %ecx
1080 lea 16(%edx), %edx
1081
1082 mov %ecx, %eax
1083 and $-0x40, %ecx
1084 sub %ecx, %eax
1085 lea -10(%ecx), %ecx
1086 sub %eax, %edx
1087#ifdef USE_AS_STRNCPY
1088 add %eax, %ebx
1089#endif
1090 movaps -6(%ecx), %xmm1
1091
1092L(Shl6LoopStart):
1093 movaps 10(%ecx), %xmm2
1094 movaps 26(%ecx), %xmm3
1095 movaps %xmm3, %xmm6
1096 movaps 42(%ecx), %xmm4
1097 movaps %xmm4, %xmm7
1098 movaps 58(%ecx), %xmm5
1099 pminub %xmm2, %xmm6
1100 pminub %xmm5, %xmm7
1101 pminub %xmm6, %xmm7
1102 pcmpeqb %xmm0, %xmm7
1103 pmovmskb %xmm7, %eax
1104 movaps %xmm5, %xmm7
1105 palignr $6, %xmm4, %xmm5
1106 palignr $6, %xmm3, %xmm4
1107 test %eax, %eax
1108 jnz L(Shl6Start)
1109#ifdef USE_AS_STRNCPY
1110 sub $64, %ebx
1111 jbe L(StrncpyLeave6)
1112#endif
1113 palignr $6, %xmm2, %xmm3
1114 lea 64(%ecx), %ecx
1115 palignr $6, %xmm1, %xmm2
1116 movaps %xmm7, %xmm1
1117 movaps %xmm5, 48(%edx)
1118 movaps %xmm4, 32(%edx)
1119 movaps %xmm3, 16(%edx)
1120 movaps %xmm2, (%edx)
1121 lea 64(%edx), %edx
1122 jmp L(Shl6LoopStart)
1123
1124L(Shl6LoopExit):
1125 movlpd (%ecx), %xmm0
1126 movl 6(%ecx), %esi
1127 movlpd %xmm0, (%edx)
1128 movl %esi, 6(%edx)
1129 mov $10, %esi
1130 jmp L(CopyFrom1To16Bytes)
1131
1132 .p2align 4
1133L(Shl7):
1134 movaps -7(%ecx), %xmm1
1135 movaps 9(%ecx), %xmm2
1136L(Shl7Start):
1137 pcmpeqb %xmm2, %xmm0
1138 pmovmskb %xmm0, %eax
1139 movaps %xmm2, %xmm3
1140#ifdef USE_AS_STRNCPY
1141 sub $16, %ebx
1142 jbe L(StrncpyExit7Case2OrCase3)
1143#endif
1144 test %eax, %eax
1145 jnz L(Shl7LoopExit)
1146
1147 palignr $7, %xmm1, %xmm2
1148 movaps %xmm3, %xmm1
1149 movaps %xmm2, (%edx)
1150 movaps 25(%ecx), %xmm2
1151
1152 pcmpeqb %xmm2, %xmm0
1153 lea 16(%edx), %edx
1154 pmovmskb %xmm0, %eax
1155 lea 16(%ecx), %ecx
1156 movaps %xmm2, %xmm3
1157#ifdef USE_AS_STRNCPY
1158 sub $16, %ebx
1159 jbe L(StrncpyExit7Case2OrCase3)
1160#endif
1161 test %eax, %eax
1162 jnz L(Shl7LoopExit)
1163
1164 palignr $7, %xmm1, %xmm2
1165 movaps %xmm2, (%edx)
1166 movaps 25(%ecx), %xmm2
1167 movaps %xmm3, %xmm1
1168
1169 pcmpeqb %xmm2, %xmm0
1170 lea 16(%edx), %edx
1171 pmovmskb %xmm0, %eax
1172 lea 16(%ecx), %ecx
1173 movaps %xmm2, %xmm3
1174#ifdef USE_AS_STRNCPY
1175 sub $16, %ebx
1176 jbe L(StrncpyExit7Case2OrCase3)
1177#endif
1178 test %eax, %eax
1179 jnz L(Shl7LoopExit)
1180
1181 palignr $7, %xmm1, %xmm2
1182 movaps %xmm2, (%edx)
1183 movaps 25(%ecx), %xmm2
1184
1185 pcmpeqb %xmm2, %xmm0
1186 lea 16(%edx), %edx
1187 pmovmskb %xmm0, %eax
1188 lea 16(%ecx), %ecx
1189#ifdef USE_AS_STRNCPY
1190 sub $16, %ebx
1191 jbe L(StrncpyExit7Case2OrCase3)
1192#endif
1193 test %eax, %eax
1194 jnz L(Shl7LoopExit)
1195
1196 palignr $7, %xmm3, %xmm2
1197 movaps %xmm2, (%edx)
1198 lea 25(%ecx), %ecx
1199 lea 16(%edx), %edx
1200
1201 mov %ecx, %eax
1202 and $-0x40, %ecx
1203 sub %ecx, %eax
1204 lea -9(%ecx), %ecx
1205 sub %eax, %edx
1206#ifdef USE_AS_STRNCPY
1207 add %eax, %ebx
1208#endif
1209 movaps -7(%ecx), %xmm1
1210
1211L(Shl7LoopStart):
1212 movaps 9(%ecx), %xmm2
1213 movaps 25(%ecx), %xmm3
1214 movaps %xmm3, %xmm6
1215 movaps 41(%ecx), %xmm4
1216 movaps %xmm4, %xmm7
1217 movaps 57(%ecx), %xmm5
1218 pminub %xmm2, %xmm6
1219 pminub %xmm5, %xmm7
1220 pminub %xmm6, %xmm7
1221 pcmpeqb %xmm0, %xmm7
1222 pmovmskb %xmm7, %eax
1223 movaps %xmm5, %xmm7
1224 palignr $7, %xmm4, %xmm5
1225 palignr $7, %xmm3, %xmm4
1226 test %eax, %eax
1227 jnz L(Shl7Start)
1228#ifdef USE_AS_STRNCPY
1229 sub $64, %ebx
1230 jbe L(StrncpyLeave7)
1231#endif
1232 palignr $7, %xmm2, %xmm3
1233 lea 64(%ecx), %ecx
1234 palignr $7, %xmm1, %xmm2
1235 movaps %xmm7, %xmm1
1236 movaps %xmm5, 48(%edx)
1237 movaps %xmm4, 32(%edx)
1238 movaps %xmm3, 16(%edx)
1239 movaps %xmm2, (%edx)
1240 lea 64(%edx), %edx
1241 jmp L(Shl7LoopStart)
1242
1243L(Shl7LoopExit):
1244 movlpd (%ecx), %xmm0
1245 movl 5(%ecx), %esi
1246 movlpd %xmm0, (%edx)
1247 movl %esi, 5(%edx)
1248 mov $9, %esi
1249 jmp L(CopyFrom1To16Bytes)
1250
1251 .p2align 4
1252L(Shl8):
1253 movaps -8(%ecx), %xmm1
1254 movaps 8(%ecx), %xmm2
1255L(Shl8Start):
1256 pcmpeqb %xmm2, %xmm0
1257 pmovmskb %xmm0, %eax
1258 movaps %xmm2, %xmm3
1259#ifdef USE_AS_STRNCPY
1260 sub $16, %ebx
1261 jbe L(StrncpyExit8Case2OrCase3)
1262#endif
1263 test %eax, %eax
1264 jnz L(Shl8LoopExit)
1265
1266 palignr $8, %xmm1, %xmm2
1267 movaps %xmm3, %xmm1
1268 movaps %xmm2, (%edx)
1269 movaps 24(%ecx), %xmm2
1270
1271 pcmpeqb %xmm2, %xmm0
1272 lea 16(%edx), %edx
1273 pmovmskb %xmm0, %eax
1274 lea 16(%ecx), %ecx
1275 movaps %xmm2, %xmm3
1276#ifdef USE_AS_STRNCPY
1277 sub $16, %ebx
1278 jbe L(StrncpyExit8Case2OrCase3)
1279#endif
1280 test %eax, %eax
1281 jnz L(Shl8LoopExit)
1282
1283 palignr $8, %xmm1, %xmm2
1284 movaps %xmm2, (%edx)
1285 movaps 24(%ecx), %xmm2
1286 movaps %xmm3, %xmm1
1287
1288 pcmpeqb %xmm2, %xmm0
1289 lea 16(%edx), %edx
1290 pmovmskb %xmm0, %eax
1291 lea 16(%ecx), %ecx
1292 movaps %xmm2, %xmm3
1293#ifdef USE_AS_STRNCPY
1294 sub $16, %ebx
1295 jbe L(StrncpyExit8Case2OrCase3)
1296#endif
1297 test %eax, %eax
1298 jnz L(Shl8LoopExit)
1299
1300 palignr $8, %xmm1, %xmm2
1301 movaps %xmm2, (%edx)
1302 movaps 24(%ecx), %xmm2
1303
1304 pcmpeqb %xmm2, %xmm0
1305 lea 16(%edx), %edx
1306 pmovmskb %xmm0, %eax
1307 lea 16(%ecx), %ecx
1308#ifdef USE_AS_STRNCPY
1309 sub $16, %ebx
1310 jbe L(StrncpyExit8Case2OrCase3)
1311#endif
1312 test %eax, %eax
1313 jnz L(Shl8LoopExit)
1314
1315 palignr $8, %xmm3, %xmm2
1316 movaps %xmm2, (%edx)
1317 lea 24(%ecx), %ecx
1318 lea 16(%edx), %edx
1319
1320 mov %ecx, %eax
1321 and $-0x40, %ecx
1322 sub %ecx, %eax
1323 lea -8(%ecx), %ecx
1324 sub %eax, %edx
1325#ifdef USE_AS_STRNCPY
1326 add %eax, %ebx
1327#endif
1328 movaps -8(%ecx), %xmm1
1329
1330L(Shl8LoopStart):
1331 movaps 8(%ecx), %xmm2
1332 movaps 24(%ecx), %xmm3
1333 movaps %xmm3, %xmm6
1334 movaps 40(%ecx), %xmm4
1335 movaps %xmm4, %xmm7
1336 movaps 56(%ecx), %xmm5
1337 pminub %xmm2, %xmm6
1338 pminub %xmm5, %xmm7
1339 pminub %xmm6, %xmm7
1340 pcmpeqb %xmm0, %xmm7
1341 pmovmskb %xmm7, %eax
1342 movaps %xmm5, %xmm7
1343 palignr $8, %xmm4, %xmm5
1344 palignr $8, %xmm3, %xmm4
1345 test %eax, %eax
1346 jnz L(Shl8Start)
1347#ifdef USE_AS_STRNCPY
1348 sub $64, %ebx
1349 jbe L(StrncpyLeave8)
1350#endif
1351 palignr $8, %xmm2, %xmm3
1352 lea 64(%ecx), %ecx
1353 palignr $8, %xmm1, %xmm2
1354 movaps %xmm7, %xmm1
1355 movaps %xmm5, 48(%edx)
1356 movaps %xmm4, 32(%edx)
1357 movaps %xmm3, 16(%edx)
1358 movaps %xmm2, (%edx)
1359 lea 64(%edx), %edx
1360 jmp L(Shl8LoopStart)
1361
1362L(Shl8LoopExit):
1363 movlpd (%ecx), %xmm0
1364 movlpd %xmm0, (%edx)
1365 mov $8, %esi
1366 jmp L(CopyFrom1To16Bytes)
1367
/*
 * L(Shl9): copy path for a source cursor that sits 9 bytes past a 16-byte
 * boundary.  Every source access is an aligned movaps at offset 16*k - 9
 * from %ecx, every store is an aligned movaps to %edx, and palignr $9
 * stitches two neighbouring source chunks into one destination chunk.
 *
 * Registers as used by the code below:
 *   %ecx  source cursor            %edx  destination cursor
 *   %xmm1 previous source chunk    %xmm2 current source chunk
 *   %xmm3 saved copy of the current chunk (becomes the next "previous")
 *   %xmm0 NUL-compare register; presumably all-zero on entry (set up
 *         outside this block).  pcmpeqb overwrites it with the compare
 *         result, which is all-zero again whenever no NUL was found.
 *   %eax  byte mask from pmovmskb (bit i set when byte i is NUL)
 *   %ebx  (USE_AS_STRNCPY) byte budget, decremented per chunk
 *
 * Shape: four single-chunk steps, then the source is rounded down to a
 * 64-byte boundary and L(Shl9LoopStart) processes 64 bytes per iteration.
 */
1368 .p2align 4
1369L(Shl9):
1370 movaps -9(%ecx), %xmm1
1371 movaps 7(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop once it has seen a NUL. */
1372L(Shl9Start):
1373 pcmpeqb %xmm2, %xmm0
1374 pmovmskb %xmm0, %eax
1375 movaps %xmm2, %xmm3
1376#ifdef USE_AS_STRNCPY
1377 sub $16, %ebx
1378 jbe L(StrncpyExit9Case2OrCase3)
1379#endif
1380 test %eax, %eax
1381 jnz L(Shl9LoopExit)
1382
/* Step 1: no NUL in the current chunk, so emit one aligned 16-byte store. */
1383 palignr $9, %xmm1, %xmm2
1384 movaps %xmm3, %xmm1
1385 movaps %xmm2, (%edx)
1386 movaps 23(%ecx), %xmm2
1387
1388 pcmpeqb %xmm2, %xmm0
1389 lea 16(%edx), %edx
1390 pmovmskb %xmm0, %eax
1391 lea 16(%ecx), %ecx
1392 movaps %xmm2, %xmm3
1393#ifdef USE_AS_STRNCPY
1394 sub $16, %ebx
1395 jbe L(StrncpyExit9Case2OrCase3)
1396#endif
1397 test %eax, %eax
1398 jnz L(Shl9LoopExit)
1399
/* Step 2. */
1400 palignr $9, %xmm1, %xmm2
1401 movaps %xmm2, (%edx)
1402 movaps 23(%ecx), %xmm2
1403 movaps %xmm3, %xmm1
1404
1405 pcmpeqb %xmm2, %xmm0
1406 lea 16(%edx), %edx
1407 pmovmskb %xmm0, %eax
1408 lea 16(%ecx), %ecx
1409 movaps %xmm2, %xmm3
1410#ifdef USE_AS_STRNCPY
1411 sub $16, %ebx
1412 jbe L(StrncpyExit9Case2OrCase3)
1413#endif
1414 test %eax, %eax
1415 jnz L(Shl9LoopExit)
1416
/* Step 3: %xmm3 is deliberately not refreshed after the next load; it
   still holds this chunk and serves as "previous" in step 4. */
1417 palignr $9, %xmm1, %xmm2
1418 movaps %xmm2, (%edx)
1419 movaps 23(%ecx), %xmm2
1420
1421 pcmpeqb %xmm2, %xmm0
1422 lea 16(%edx), %edx
1423 pmovmskb %xmm0, %eax
1424 lea 16(%ecx), %ecx
1425#ifdef USE_AS_STRNCPY
1426 sub $16, %ebx
1427 jbe L(StrncpyExit9Case2OrCase3)
1428#endif
1429 test %eax, %eax
1430 jnz L(Shl9LoopExit)
1431
/* Step 4. */
1432 palignr $9, %xmm3, %xmm2
1433 movaps %xmm2, (%edx)
1434 lea 23(%ecx), %ecx
1435 lea 16(%edx), %edx
1436
/* Round the source down to a 64-byte boundary.  %eax receives the number
   of bytes stepped back; the destination is moved back (and, for strncpy,
   the budget raised) by the same amount, so those bytes are copied again
   by the loop.  The final lea restores the -7 bias the loads expect. */
1437 mov %ecx, %eax
1438 and $-0x40, %ecx
1439 sub %ecx, %eax
1440 lea -7(%ecx), %ecx
1441 sub %eax, %edx
1442#ifdef USE_AS_STRNCPY
1443 add %eax, %ebx
1444#endif
1445 movaps -9(%ecx), %xmm1
1446
/* 64-byte loop: pminub folds the four chunks into a byte-wise minimum, so
   one pcmpeqb/pmovmskb detects a NUL anywhere in the 64 bytes.  %xmm7
   keeps the raw last chunk to become the next iteration's %xmm1. */
1447L(Shl9LoopStart):
1448 movaps 7(%ecx), %xmm2
1449 movaps 23(%ecx), %xmm3
1450 movaps %xmm3, %xmm6
1451 movaps 39(%ecx), %xmm4
1452 movaps %xmm4, %xmm7
1453 movaps 55(%ecx), %xmm5
1454 pminub %xmm2, %xmm6
1455 pminub %xmm5, %xmm7
1456 pminub %xmm6, %xmm7
1457 pcmpeqb %xmm0, %xmm7
1458 pmovmskb %xmm7, %eax
1459 movaps %xmm5, %xmm7
1460 palignr $9, %xmm4, %xmm5
1461 palignr $9, %xmm3, %xmm4
1462 test %eax, %eax
/* NUL somewhere in these 64 bytes: %xmm1/%xmm2 are still unmodified, so
   fall back to the chunk-at-a-time path to locate it. */
1463 jnz L(Shl9Start)
1464#ifdef USE_AS_STRNCPY
1465 sub $64, %ebx
1466 jbe L(StrncpyLeave9)
1467#endif
1468 palignr $9, %xmm2, %xmm3
1469 lea 64(%ecx), %ecx
1470 palignr $9, %xmm1, %xmm2
1471 movaps %xmm7, %xmm1
1472 movaps %xmm5, 48(%edx)
1473 movaps %xmm4, 32(%edx)
1474 movaps %xmm3, 16(%edx)
1475 movaps %xmm2, (%edx)
1476 lea 64(%edx), %edx
1477 jmp L(Shl9LoopStart)
1478
/* A NUL is in the chunk at 7(%ecx).  Copy the 7 bytes before it (via an
   8-byte move starting one byte early), then let L(CopyFrom1To16Bytes)
   advance both pointers by %esi = 7 and copy up to the terminator. */
1479L(Shl9LoopExit):
1480 movlpd -1(%ecx), %xmm0
1481 movlpd %xmm0, -1(%edx)
1482 mov $7, %esi
1483 jmp L(CopyFrom1To16Bytes)
1484
/*
 * L(Shl10): copy path for a source cursor that sits 10 bytes past a
 * 16-byte boundary.  Aligned movaps loads at offsets 16*k - 10 from %ecx
 * are merged pairwise with palignr $10 and written with aligned movaps
 * stores to %edx.
 *
 *   %xmm1 previous chunk, %xmm2 current chunk, %xmm3 saved current chunk
 *   %xmm0 NUL-compare register (presumably zero on entry; stays zero after
 *         every compare that finds no NUL)
 *   %eax  NUL byte mask, %ebx strncpy byte budget (USE_AS_STRNCPY only)
 *
 * Four single-chunk steps are followed by a 64-byte-per-iteration loop.
 */
1485 .p2align 4
1486L(Shl10):
1487 movaps -10(%ecx), %xmm1
1488 movaps 6(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop once it has seen a NUL. */
1489L(Shl10Start):
1490 pcmpeqb %xmm2, %xmm0
1491 pmovmskb %xmm0, %eax
1492 movaps %xmm2, %xmm3
1493#ifdef USE_AS_STRNCPY
1494 sub $16, %ebx
1495 jbe L(StrncpyExit10Case2OrCase3)
1496#endif
1497 test %eax, %eax
1498 jnz L(Shl10LoopExit)
1499
/* Step 1. */
1500 palignr $10, %xmm1, %xmm2
1501 movaps %xmm3, %xmm1
1502 movaps %xmm2, (%edx)
1503 movaps 22(%ecx), %xmm2
1504
1505 pcmpeqb %xmm2, %xmm0
1506 lea 16(%edx), %edx
1507 pmovmskb %xmm0, %eax
1508 lea 16(%ecx), %ecx
1509 movaps %xmm2, %xmm3
1510#ifdef USE_AS_STRNCPY
1511 sub $16, %ebx
1512 jbe L(StrncpyExit10Case2OrCase3)
1513#endif
1514 test %eax, %eax
1515 jnz L(Shl10LoopExit)
1516
/* Step 2. */
1517 palignr $10, %xmm1, %xmm2
1518 movaps %xmm2, (%edx)
1519 movaps 22(%ecx), %xmm2
1520 movaps %xmm3, %xmm1
1521
1522 pcmpeqb %xmm2, %xmm0
1523 lea 16(%edx), %edx
1524 pmovmskb %xmm0, %eax
1525 lea 16(%ecx), %ecx
1526 movaps %xmm2, %xmm3
1527#ifdef USE_AS_STRNCPY
1528 sub $16, %ebx
1529 jbe L(StrncpyExit10Case2OrCase3)
1530#endif
1531 test %eax, %eax
1532 jnz L(Shl10LoopExit)
1533
/* Step 3: %xmm3 is left holding this chunk for use as "previous" in step 4. */
1534 palignr $10, %xmm1, %xmm2
1535 movaps %xmm2, (%edx)
1536 movaps 22(%ecx), %xmm2
1537
1538 pcmpeqb %xmm2, %xmm0
1539 lea 16(%edx), %edx
1540 pmovmskb %xmm0, %eax
1541 lea 16(%ecx), %ecx
1542#ifdef USE_AS_STRNCPY
1543 sub $16, %ebx
1544 jbe L(StrncpyExit10Case2OrCase3)
1545#endif
1546 test %eax, %eax
1547 jnz L(Shl10LoopExit)
1548
/* Step 4. */
1549 palignr $10, %xmm3, %xmm2
1550 movaps %xmm2, (%edx)
1551 lea 22(%ecx), %ecx
1552 lea 16(%edx), %edx
1553
/* Round the source down to a 64-byte boundary; %eax = bytes stepped back,
   applied equally to the destination and (strncpy) the budget.  The lea
   restores the -6 bias the loads expect. */
1554 mov %ecx, %eax
1555 and $-0x40, %ecx
1556 sub %ecx, %eax
1557 lea -6(%ecx), %ecx
1558 sub %eax, %edx
1559#ifdef USE_AS_STRNCPY
1560 add %eax, %ebx
1561#endif
1562 movaps -10(%ecx), %xmm1
1563
/* 64-byte loop: the byte-wise minimum of four chunks contains a zero byte
   exactly when any of them does. */
1564L(Shl10LoopStart):
1565 movaps 6(%ecx), %xmm2
1566 movaps 22(%ecx), %xmm3
1567 movaps %xmm3, %xmm6
1568 movaps 38(%ecx), %xmm4
1569 movaps %xmm4, %xmm7
1570 movaps 54(%ecx), %xmm5
1571 pminub %xmm2, %xmm6
1572 pminub %xmm5, %xmm7
1573 pminub %xmm6, %xmm7
1574 pcmpeqb %xmm0, %xmm7
1575 pmovmskb %xmm7, %eax
1576 movaps %xmm5, %xmm7
1577 palignr $10, %xmm4, %xmm5
1578 palignr $10, %xmm3, %xmm4
1579 test %eax, %eax
/* NUL present: %xmm1/%xmm2 are untouched, redo this span chunk by chunk. */
1580 jnz L(Shl10Start)
1581#ifdef USE_AS_STRNCPY
1582 sub $64, %ebx
1583 jbe L(StrncpyLeave10)
1584#endif
1585 palignr $10, %xmm2, %xmm3
1586 lea 64(%ecx), %ecx
1587 palignr $10, %xmm1, %xmm2
1588 movaps %xmm7, %xmm1
1589 movaps %xmm5, 48(%edx)
1590 movaps %xmm4, 32(%edx)
1591 movaps %xmm3, 16(%edx)
1592 movaps %xmm2, (%edx)
1593 lea 64(%edx), %edx
1594 jmp L(Shl10LoopStart)
1595
/* Copy the 6 bytes preceding the NUL-bearing chunk (8-byte move starting
   two bytes early), then finish in L(CopyFrom1To16Bytes) with %esi = 6. */
1596L(Shl10LoopExit):
1597 movlpd -2(%ecx), %xmm0
1598 movlpd %xmm0, -2(%edx)
1599 mov $6, %esi
1600 jmp L(CopyFrom1To16Bytes)
1601
/*
 * L(Shl11): copy path for a source cursor that sits 11 bytes past a
 * 16-byte boundary.  Aligned movaps loads at offsets 16*k - 11 from %ecx
 * are merged pairwise with palignr $11 and written with aligned movaps
 * stores to %edx.
 *
 *   %xmm1 previous chunk, %xmm2 current chunk, %xmm3 saved current chunk
 *   %xmm0 NUL-compare register (presumably zero on entry; stays zero after
 *         every compare that finds no NUL)
 *   %eax  NUL byte mask, %ebx strncpy byte budget (USE_AS_STRNCPY only)
 *
 * Four single-chunk steps are followed by a 64-byte-per-iteration loop.
 */
1602 .p2align 4
1603L(Shl11):
1604 movaps -11(%ecx), %xmm1
1605 movaps 5(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop once it has seen a NUL. */
1606L(Shl11Start):
1607 pcmpeqb %xmm2, %xmm0
1608 pmovmskb %xmm0, %eax
1609 movaps %xmm2, %xmm3
1610#ifdef USE_AS_STRNCPY
1611 sub $16, %ebx
1612 jbe L(StrncpyExit11Case2OrCase3)
1613#endif
1614 test %eax, %eax
1615 jnz L(Shl11LoopExit)
1616
/* Step 1. */
1617 palignr $11, %xmm1, %xmm2
1618 movaps %xmm3, %xmm1
1619 movaps %xmm2, (%edx)
1620 movaps 21(%ecx), %xmm2
1621
1622 pcmpeqb %xmm2, %xmm0
1623 lea 16(%edx), %edx
1624 pmovmskb %xmm0, %eax
1625 lea 16(%ecx), %ecx
1626 movaps %xmm2, %xmm3
1627#ifdef USE_AS_STRNCPY
1628 sub $16, %ebx
1629 jbe L(StrncpyExit11Case2OrCase3)
1630#endif
1631 test %eax, %eax
1632 jnz L(Shl11LoopExit)
1633
/* Step 2. */
1634 palignr $11, %xmm1, %xmm2
1635 movaps %xmm2, (%edx)
1636 movaps 21(%ecx), %xmm2
1637 movaps %xmm3, %xmm1
1638
1639 pcmpeqb %xmm2, %xmm0
1640 lea 16(%edx), %edx
1641 pmovmskb %xmm0, %eax
1642 lea 16(%ecx), %ecx
1643 movaps %xmm2, %xmm3
1644#ifdef USE_AS_STRNCPY
1645 sub $16, %ebx
1646 jbe L(StrncpyExit11Case2OrCase3)
1647#endif
1648 test %eax, %eax
1649 jnz L(Shl11LoopExit)
1650
/* Step 3: %xmm3 is left holding this chunk for use as "previous" in step 4. */
1651 palignr $11, %xmm1, %xmm2
1652 movaps %xmm2, (%edx)
1653 movaps 21(%ecx), %xmm2
1654
1655 pcmpeqb %xmm2, %xmm0
1656 lea 16(%edx), %edx
1657 pmovmskb %xmm0, %eax
1658 lea 16(%ecx), %ecx
1659#ifdef USE_AS_STRNCPY
1660 sub $16, %ebx
1661 jbe L(StrncpyExit11Case2OrCase3)
1662#endif
1663 test %eax, %eax
1664 jnz L(Shl11LoopExit)
1665
/* Step 4. */
1666 palignr $11, %xmm3, %xmm2
1667 movaps %xmm2, (%edx)
1668 lea 21(%ecx), %ecx
1669 lea 16(%edx), %edx
1670
/* Round the source down to a 64-byte boundary; %eax = bytes stepped back,
   applied equally to the destination and (strncpy) the budget.  The lea
   restores the -5 bias the loads expect. */
1671 mov %ecx, %eax
1672 and $-0x40, %ecx
1673 sub %ecx, %eax
1674 lea -5(%ecx), %ecx
1675 sub %eax, %edx
1676#ifdef USE_AS_STRNCPY
1677 add %eax, %ebx
1678#endif
1679 movaps -11(%ecx), %xmm1
1680
/* 64-byte loop: the byte-wise minimum of four chunks contains a zero byte
   exactly when any of them does. */
1681L(Shl11LoopStart):
1682 movaps 5(%ecx), %xmm2
1683 movaps 21(%ecx), %xmm3
1684 movaps %xmm3, %xmm6
1685 movaps 37(%ecx), %xmm4
1686 movaps %xmm4, %xmm7
1687 movaps 53(%ecx), %xmm5
1688 pminub %xmm2, %xmm6
1689 pminub %xmm5, %xmm7
1690 pminub %xmm6, %xmm7
1691 pcmpeqb %xmm0, %xmm7
1692 pmovmskb %xmm7, %eax
1693 movaps %xmm5, %xmm7
1694 palignr $11, %xmm4, %xmm5
1695 palignr $11, %xmm3, %xmm4
1696 test %eax, %eax
/* NUL present: %xmm1/%xmm2 are untouched, redo this span chunk by chunk. */
1697 jnz L(Shl11Start)
1698#ifdef USE_AS_STRNCPY
1699 sub $64, %ebx
1700 jbe L(StrncpyLeave11)
1701#endif
1702 palignr $11, %xmm2, %xmm3
1703 lea 64(%ecx), %ecx
1704 palignr $11, %xmm1, %xmm2
1705 movaps %xmm7, %xmm1
1706 movaps %xmm5, 48(%edx)
1707 movaps %xmm4, 32(%edx)
1708 movaps %xmm3, 16(%edx)
1709 movaps %xmm2, (%edx)
1710 lea 64(%edx), %edx
1711 jmp L(Shl11LoopStart)
1712
/* Copy the 5 bytes preceding the NUL-bearing chunk (8-byte move starting
   three bytes early), then finish in L(CopyFrom1To16Bytes) with %esi = 5. */
1713L(Shl11LoopExit):
1714 movlpd -3(%ecx), %xmm0
1715 movlpd %xmm0, -3(%edx)
1716 mov $5, %esi
1717 jmp L(CopyFrom1To16Bytes)
1718
/*
 * L(Shl12): copy path for a source cursor that sits 12 bytes past a
 * 16-byte boundary.  Aligned movaps loads at offsets 16*k - 12 from %ecx
 * are merged pairwise with palignr $12 and written with aligned movaps
 * stores to %edx.
 *
 *   %xmm1 previous chunk, %xmm2 current chunk, %xmm3 saved current chunk
 *   %xmm0 NUL-compare register (presumably zero on entry; stays zero after
 *         every compare that finds no NUL)
 *   %eax  NUL byte mask, %ebx strncpy byte budget (USE_AS_STRNCPY only)
 *
 * Four single-chunk steps are followed by a 64-byte-per-iteration loop.
 */
1719 .p2align 4
1720L(Shl12):
1721 movaps -12(%ecx), %xmm1
1722 movaps 4(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop once it has seen a NUL. */
1723L(Shl12Start):
1724 pcmpeqb %xmm2, %xmm0
1725 pmovmskb %xmm0, %eax
1726 movaps %xmm2, %xmm3
1727#ifdef USE_AS_STRNCPY
1728 sub $16, %ebx
1729 jbe L(StrncpyExit12Case2OrCase3)
1730#endif
1731 test %eax, %eax
1732 jnz L(Shl12LoopExit)
1733
/* Step 1. */
1734 palignr $12, %xmm1, %xmm2
1735 movaps %xmm3, %xmm1
1736 movaps %xmm2, (%edx)
1737 movaps 20(%ecx), %xmm2
1738
1739 pcmpeqb %xmm2, %xmm0
1740 lea 16(%edx), %edx
1741 pmovmskb %xmm0, %eax
1742 lea 16(%ecx), %ecx
1743 movaps %xmm2, %xmm3
1744#ifdef USE_AS_STRNCPY
1745 sub $16, %ebx
1746 jbe L(StrncpyExit12Case2OrCase3)
1747#endif
1748 test %eax, %eax
1749 jnz L(Shl12LoopExit)
1750
/* Step 2. */
1751 palignr $12, %xmm1, %xmm2
1752 movaps %xmm2, (%edx)
1753 movaps 20(%ecx), %xmm2
1754 movaps %xmm3, %xmm1
1755
1756 pcmpeqb %xmm2, %xmm0
1757 lea 16(%edx), %edx
1758 pmovmskb %xmm0, %eax
1759 lea 16(%ecx), %ecx
1760 movaps %xmm2, %xmm3
1761#ifdef USE_AS_STRNCPY
1762 sub $16, %ebx
1763 jbe L(StrncpyExit12Case2OrCase3)
1764#endif
1765 test %eax, %eax
1766 jnz L(Shl12LoopExit)
1767
/* Step 3: %xmm3 is left holding this chunk for use as "previous" in step 4. */
1768 palignr $12, %xmm1, %xmm2
1769 movaps %xmm2, (%edx)
1770 movaps 20(%ecx), %xmm2
1771
1772 pcmpeqb %xmm2, %xmm0
1773 lea 16(%edx), %edx
1774 pmovmskb %xmm0, %eax
1775 lea 16(%ecx), %ecx
1776#ifdef USE_AS_STRNCPY
1777 sub $16, %ebx
1778 jbe L(StrncpyExit12Case2OrCase3)
1779#endif
1780 test %eax, %eax
1781 jnz L(Shl12LoopExit)
1782
/* Step 4. */
1783 palignr $12, %xmm3, %xmm2
1784 movaps %xmm2, (%edx)
1785 lea 20(%ecx), %ecx
1786 lea 16(%edx), %edx
1787
/* Round the source down to a 64-byte boundary; %eax = bytes stepped back,
   applied equally to the destination and (strncpy) the budget.  The lea
   restores the -4 bias the loads expect. */
1788 mov %ecx, %eax
1789 and $-0x40, %ecx
1790 sub %ecx, %eax
1791 lea -4(%ecx), %ecx
1792 sub %eax, %edx
1793#ifdef USE_AS_STRNCPY
1794 add %eax, %ebx
1795#endif
1796 movaps -12(%ecx), %xmm1
1797
/* 64-byte loop: the byte-wise minimum of four chunks contains a zero byte
   exactly when any of them does. */
1798L(Shl12LoopStart):
1799 movaps 4(%ecx), %xmm2
1800 movaps 20(%ecx), %xmm3
1801 movaps %xmm3, %xmm6
1802 movaps 36(%ecx), %xmm4
1803 movaps %xmm4, %xmm7
1804 movaps 52(%ecx), %xmm5
1805 pminub %xmm2, %xmm6
1806 pminub %xmm5, %xmm7
1807 pminub %xmm6, %xmm7
1808 pcmpeqb %xmm0, %xmm7
1809 pmovmskb %xmm7, %eax
1810 movaps %xmm5, %xmm7
1811 palignr $12, %xmm4, %xmm5
1812 palignr $12, %xmm3, %xmm4
1813 test %eax, %eax
/* NUL present: %xmm1/%xmm2 are untouched, redo this span chunk by chunk. */
1814 jnz L(Shl12Start)
1815#ifdef USE_AS_STRNCPY
1816 sub $64, %ebx
1817 jbe L(StrncpyLeave12)
1818#endif
1819 palignr $12, %xmm2, %xmm3
1820 lea 64(%ecx), %ecx
1821 palignr $12, %xmm1, %xmm2
1822 movaps %xmm7, %xmm1
1823 movaps %xmm5, 48(%edx)
1824 movaps %xmm4, 32(%edx)
1825 movaps %xmm3, 16(%edx)
1826 movaps %xmm2, (%edx)
1827 lea 64(%edx), %edx
1828 jmp L(Shl12LoopStart)
1829
/* Copy the 4 bytes preceding the NUL-bearing chunk through %esi, then
   reuse %esi as the pointer advance (4) for L(CopyFrom1To16Bytes). */
1830L(Shl12LoopExit):
1831 movl (%ecx), %esi
1832 movl %esi, (%edx)
1833 mov $4, %esi
1834 jmp L(CopyFrom1To16Bytes)
1835
/*
 * L(Shl13): copy path for a source cursor that sits 13 bytes past a
 * 16-byte boundary.  Aligned movaps loads at offsets 16*k - 13 from %ecx
 * are merged pairwise with palignr $13 and written with aligned movaps
 * stores to %edx.
 *
 *   %xmm1 previous chunk, %xmm2 current chunk, %xmm3 saved current chunk
 *   %xmm0 NUL-compare register (presumably zero on entry; stays zero after
 *         every compare that finds no NUL)
 *   %eax  NUL byte mask, %ebx strncpy byte budget (USE_AS_STRNCPY only)
 *
 * Four single-chunk steps are followed by a 64-byte-per-iteration loop.
 */
1836 .p2align 4
1837L(Shl13):
1838 movaps -13(%ecx), %xmm1
1839 movaps 3(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop once it has seen a NUL. */
1840L(Shl13Start):
1841 pcmpeqb %xmm2, %xmm0
1842 pmovmskb %xmm0, %eax
1843 movaps %xmm2, %xmm3
1844#ifdef USE_AS_STRNCPY
1845 sub $16, %ebx
1846 jbe L(StrncpyExit13Case2OrCase3)
1847#endif
1848 test %eax, %eax
1849 jnz L(Shl13LoopExit)
1850
/* Step 1. */
1851 palignr $13, %xmm1, %xmm2
1852 movaps %xmm3, %xmm1
1853 movaps %xmm2, (%edx)
1854 movaps 19(%ecx), %xmm2
1855
1856 pcmpeqb %xmm2, %xmm0
1857 lea 16(%edx), %edx
1858 pmovmskb %xmm0, %eax
1859 lea 16(%ecx), %ecx
1860 movaps %xmm2, %xmm3
1861#ifdef USE_AS_STRNCPY
1862 sub $16, %ebx
1863 jbe L(StrncpyExit13Case2OrCase3)
1864#endif
1865 test %eax, %eax
1866 jnz L(Shl13LoopExit)
1867
/* Step 2. */
1868 palignr $13, %xmm1, %xmm2
1869 movaps %xmm2, (%edx)
1870 movaps 19(%ecx), %xmm2
1871 movaps %xmm3, %xmm1
1872
1873 pcmpeqb %xmm2, %xmm0
1874 lea 16(%edx), %edx
1875 pmovmskb %xmm0, %eax
1876 lea 16(%ecx), %ecx
1877 movaps %xmm2, %xmm3
1878#ifdef USE_AS_STRNCPY
1879 sub $16, %ebx
1880 jbe L(StrncpyExit13Case2OrCase3)
1881#endif
1882 test %eax, %eax
1883 jnz L(Shl13LoopExit)
1884
/* Step 3: %xmm3 is left holding this chunk for use as "previous" in step 4. */
1885 palignr $13, %xmm1, %xmm2
1886 movaps %xmm2, (%edx)
1887 movaps 19(%ecx), %xmm2
1888
1889 pcmpeqb %xmm2, %xmm0
1890 lea 16(%edx), %edx
1891 pmovmskb %xmm0, %eax
1892 lea 16(%ecx), %ecx
1893#ifdef USE_AS_STRNCPY
1894 sub $16, %ebx
1895 jbe L(StrncpyExit13Case2OrCase3)
1896#endif
1897 test %eax, %eax
1898 jnz L(Shl13LoopExit)
1899
/* Step 4. */
1900 palignr $13, %xmm3, %xmm2
1901 movaps %xmm2, (%edx)
1902 lea 19(%ecx), %ecx
1903 lea 16(%edx), %edx
1904
/* Round the source down to a 64-byte boundary; %eax = bytes stepped back,
   applied equally to the destination and (strncpy) the budget.  The lea
   restores the -3 bias the loads expect. */
1905 mov %ecx, %eax
1906 and $-0x40, %ecx
1907 sub %ecx, %eax
1908 lea -3(%ecx), %ecx
1909 sub %eax, %edx
1910#ifdef USE_AS_STRNCPY
1911 add %eax, %ebx
1912#endif
1913 movaps -13(%ecx), %xmm1
1914
/* 64-byte loop: the byte-wise minimum of four chunks contains a zero byte
   exactly when any of them does. */
1915L(Shl13LoopStart):
1916 movaps 3(%ecx), %xmm2
1917 movaps 19(%ecx), %xmm3
1918 movaps %xmm3, %xmm6
1919 movaps 35(%ecx), %xmm4
1920 movaps %xmm4, %xmm7
1921 movaps 51(%ecx), %xmm5
1922 pminub %xmm2, %xmm6
1923 pminub %xmm5, %xmm7
1924 pminub %xmm6, %xmm7
1925 pcmpeqb %xmm0, %xmm7
1926 pmovmskb %xmm7, %eax
1927 movaps %xmm5, %xmm7
1928 palignr $13, %xmm4, %xmm5
1929 palignr $13, %xmm3, %xmm4
1930 test %eax, %eax
/* NUL present: %xmm1/%xmm2 are untouched, redo this span chunk by chunk. */
1931 jnz L(Shl13Start)
1932#ifdef USE_AS_STRNCPY
1933 sub $64, %ebx
1934 jbe L(StrncpyLeave13)
1935#endif
1936 palignr $13, %xmm2, %xmm3
1937 lea 64(%ecx), %ecx
1938 palignr $13, %xmm1, %xmm2
1939 movaps %xmm7, %xmm1
1940 movaps %xmm5, 48(%edx)
1941 movaps %xmm4, 32(%edx)
1942 movaps %xmm3, 16(%edx)
1943 movaps %xmm2, (%edx)
1944 lea 64(%edx), %edx
1945 jmp L(Shl13LoopStart)
1946
/* Copy the 3 bytes preceding the NUL-bearing chunk (4-byte move starting
   one byte early, staged through %esi), then finish in
   L(CopyFrom1To16Bytes) with %esi = 3. */
1947L(Shl13LoopExit):
1948 movl -1(%ecx), %esi
1949 movl %esi, -1(%edx)
1950 mov $3, %esi
1951 jmp L(CopyFrom1To16Bytes)
1952
/*
 * L(Shl14): copy path for a source cursor that sits 14 bytes past a
 * 16-byte boundary.  Aligned movaps loads at offsets 16*k - 14 from %ecx
 * are merged pairwise with palignr $14 and written with aligned movaps
 * stores to %edx.
 *
 *   %xmm1 previous chunk, %xmm2 current chunk, %xmm3 saved current chunk
 *   %xmm0 NUL-compare register (presumably zero on entry; stays zero after
 *         every compare that finds no NUL)
 *   %eax  NUL byte mask, %ebx strncpy byte budget (USE_AS_STRNCPY only)
 *
 * Four single-chunk steps are followed by a 64-byte-per-iteration loop.
 */
1953 .p2align 4
1954L(Shl14):
1955 movaps -14(%ecx), %xmm1
1956 movaps 2(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop once it has seen a NUL. */
1957L(Shl14Start):
1958 pcmpeqb %xmm2, %xmm0
1959 pmovmskb %xmm0, %eax
1960 movaps %xmm2, %xmm3
1961#ifdef USE_AS_STRNCPY
1962 sub $16, %ebx
1963 jbe L(StrncpyExit14Case2OrCase3)
1964#endif
1965 test %eax, %eax
1966 jnz L(Shl14LoopExit)
1967
/* Step 1. */
1968 palignr $14, %xmm1, %xmm2
1969 movaps %xmm3, %xmm1
1970 movaps %xmm2, (%edx)
1971 movaps 18(%ecx), %xmm2
1972
1973 pcmpeqb %xmm2, %xmm0
1974 lea 16(%edx), %edx
1975 pmovmskb %xmm0, %eax
1976 lea 16(%ecx), %ecx
1977 movaps %xmm2, %xmm3
1978#ifdef USE_AS_STRNCPY
1979 sub $16, %ebx
1980 jbe L(StrncpyExit14Case2OrCase3)
1981#endif
1982 test %eax, %eax
1983 jnz L(Shl14LoopExit)
1984
/* Step 2. */
1985 palignr $14, %xmm1, %xmm2
1986 movaps %xmm2, (%edx)
1987 movaps 18(%ecx), %xmm2
1988 movaps %xmm3, %xmm1
1989
1990 pcmpeqb %xmm2, %xmm0
1991 lea 16(%edx), %edx
1992 pmovmskb %xmm0, %eax
1993 lea 16(%ecx), %ecx
1994 movaps %xmm2, %xmm3
1995#ifdef USE_AS_STRNCPY
1996 sub $16, %ebx
1997 jbe L(StrncpyExit14Case2OrCase3)
1998#endif
1999 test %eax, %eax
2000 jnz L(Shl14LoopExit)
2001
/* Step 3: %xmm3 is left holding this chunk for use as "previous" in step 4. */
2002 palignr $14, %xmm1, %xmm2
2003 movaps %xmm2, (%edx)
2004 movaps 18(%ecx), %xmm2
2005
2006 pcmpeqb %xmm2, %xmm0
2007 lea 16(%edx), %edx
2008 pmovmskb %xmm0, %eax
2009 lea 16(%ecx), %ecx
2010#ifdef USE_AS_STRNCPY
2011 sub $16, %ebx
2012 jbe L(StrncpyExit14Case2OrCase3)
2013#endif
2014 test %eax, %eax
2015 jnz L(Shl14LoopExit)
2016
/* Step 4. */
2017 palignr $14, %xmm3, %xmm2
2018 movaps %xmm2, (%edx)
2019 lea 18(%ecx), %ecx
2020 lea 16(%edx), %edx
2021
/* Round the source down to a 64-byte boundary; %eax = bytes stepped back,
   applied equally to the destination and (strncpy) the budget.  The lea
   restores the -2 bias the loads expect. */
2022 mov %ecx, %eax
2023 and $-0x40, %ecx
2024 sub %ecx, %eax
2025 lea -2(%ecx), %ecx
2026 sub %eax, %edx
2027#ifdef USE_AS_STRNCPY
2028 add %eax, %ebx
2029#endif
2030 movaps -14(%ecx), %xmm1
2031
/* 64-byte loop: the byte-wise minimum of four chunks contains a zero byte
   exactly when any of them does. */
2032L(Shl14LoopStart):
2033 movaps 2(%ecx), %xmm2
2034 movaps 18(%ecx), %xmm3
2035 movaps %xmm3, %xmm6
2036 movaps 34(%ecx), %xmm4
2037 movaps %xmm4, %xmm7
2038 movaps 50(%ecx), %xmm5
2039 pminub %xmm2, %xmm6
2040 pminub %xmm5, %xmm7
2041 pminub %xmm6, %xmm7
2042 pcmpeqb %xmm0, %xmm7
2043 pmovmskb %xmm7, %eax
2044 movaps %xmm5, %xmm7
2045 palignr $14, %xmm4, %xmm5
2046 palignr $14, %xmm3, %xmm4
2047 test %eax, %eax
/* NUL present: %xmm1/%xmm2 are untouched, redo this span chunk by chunk. */
2048 jnz L(Shl14Start)
2049#ifdef USE_AS_STRNCPY
2050 sub $64, %ebx
2051 jbe L(StrncpyLeave14)
2052#endif
2053 palignr $14, %xmm2, %xmm3
2054 lea 64(%ecx), %ecx
2055 palignr $14, %xmm1, %xmm2
2056 movaps %xmm7, %xmm1
2057 movaps %xmm5, 48(%edx)
2058 movaps %xmm4, 32(%edx)
2059 movaps %xmm3, 16(%edx)
2060 movaps %xmm2, (%edx)
2061 lea 64(%edx), %edx
2062 jmp L(Shl14LoopStart)
2063
/* Copy the 2 bytes preceding the NUL-bearing chunk (4-byte move starting
   two bytes early, staged through %esi), then finish in
   L(CopyFrom1To16Bytes) with %esi = 2. */
2064L(Shl14LoopExit):
2065 movl -2(%ecx), %esi
2066 movl %esi, -2(%edx)
2067 mov $2, %esi
2068 jmp L(CopyFrom1To16Bytes)
2069
/*
 * L(Shl15): copy path for a source cursor that sits 15 bytes past a
 * 16-byte boundary.  Aligned movaps loads at offsets 16*k - 15 from %ecx
 * are merged pairwise with palignr $15 and written with aligned movaps
 * stores to %edx.
 *
 *   %xmm1 previous chunk, %xmm2 current chunk, %xmm3 saved current chunk
 *   %xmm0 NUL-compare register (presumably zero on entry; stays zero after
 *         every compare that finds no NUL)
 *   %eax  NUL byte mask, %ebx strncpy byte budget (USE_AS_STRNCPY only)
 *
 * Four single-chunk steps are followed by a 64-byte-per-iteration loop.
 */
2070 .p2align 4
2071L(Shl15):
2072 movaps -15(%ecx), %xmm1
2073 movaps 1(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop once it has seen a NUL. */
2074L(Shl15Start):
2075 pcmpeqb %xmm2, %xmm0
2076 pmovmskb %xmm0, %eax
2077 movaps %xmm2, %xmm3
2078#ifdef USE_AS_STRNCPY
2079 sub $16, %ebx
2080 jbe L(StrncpyExit15Case2OrCase3)
2081#endif
2082 test %eax, %eax
2083 jnz L(Shl15LoopExit)
2084
/* Step 1. */
2085 palignr $15, %xmm1, %xmm2
2086 movaps %xmm3, %xmm1
2087 movaps %xmm2, (%edx)
2088 movaps 17(%ecx), %xmm2
2089
2090 pcmpeqb %xmm2, %xmm0
2091 lea 16(%edx), %edx
2092 pmovmskb %xmm0, %eax
2093 lea 16(%ecx), %ecx
2094 movaps %xmm2, %xmm3
2095#ifdef USE_AS_STRNCPY
2096 sub $16, %ebx
2097 jbe L(StrncpyExit15Case2OrCase3)
2098#endif
2099 test %eax, %eax
2100 jnz L(Shl15LoopExit)
2101
/* Step 2. */
2102 palignr $15, %xmm1, %xmm2
2103 movaps %xmm2, (%edx)
2104 movaps 17(%ecx), %xmm2
2105 movaps %xmm3, %xmm1
2106
2107 pcmpeqb %xmm2, %xmm0
2108 lea 16(%edx), %edx
2109 pmovmskb %xmm0, %eax
2110 lea 16(%ecx), %ecx
2111 movaps %xmm2, %xmm3
2112#ifdef USE_AS_STRNCPY
2113 sub $16, %ebx
2114 jbe L(StrncpyExit15Case2OrCase3)
2115#endif
2116 test %eax, %eax
2117 jnz L(Shl15LoopExit)
2118
/* Step 3: %xmm3 is left holding this chunk for use as "previous" in step 4. */
2119 palignr $15, %xmm1, %xmm2
2120 movaps %xmm2, (%edx)
2121 movaps 17(%ecx), %xmm2
2122
2123 pcmpeqb %xmm2, %xmm0
2124 lea 16(%edx), %edx
2125 pmovmskb %xmm0, %eax
2126 lea 16(%ecx), %ecx
2127#ifdef USE_AS_STRNCPY
2128 sub $16, %ebx
2129 jbe L(StrncpyExit15Case2OrCase3)
2130#endif
2131 test %eax, %eax
2132 jnz L(Shl15LoopExit)
2133
/* Step 4. */
2134 palignr $15, %xmm3, %xmm2
2135 movaps %xmm2, (%edx)
2136 lea 17(%ecx), %ecx
2137 lea 16(%edx), %edx
2138
/* Round the source down to a 64-byte boundary; %eax = bytes stepped back,
   applied equally to the destination and (strncpy) the budget.  The lea
   restores the -1 bias the loads expect. */
2139 mov %ecx, %eax
2140 and $-0x40, %ecx
2141 sub %ecx, %eax
2142 lea -1(%ecx), %ecx
2143 sub %eax, %edx
2144#ifdef USE_AS_STRNCPY
2145 add %eax, %ebx
2146#endif
2147 movaps -15(%ecx), %xmm1
2148
/* 64-byte loop: the byte-wise minimum of four chunks contains a zero byte
   exactly when any of them does. */
2149L(Shl15LoopStart):
2150 movaps 1(%ecx), %xmm2
2151 movaps 17(%ecx), %xmm3
2152 movaps %xmm3, %xmm6
2153 movaps 33(%ecx), %xmm4
2154 movaps %xmm4, %xmm7
2155 movaps 49(%ecx), %xmm5
2156 pminub %xmm2, %xmm6
2157 pminub %xmm5, %xmm7
2158 pminub %xmm6, %xmm7
2159 pcmpeqb %xmm0, %xmm7
2160 pmovmskb %xmm7, %eax
2161 movaps %xmm5, %xmm7
2162 palignr $15, %xmm4, %xmm5
2163 palignr $15, %xmm3, %xmm4
2164 test %eax, %eax
/* NUL present: %xmm1/%xmm2 are untouched, redo this span chunk by chunk. */
2165 jnz L(Shl15Start)
2166#ifdef USE_AS_STRNCPY
2167 sub $64, %ebx
2168 jbe L(StrncpyLeave15)
2169#endif
2170 palignr $15, %xmm2, %xmm3
2171 lea 64(%ecx), %ecx
2172 palignr $15, %xmm1, %xmm2
2173 movaps %xmm7, %xmm1
2174 movaps %xmm5, 48(%edx)
2175 movaps %xmm4, 32(%edx)
2176 movaps %xmm3, 16(%edx)
2177 movaps %xmm2, (%edx)
2178 lea 64(%edx), %edx
2179 jmp L(Shl15LoopStart)
2180
/* Copy the single byte preceding the NUL-bearing chunk (4-byte move
   starting three bytes early, staged through %esi), then continue in
   L(CopyFrom1To16Bytes) with %esi = 1.  The explicit jmp is only emitted
   for the strcat/strlcpy builds; otherwise L(CopyFrom1To16Bytes) is
   assembled directly below and execution falls through into it. */
2181L(Shl15LoopExit):
2182 movl -3(%ecx), %esi
2183 movl %esi, -3(%edx)
2184 mov $1, %esi
2185#if defined USE_AS_STRCAT || defined USE_AS_STRLCPY
2186 jmp L(CopyFrom1To16Bytes)
2187#endif
2188
2189
2190#if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY
2191
/*
 * L(CopyFrom1To16Bytes): copy the final 1..16 bytes, up to and including
 * the first NUL of the current 16-byte chunk.
 *
 * In:  %eax  NUL byte mask for the chunk (low 16 bits; bit i = byte i)
 *      %esi  offset added to both %ecx (source) and %edx (destination)
 *      %ebx  (USE_AS_STRNCPY) byte budget; 16 is added back here,
 *            presumably undoing the subtraction the caller made for this
 *            chunk
 * The saved %esi is restored with POP (macro defined outside this chunk).
 *
 * The mask is decoded lowest-set-bit first: %al covers bytes 1..8, %ah
 * bytes 9..16, each split again into nibbles.  L(ExitN) copies N bytes.
 * In every ExitN the USE_AS_STRNCPY tail subtracts N from the budget and
 * zero-fills from N(%edx) when budget remains (lea leaves the flags from
 * sub intact for the jnz).  SAVE_RESULT and RETURN1 are macros defined
 * elsewhere; SAVE_RESULT presumably loads the return value into %eax.
 */
2192 .p2align 4
2193L(CopyFrom1To16Bytes):
2194# ifdef USE_AS_STRNCPY
2195 add $16, %ebx
2196# endif
2197 add %esi, %edx
2198 add %esi, %ecx
2199
2200 POP (%esi)
2201 test %al, %al
2202 jz L(ExitHigh8)
2203
/* NUL is within the first 8 bytes.  %ah is scratch from here on. */
2204L(CopyFrom1To16BytesLess8):
2205 mov %al, %ah
2206 and $15, %ah
2207 jz L(ExitHigh4)
2208
2209 test $0x01, %al
2210 jnz L(Exit1)
2211 test $0x02, %al
2212 jnz L(Exit2)
2213 test $0x04, %al
2214 jnz L(Exit3)
2215
/* Reached by fall-through when bits 0..2 are clear; the low nibble was
   non-zero, so bit 3 must be the one that is set. */
2216 .p2align 4
2217L(Exit4):
2218 movl (%ecx), %eax
2219 movl %eax, (%edx)
2220 SAVE_RESULT (3)
2221# ifdef USE_AS_STRNCPY
2222 sub $4, %ebx
2223 lea 4(%edx), %ecx
2224 jnz L(StrncpyFillTailWithZero1)
2225# ifdef USE_AS_STPCPY
/* cmpb sets CF only when the byte at (%eax) is 0, and sbb $-1 adds
   1 - CF: %eax moves one past the last copied byte unless that byte is
   the terminator. */
2226 cmpb $1, (%eax)
2227 sbb $-1, %eax
2228# endif
2229# endif
2230 RETURN1
2231
/* NUL is in bytes 5..8. */
2232 .p2align 4
2233L(ExitHigh4):
2234 test $0x10, %al
2235 jnz L(Exit5)
2236 test $0x20, %al
2237 jnz L(Exit6)
2238 test $0x40, %al
2239 jnz L(Exit7)
2240
2241 .p2align 4
2242L(Exit8):
2243 movlpd (%ecx), %xmm0
2244 movlpd %xmm0, (%edx)
2245 SAVE_RESULT (7)
2246# ifdef USE_AS_STRNCPY
2247 sub $8, %ebx
2248 lea 8(%edx), %ecx
2249 jnz L(StrncpyFillTailWithZero1)
2250# ifdef USE_AS_STPCPY
2251 cmpb $1, (%eax)
2252 sbb $-1, %eax
2253# endif
2254# endif
2255 RETURN1
2256
/* NUL is in bytes 9..16; %al is zero on entry and is reused as scratch. */
2257 .p2align 4
2258L(ExitHigh8):
2259 mov %ah, %al
2260 and $15, %al
2261 jz L(ExitHigh12)
2262
2263 test $0x01, %ah
2264 jnz L(Exit9)
2265 test $0x02, %ah
2266 jnz L(Exit10)
2267 test $0x04, %ah
2268 jnz L(Exit11)
2269
2270 .p2align 4
2271L(Exit12):
2272 movlpd (%ecx), %xmm0
2273 movl 8(%ecx), %eax
2274 movlpd %xmm0, (%edx)
2275 movl %eax, 8(%edx)
2276 SAVE_RESULT (11)
2277# ifdef USE_AS_STRNCPY
2278 sub $12, %ebx
2279 lea 12(%edx), %ecx
2280 jnz L(StrncpyFillTailWithZero1)
2281# ifdef USE_AS_STPCPY
2282 cmpb $1, (%eax)
2283 sbb $-1, %eax
2284# endif
2285# endif
2286 RETURN1
2287
/* NUL is in bytes 13..16. */
2288 .p2align 4
2289L(ExitHigh12):
2290 test $0x10, %ah
2291 jnz L(Exit13)
2292 test $0x20, %ah
2293 jnz L(Exit14)
2294 test $0x40, %ah
2295 jnz L(Exit15)
2296
2297 .p2align 4
2298L(Exit16):
2299 movdqu (%ecx), %xmm0
2300 movdqu %xmm0, (%edx)
2301 SAVE_RESULT (15)
2302# ifdef USE_AS_STRNCPY
2303 sub $16, %ebx
2304 lea 16(%edx), %ecx
2305 jnz L(StrncpyFillTailWithZero1)
2306# ifdef USE_AS_STPCPY
2307 cmpb $1, (%eax)
2308 sbb $-1, %eax
2309# endif
2310# endif
2311 RETURN1
2312
2313# ifdef USE_AS_STRNCPY
2314
/*
 * L(CopyFrom1To16BytesCase2) (USE_AS_STRNCPY only): finish a chunk in
 * which a NUL was found (%eax != 0) while the byte budget may also end
 * inside the same chunk.  After adding 16 back, %ebx is compared against
 * each byte position alongside the matching mask bit; whichever comes
 * first, the terminator or the budget limit, selects L(ExitN).
 *
 * %esi is the offset applied to both pointers and is restored via POP.
 * CFI_PUSH and POP are macros defined outside this chunk; CFI_PUSH
 * presumably only records unwind information for the still-saved %esi.
 */
2315 CFI_PUSH(%esi)
2316
2317 .p2align 4
2318L(CopyFrom1To16BytesCase2):
2319 add $16, %ebx
2320 add %esi, %ecx
2321 add %esi, %edx
2322
2323 POP (%esi)
2324
2325 test %al, %al
2326 jz L(ExitHighCase2)
2327
/* NUL is within bytes 1..8.  With more than 8 bytes of budget the limit
   cannot come first, so the plain mask dispatcher is used. */
2328 cmp $8, %ebx
2329 ja L(CopyFrom1To16BytesLess8)
2330
2331 test $0x01, %al
2332 jnz L(Exit1)
2333 cmp $1, %ebx
2334 je L(Exit1)
2335 test $0x02, %al
2336 jnz L(Exit2)
2337 cmp $2, %ebx
2338 je L(Exit2)
2339 test $0x04, %al
2340 jnz L(Exit3)
2341 cmp $3, %ebx
2342 je L(Exit3)
2343 test $0x08, %al
2344 jnz L(Exit4)
2345 cmp $4, %ebx
2346 je L(Exit4)
2347 test $0x10, %al
2348 jnz L(Exit5)
2349 cmp $5, %ebx
2350 je L(Exit5)
2351 test $0x20, %al
2352 jnz L(Exit6)
2353 cmp $6, %ebx
2354 je L(Exit6)
2355 test $0x40, %al
2356 jnz L(Exit7)
2357 cmp $7, %ebx
2358 je L(Exit7)
2359 jmp L(Exit8)
2360
/* NUL is within bytes 9..16.  If the budget is 8 or less, the limit comes
   first and the count-only dispatcher takes over. */
2361 .p2align 4
2362L(ExitHighCase2):
2363 cmp $8, %ebx
2364 jbe L(CopyFrom1To16BytesLess8Case3)
2365
2366 test $0x01, %ah
2367 jnz L(Exit9)
2368 cmp $9, %ebx
2369 je L(Exit9)
2370 test $0x02, %ah
2371 jnz L(Exit10)
2372 cmp $10, %ebx
2373 je L(Exit10)
2374 test $0x04, %ah
2375 jnz L(Exit11)
2376 cmp $11, %ebx
2377 je L(Exit11)
2378 test $0x8, %ah
2379 jnz L(Exit12)
2380 cmp $12, %ebx
2381 je L(Exit12)
2382 test $0x10, %ah
2383 jnz L(Exit13)
2384 cmp $13, %ebx
2385 je L(Exit13)
2386 test $0x20, %ah
2387 jnz L(Exit14)
2388 cmp $14, %ebx
2389 je L(Exit14)
2390 test $0x40, %ah
2391 jnz L(Exit15)
2392 cmp $15, %ebx
2393 je L(Exit15)
2394 jmp L(Exit16)
2395
/*
 * L(CopyFrom1To16BytesCase2OrCase3) (USE_AS_STRNCPY only): the byte budget
 * ends inside the current chunk.  A non-zero mask in %eax means a NUL is
 * also present (Case2); otherwise fall into Case3, where only the budget
 * in %ebx (after adding 16 back) decides how many bytes to copy.
 *
 * Counts 1..3, 5..7, 9..11 and 13..15 reuse L(ExitN).  Counts 4, 8, 12 and
 * 16 are copied inline and return through SAVE_RESULT (n) / RETURN1
 * (macros defined outside this chunk) with n equal to the byte count,
 * one more than the argument the corresponding L(ExitN) passes.
 */
2396 CFI_PUSH(%esi)
2397
2398 .p2align 4
2399L(CopyFrom1To16BytesCase2OrCase3):
2400 test %eax, %eax
2401 jnz L(CopyFrom1To16BytesCase2)
2402
2403 .p2align 4
2404L(CopyFrom1To16BytesCase3):
2405 add $16, %ebx
2406 add %esi, %edx
2407 add %esi, %ecx
2408
2409 POP (%esi)
2410
2411 cmp $8, %ebx
2412 ja L(ExitHigh8Case3)
2413
/* Budget is at most 8 bytes (also entered from L(ExitHighCase2)). */
2414L(CopyFrom1To16BytesLess8Case3):
2415 cmp $4, %ebx
2416 ja L(ExitHigh4Case3)
2417
2418 cmp $1, %ebx
2419 je L(Exit1)
2420 cmp $2, %ebx
2421 je L(Exit2)
2422 cmp $3, %ebx
2423 je L(Exit3)
/* Exactly 4 bytes. */
2424 movl (%ecx), %eax
2425 movl %eax, (%edx)
2426 SAVE_RESULT (4)
2427 RETURN1
2428
2429 .p2align 4
2430L(ExitHigh4Case3):
2431 cmp $5, %ebx
2432 je L(Exit5)
2433 cmp $6, %ebx
2434 je L(Exit6)
2435 cmp $7, %ebx
2436 je L(Exit7)
/* Exactly 8 bytes. */
2437 movlpd (%ecx), %xmm0
2438 movlpd %xmm0, (%edx)
2439 SAVE_RESULT (8)
2440 RETURN1
2441
2442 .p2align 4
2443L(ExitHigh8Case3):
2444 cmp $12, %ebx
2445 ja L(ExitHigh12Case3)
2446
2447 cmp $9, %ebx
2448 je L(Exit9)
2449 cmp $10, %ebx
2450 je L(Exit10)
2451 cmp $11, %ebx
2452 je L(Exit11)
/* Exactly 12 bytes. */
2453 movlpd (%ecx), %xmm0
2454 movl 8(%ecx), %eax
2455 movlpd %xmm0, (%edx)
2456 movl %eax, 8(%edx)
2457 SAVE_RESULT (12)
2458 RETURN1
2459
2460 .p2align 4
2461L(ExitHigh12Case3):
2462 cmp $13, %ebx
2463 je L(Exit13)
2464 cmp $14, %ebx
2465 je L(Exit14)
2466 cmp $15, %ebx
2467 je L(Exit15)
/* Exactly 16 bytes, copied as two 8-byte halves. */
2468 movlpd (%ecx), %xmm0
2469 movlpd 8(%ecx), %xmm1
2470 movlpd %xmm0, (%edx)
2471 movlpd %xmm1, 8(%edx)
2472 SAVE_RESULT (16)
2473 RETURN1
2474
2475# endif
2476
/*
 * L(Exit1)..L(Exit3): copy exactly 1, 2 or 3 bytes from (%ecx) to (%edx)
 * and return.  SAVE_RESULT (N-1) and RETURN1 are macros defined outside
 * this chunk (SAVE_RESULT presumably sets the return value in %eax).
 *
 * USE_AS_STRNCPY: N is subtracted from the budget in %ebx; if any budget
 * remains, control moves to L(StrncpyFillTailWithZero1) with %ecx pointing
 * just past the copied bytes (lea does not disturb the flags from sub).
 * USE_AS_STPCPY: cmpb/sbb advance %eax by one unless the byte it points to
 * is NUL (cmpb $1 sets CF only for a zero byte; sbb $-1 adds 1 - CF).
 */
2477 .p2align 4
2478L(Exit1):
2479 movb (%ecx), %al
2480 movb %al, (%edx)
2481 SAVE_RESULT (0)
2482# ifdef USE_AS_STRNCPY
2483 sub $1, %ebx
2484 lea 1(%edx), %ecx
2485 jnz L(StrncpyFillTailWithZero1)
2486# ifdef USE_AS_STPCPY
2487 cmpb $1, (%eax)
2488 sbb $-1, %eax
2489# endif
2490# endif
2491 RETURN1
2492
2493 .p2align 4
2494L(Exit2):
2495 movw (%ecx), %ax
2496 movw %ax, (%edx)
2497 SAVE_RESULT (1)
2498# ifdef USE_AS_STRNCPY
2499 sub $2, %ebx
2500 lea 2(%edx), %ecx
2501 jnz L(StrncpyFillTailWithZero1)
2502# ifdef USE_AS_STPCPY
2503 cmpb $1, (%eax)
2504 sbb $-1, %eax
2505# endif
2506# endif
2507 RETURN1
2508
/* 3 bytes = one 2-byte move plus one 1-byte move. */
2509 .p2align 4
2510L(Exit3):
2511 movw (%ecx), %ax
2512 movw %ax, (%edx)
2513 movb 2(%ecx), %al
2514 movb %al, 2(%edx)
2515 SAVE_RESULT (2)
2516# ifdef USE_AS_STRNCPY
2517 sub $3, %ebx
2518 lea 3(%edx), %ecx
2519 jnz L(StrncpyFillTailWithZero1)
2520# ifdef USE_AS_STPCPY
2521 cmpb $1, (%eax)
2522 sbb $-1, %eax
2523# endif
2524# endif
2525 RETURN1
2526
/*
 * L(Exit5)..L(Exit7): copy exactly 5, 6 or 7 bytes from (%ecx) to (%edx)
 * and return.  SAVE_RESULT (N-1) and RETURN1 are macros defined outside
 * this chunk (SAVE_RESULT presumably sets the return value in %eax).
 *
 * USE_AS_STRNCPY: N is subtracted from the budget in %ebx; if any budget
 * remains, control moves to L(StrncpyFillTailWithZero1) with %ecx pointing
 * just past the copied bytes (lea does not disturb the flags from sub).
 * USE_AS_STPCPY: cmpb/sbb advance %eax by one unless the byte it points to
 * is NUL (cmpb $1 sets CF only for a zero byte; sbb $-1 adds 1 - CF).
 */
2527 .p2align 4
2528L(Exit5):
2529 movl (%ecx), %eax
2530 movl %eax, (%edx)
2531 movb 4(%ecx), %al
2532 movb %al, 4(%edx)
2533 SAVE_RESULT (4)
2534# ifdef USE_AS_STRNCPY
2535 sub $5, %ebx
2536 lea 5(%edx), %ecx
2537 jnz L(StrncpyFillTailWithZero1)
2538# ifdef USE_AS_STPCPY
2539 cmpb $1, (%eax)
2540 sbb $-1, %eax
2541# endif
2542# endif
2543 RETURN1
2544
2545 .p2align 4
2546L(Exit6):
2547 movl (%ecx), %eax
2548 movl %eax, (%edx)
2549 movw 4(%ecx), %ax
2550 movw %ax, 4(%edx)
2551 SAVE_RESULT (5)
2552# ifdef USE_AS_STRNCPY
2553 sub $6, %ebx
2554 lea 6(%edx), %ecx
2555 jnz L(StrncpyFillTailWithZero1)
2556# ifdef USE_AS_STPCPY
2557 cmpb $1, (%eax)
2558 sbb $-1, %eax
2559# endif
2560# endif
2561 RETURN1
2562
/* 7 bytes = two 4-byte moves at offsets 0 and 3, overlapping by one byte. */
2563 .p2align 4
2564L(Exit7):
2565 movl (%ecx), %eax
2566 movl %eax, (%edx)
2567 movl 3(%ecx), %eax
2568 movl %eax, 3(%edx)
2569 SAVE_RESULT (6)
2570# ifdef USE_AS_STRNCPY
2571 sub $7, %ebx
2572 lea 7(%edx), %ecx
2573 jnz L(StrncpyFillTailWithZero1)
2574# ifdef USE_AS_STPCPY
2575 cmpb $1, (%eax)
2576 sbb $-1, %eax
2577# endif
2578# endif
2579 RETURN1
2580
/*
 * L(Exit9)..L(Exit11): copy exactly 9, 10 or 11 bytes from (%ecx) to
 * (%edx) and return.  The first 8 bytes go through %xmm0; the rest goes
 * through %eax/%ax/%al.  SAVE_RESULT (N-1) and RETURN1 are macros defined
 * outside this chunk (SAVE_RESULT presumably sets the return value in
 * %eax).
 *
 * USE_AS_STRNCPY: N is subtracted from the budget in %ebx; if any budget
 * remains, control moves to L(StrncpyFillTailWithZero1) with %ecx pointing
 * just past the copied bytes (lea does not disturb the flags from sub).
 * USE_AS_STPCPY: cmpb/sbb advance %eax by one unless the byte it points to
 * is NUL (cmpb $1 sets CF only for a zero byte; sbb $-1 adds 1 - CF).
 */
2581 .p2align 4
2582L(Exit9):
2583 movlpd (%ecx), %xmm0
2584 movb 8(%ecx), %al
2585 movlpd %xmm0, (%edx)
2586 movb %al, 8(%edx)
2587 SAVE_RESULT (8)
2588# ifdef USE_AS_STRNCPY
2589 sub $9, %ebx
2590 lea 9(%edx), %ecx
2591 jnz L(StrncpyFillTailWithZero1)
2592# ifdef USE_AS_STPCPY
2593 cmpb $1, (%eax)
2594 sbb $-1, %eax
2595# endif
2596# endif
2597 RETURN1
2598
2599 .p2align 4
2600L(Exit10):
2601 movlpd (%ecx), %xmm0
2602 movw 8(%ecx), %ax
2603 movlpd %xmm0, (%edx)
2604 movw %ax, 8(%edx)
2605 SAVE_RESULT (9)
2606# ifdef USE_AS_STRNCPY
2607 sub $10, %ebx
2608 lea 10(%edx), %ecx
2609 jnz L(StrncpyFillTailWithZero1)
2610# ifdef USE_AS_STPCPY
2611 cmpb $1, (%eax)
2612 sbb $-1, %eax
2613# endif
2614# endif
2615 RETURN1
2616
/* 11 bytes = 8 bytes at offset 0 plus 4 bytes at offset 7 (one-byte
   overlap). */
2617 .p2align 4
2618L(Exit11):
2619 movlpd (%ecx), %xmm0
2620 movl 7(%ecx), %eax
2621 movlpd %xmm0, (%edx)
2622 movl %eax, 7(%edx)
2623 SAVE_RESULT (10)
2624# ifdef USE_AS_STRNCPY
2625 sub $11, %ebx
2626 lea 11(%edx), %ecx
2627 jnz L(StrncpyFillTailWithZero1)
2628# ifdef USE_AS_STPCPY
2629 cmpb $1, (%eax)
2630 sbb $-1, %eax
2631# endif
2632# endif
2633 RETURN1
2634
/*
 * L(Exit13)..L(Exit15): copy exactly 13, 14 or 15 bytes from (%ecx) to
 * (%edx) and return.  Each uses two 8-byte moves, the second starting at
 * offset N-8 so that it overlaps the first.  SAVE_RESULT (N-1) and
 * RETURN1 are macros defined outside this chunk (SAVE_RESULT presumably
 * sets the return value in %eax).
 *
 * USE_AS_STRNCPY: N is subtracted from the budget in %ebx; if any budget
 * remains, control moves to L(StrncpyFillTailWithZero1) with %ecx pointing
 * just past the copied bytes (lea does not disturb the flags from sub).
 * USE_AS_STPCPY: cmpb/sbb advance %eax by one unless the byte it points to
 * is NUL (cmpb $1 sets CF only for a zero byte; sbb $-1 adds 1 - CF).
 */
2635 .p2align 4
2636L(Exit13):
2637 movlpd (%ecx), %xmm0
2638 movlpd 5(%ecx), %xmm1
2639 movlpd %xmm0, (%edx)
2640 movlpd %xmm1, 5(%edx)
2641 SAVE_RESULT (12)
2642# ifdef USE_AS_STRNCPY
2643 sub $13, %ebx
2644 lea 13(%edx), %ecx
2645 jnz L(StrncpyFillTailWithZero1)
2646# ifdef USE_AS_STPCPY
2647 cmpb $1, (%eax)
2648 sbb $-1, %eax
2649# endif
2650# endif
2651 RETURN1
2652
2653 .p2align 4
2654L(Exit14):
2655 movlpd (%ecx), %xmm0
2656 movlpd 6(%ecx), %xmm1
2657 movlpd %xmm0, (%edx)
2658 movlpd %xmm1, 6(%edx)
2659 SAVE_RESULT (13)
2660# ifdef USE_AS_STRNCPY
2661 sub $14, %ebx
2662 lea 14(%edx), %ecx
2663 jnz L(StrncpyFillTailWithZero1)
2664# ifdef USE_AS_STPCPY
2665 cmpb $1, (%eax)
2666 sbb $-1, %eax
2667# endif
2668# endif
2669 RETURN1
2670
2671 .p2align 4
2672L(Exit15):
2673 movlpd (%ecx), %xmm0
2674 movlpd 7(%ecx), %xmm1
2675 movlpd %xmm0, (%edx)
2676 movlpd %xmm1, 7(%edx)
2677 SAVE_RESULT (14)
2678# ifdef USE_AS_STRNCPY
2679 sub $15, %ebx
2680 lea 15(%edx), %ecx
2681 jnz L(StrncpyFillTailWithZero1)
2682# ifdef USE_AS_STPCPY
2683 cmpb $1, (%eax)
2684 sbb $-1, %eax
2685# endif
2686# endif
2687 RETURN1
2688
2689CFI_POP (%edi)
2690
2691# ifdef USE_AS_STRNCPY
/*
 * L(Fill0)..L(Fill16) (USE_AS_STRNCPY only): store exactly N bytes at
 * (%ecx) and return.  The values stored come from %edx/%dx/%dl and %xmm0,
 * which L(StrncpyFillTailWithZero) clears before dispatching here, so
 * these are the final zero-padding stores.  Sizes that are not a single
 * store width use two stores, overlapping where needed (for example
 * Fill7 = 4 bytes at offset 0 plus 4 bytes at offset 3).  RETURN is a
 * macro defined outside this chunk.
 */
2692 .p2align 4
2693L(Fill0):
2694 RETURN
2695
2696 .p2align 4
2697L(Fill1):
2698 movb %dl, (%ecx)
2699 RETURN
2700
2701 .p2align 4
2702L(Fill2):
2703 movw %dx, (%ecx)
2704 RETURN
2705
2706 .p2align 4
2707L(Fill3):
2708 movw %dx, (%ecx)
2709 movb %dl, 2(%ecx)
2710 RETURN
2711
2712 .p2align 4
2713L(Fill4):
2714 movl %edx, (%ecx)
2715 RETURN
2716
2717 .p2align 4
2718L(Fill5):
2719 movl %edx, (%ecx)
2720 movb %dl, 4(%ecx)
2721 RETURN
2722
2723 .p2align 4
2724L(Fill6):
2725 movl %edx, (%ecx)
2726 movw %dx, 4(%ecx)
2727 RETURN
2728
2729 .p2align 4
2730L(Fill7):
2731 movl %edx, (%ecx)
2732 movl %edx, 3(%ecx)
2733 RETURN
2734
2735 .p2align 4
2736L(Fill8):
2737 movlpd %xmm0, (%ecx)
2738 RETURN
2739
2740 .p2align 4
2741L(Fill9):
2742 movlpd %xmm0, (%ecx)
2743 movb %dl, 8(%ecx)
2744 RETURN
2745
2746 .p2align 4
2747L(Fill10):
2748 movlpd %xmm0, (%ecx)
2749 movw %dx, 8(%ecx)
2750 RETURN
2751
2752 .p2align 4
2753L(Fill11):
2754 movlpd %xmm0, (%ecx)
2755 movl %edx, 7(%ecx)
2756 RETURN
2757
2758 .p2align 4
2759L(Fill12):
2760 movlpd %xmm0, (%ecx)
2761 movl %edx, 8(%ecx)
2762 RETURN
2763
2764 .p2align 4
2765L(Fill13):
2766 movlpd %xmm0, (%ecx)
2767 movlpd %xmm0, 5(%ecx)
2768 RETURN
2769
2770 .p2align 4
2771L(Fill14):
2772 movlpd %xmm0, (%ecx)
2773 movlpd %xmm0, 6(%ecx)
2774 RETURN
2775
2776 .p2align 4
2777L(Fill15):
2778 movlpd %xmm0, (%ecx)
2779 movlpd %xmm0, 7(%ecx)
2780 RETURN
2781
2782 .p2align 4
2783L(Fill16):
2784 movlpd %xmm0, (%ecx)
2785 movlpd %xmm0, 8(%ecx)
2786 RETURN
2787
/*
 * L(StrncpyFillExit1) / L(FillFrom1To16Bytes) (USE_AS_STRNCPY only):
 * dispatch on the remaining pad length in %ebx to the matching L(FillN).
 *
 * L(StrncpyFillExit1) first adds back the 16 that its callers subtracted
 * before branching here (lea is used, so the flags are left alone).
 * L(FillFrom1To16Bytes) expects %ebx to already hold the length.  The
 * branches form a small decision tree that reuses one cmp for several
 * conditional jumps; they are signed (jl/jg), so %ebx is expected to be a
 * small non-negative value, and only 0..16 have a target.
 */
2788 .p2align 4
2789L(StrncpyFillExit1):
2790 lea 16(%ebx), %ebx
2791L(FillFrom1To16Bytes):
2792 test %ebx, %ebx
2793 jz L(Fill0)
2794 cmp $16, %ebx
2795 je L(Fill16)
2796 cmp $8, %ebx
2797 je L(Fill8)
2798 jg L(FillMore8)
2799 cmp $4, %ebx
2800 je L(Fill4)
2801 jg L(FillMore4)
/* 1..3 remain. */
2802 cmp $2, %ebx
2803 jl L(Fill1)
2804 je L(Fill2)
2805 jg L(Fill3)
2806L(FillMore8): /* but less than 16 */
2807 cmp $12, %ebx
2808 je L(Fill12)
2809 jl L(FillLess12)
2810 cmp $14, %ebx
2811 jl L(Fill13)
2812 je L(Fill14)
2813 jg L(Fill15)
2814L(FillMore4): /* but less than 8 */
2815 cmp $6, %ebx
2816 jl L(Fill5)
2817 je L(Fill6)
2818 jg L(Fill7)
2819L(FillLess12): /* but more than 8 */
2820 cmp $10, %ebx
2821 jl L(Fill9)
2822 je L(Fill10)
2823 jmp L(Fill11)
2824
/*
 * L(StrncpyFillTailWithZero1) / L(StrncpyFillTailWithZero)
 * (USE_AS_STRNCPY only): write %ebx zero bytes starting at %ecx.
 *
 * The "1" entry first restores %edi through POP (macro defined outside
 * this chunk); the plain entry is used by the L(ExitTailN) routines.
 * %xmm0 and %edx are cleared here and serve as the zero source for every
 * store, including the final L(FillN) ones.
 *
 * Strategy: up to 16 bytes go straight to the L(FillN) dispatcher.
 * Otherwise 16 bytes are written unaligned, %ecx is rounded down to a
 * 16-byte boundary (the count is raised by the bytes that will be written
 * twice), then aligned movdqa stores run 64 bytes per iteration, followed
 * by 32- and 16-byte steps and the L(FillN) dispatcher for the rest.
 */
2825 CFI_PUSH(%edi)
2826
2827 .p2align 4
2828L(StrncpyFillTailWithZero1):
2829 POP (%edi)
2830L(StrncpyFillTailWithZero):
2831 pxor %xmm0, %xmm0
2832 xor %edx, %edx
2833 sub $16, %ebx
2834 jbe L(StrncpyFillExit1)
2835
/* More than 16 bytes to clear: first 16 with unaligned 8-byte stores. */
2836 movlpd %xmm0, (%ecx)
2837 movlpd %xmm0, 8(%ecx)
2838
2839 lea 16(%ecx), %ecx
2840
/* Align %ecx down to 16; %edx briefly holds the misalignment and is
   cleared again before any L(FillN) can use it. */
2841 mov %ecx, %edx
2842 and $0xf, %edx
2843 sub %edx, %ecx
2844 add %edx, %ebx
2845 xor %edx, %edx
2846 sub $64, %ebx
2847 jb L(StrncpyFillLess64)
2848
/* %ebx is kept biased by -64 while looping. */
2849L(StrncpyFillLoopMovdqa):
2850 movdqa %xmm0, (%ecx)
2851 movdqa %xmm0, 16(%ecx)
2852 movdqa %xmm0, 32(%ecx)
2853 movdqa %xmm0, 48(%ecx)
2854 lea 64(%ecx), %ecx
2855 sub $64, %ebx
2856 jae L(StrncpyFillLoopMovdqa)
2857
/* Fewer than 64 remain; %ebx = remaining - 64 on entry.  The signed jl
   tests below are intentional because %ebx is negative-biased here. */
2858L(StrncpyFillLess64):
2859 add $32, %ebx
2860 jl L(StrncpyFillLess32)
2861 movdqa %xmm0, (%ecx)
2862 movdqa %xmm0, 16(%ecx)
2863 lea 32(%ecx), %ecx
2864 sub $16, %ebx
2865 jl L(StrncpyFillExit1)
2866 movdqa %xmm0, (%ecx)
2867 lea 16(%ecx), %ecx
2868 jmp L(FillFrom1To16Bytes)
2869
/* Fewer than 32 remain; %ebx = remaining - 32 on entry. */
2870L(StrncpyFillLess32):
2871 add $16, %ebx
2872 jl L(StrncpyFillExit1)
2873 movdqa %xmm0, (%ecx)
2874 lea 16(%ecx), %ecx
2875 jmp L(FillFrom1To16Bytes)
2876# endif
2877
2878 .p2align 4
2879L(ExitTail1):
2880 movb (%ecx), %al
2881 movb %al, (%edx)
2882 SAVE_RESULT_TAIL (0)
2883# ifdef USE_AS_STRNCPY
2884 sub $1, %ebx
2885 lea 1(%edx), %ecx
2886 jnz L(StrncpyFillTailWithZero)
2887# ifdef USE_AS_STPCPY
2888 cmpb $1, (%eax)
2889 sbb $-1, %eax
2890# endif
2891# endif
2892 RETURN
2893
2894 .p2align 4
2895L(ExitTail2):
2896 movw (%ecx), %ax
2897 movw %ax, (%edx)
2898 SAVE_RESULT_TAIL (1)
2899# ifdef USE_AS_STRNCPY
2900 sub $2, %ebx
2901 lea 2(%edx), %ecx
2902 jnz L(StrncpyFillTailWithZero)
2903# ifdef USE_AS_STPCPY
2904 cmpb $1, (%eax)
2905 sbb $-1, %eax
2906# endif
2907# endif
2908 RETURN
2909
2910 .p2align 4
2911L(ExitTail3):
2912 movw (%ecx), %ax
2913 movw %ax, (%edx)
2914 movb 2(%ecx), %al
2915 movb %al, 2(%edx)
2916 SAVE_RESULT_TAIL (2)
2917# ifdef USE_AS_STRNCPY
2918 sub $3, %ebx
2919 lea 3(%edx), %ecx
2920 jnz L(StrncpyFillTailWithZero)
2921# ifdef USE_AS_STPCPY
2922 cmpb $1, (%eax)
2923 sbb $-1, %eax
2924# endif
2925# endif
2926 RETURN
2927
2928 .p2align 4
2929L(ExitTail4):
2930 movl (%ecx), %eax
2931 movl %eax, (%edx)
2932 SAVE_RESULT_TAIL (3)
2933# ifdef USE_AS_STRNCPY
2934 sub $4, %ebx
2935 lea 4(%edx), %ecx
2936 jnz L(StrncpyFillTailWithZero)
2937# ifdef USE_AS_STPCPY
2938 cmpb $1, (%eax)
2939 sbb $-1, %eax
2940# endif
2941# endif
2942 RETURN
2943
/* ExitTail5: copy 5 bytes from (%ecx) to (%edx) as a 32-bit move plus a
   byte move, then return.  SAVE_RESULT_TAIL is defined outside this chunk;
   presumably it sets up the return value in %eax.  */
2944 .p2align 4
2945L(ExitTail5):
2946 movl (%ecx), %eax
2947 movl %eax, (%edx)
2948 movb 4(%ecx), %al
2949 movb %al, 4(%edx)
2950 SAVE_RESULT_TAIL (4)
2951# ifdef USE_AS_STRNCPY
/* Deduct the 5 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 5 (lea keeps the flags).  */
2952 sub $5, %ebx
2953 lea 5(%edx), %ecx
2954 jnz L(StrncpyFillTailWithZero)
2955# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
2956 cmpb $1, (%eax)
2957 sbb $-1, %eax
2958# endif
2959# endif
2960 RETURN
2961
/* ExitTail6: copy 6 bytes from (%ecx) to (%edx) as a 32-bit move plus a
   16-bit move, then return.  SAVE_RESULT_TAIL is defined outside this chunk;
   presumably it sets up the return value in %eax.  */
2962 .p2align 4
2963L(ExitTail6):
2964 movl (%ecx), %eax
2965 movl %eax, (%edx)
2966 movw 4(%ecx), %ax
2967 movw %ax, 4(%edx)
2968 SAVE_RESULT_TAIL (5)
2969# ifdef USE_AS_STRNCPY
/* Deduct the 6 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 6 (lea keeps the flags).  */
2970 sub $6, %ebx
2971 lea 6(%edx), %ecx
2972 jnz L(StrncpyFillTailWithZero)
2973# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
2974 cmpb $1, (%eax)
2975 sbb $-1, %eax
2976# endif
2977# endif
2978 RETURN
2979
/* ExitTail7: copy 7 bytes from (%ecx) to (%edx) using two 32-bit moves that
   overlap by one byte (offsets 0 and 3), then return.  SAVE_RESULT_TAIL is
   defined outside this chunk; presumably it sets up the return value in
   %eax.  */
2980 .p2align 4
2981L(ExitTail7):
2982 movl (%ecx), %eax
2983 movl %eax, (%edx)
2984 movl 3(%ecx), %eax
2985 movl %eax, 3(%edx)
2986 SAVE_RESULT_TAIL (6)
2987# ifdef USE_AS_STRNCPY
/* Deduct the 7 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 7 (lea keeps the flags).  */
2988 sub $7, %ebx
2989 lea 7(%edx), %ecx
2990 jnz L(StrncpyFillTailWithZero)
2991# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
2992 cmpb $1, (%eax)
2993 sbb $-1, %eax
2994# endif
2995# endif
2996 RETURN
2997
/* ExitTail8: copy 8 bytes from (%ecx) to (%edx) through %xmm0, then return.
   SAVE_RESULT_TAIL is defined outside this chunk; presumably it sets up the
   return value in %eax.
   NOTE(review): unlike most other ExitTailN blocks, there is no
   USE_AS_STPCPY "cmpb $1, (%eax) / sbb $-1, %eax" fix-up here.  That is only
   correct if this label is never reached with a non-zero 8th byte -- confirm
   against the callers.  */
2998 .p2align 4
2999L(ExitTail8):
3000 movlpd (%ecx), %xmm0
3001 movlpd %xmm0, (%edx)
3002 SAVE_RESULT_TAIL (7)
3003# ifdef USE_AS_STRNCPY
/* Deduct the 8 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 8 (lea keeps the flags).  */
3004 sub $8, %ebx
3005 lea 8(%edx), %ecx
3006 jnz L(StrncpyFillTailWithZero)
3007# endif
3008 RETURN
3009
/* ExitTail9: copy 9 bytes from (%ecx) to (%edx) as an 8-byte move through
   %xmm0 plus a byte move, then return.  Both loads are done before either
   store.  SAVE_RESULT_TAIL is defined outside this chunk; presumably it sets
   up the return value in %eax.  */
3010 .p2align 4
3011L(ExitTail9):
3012 movlpd (%ecx), %xmm0
3013 movb 8(%ecx), %al
3014 movlpd %xmm0, (%edx)
3015 movb %al, 8(%edx)
3016 SAVE_RESULT_TAIL (8)
3017# ifdef USE_AS_STRNCPY
/* Deduct the 9 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 9 (lea keeps the flags).  */
3018 sub $9, %ebx
3019 lea 9(%edx), %ecx
3020 jnz L(StrncpyFillTailWithZero)
3021# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3022 cmpb $1, (%eax)
3023 sbb $-1, %eax
3024# endif
3025# endif
3026 RETURN
3027
/* ExitTail10: copy 10 bytes from (%ecx) to (%edx) as an 8-byte move through
   %xmm0 plus a 16-bit move, then return.  SAVE_RESULT_TAIL is defined
   outside this chunk; presumably it sets up the return value in %eax.  */
3028 .p2align 4
3029L(ExitTail10):
3030 movlpd (%ecx), %xmm0
3031 movw 8(%ecx), %ax
3032 movlpd %xmm0, (%edx)
3033 movw %ax, 8(%edx)
3034 SAVE_RESULT_TAIL (9)
3035# ifdef USE_AS_STRNCPY
/* Deduct the 10 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 10 (lea keeps the flags).  */
3036 sub $10, %ebx
3037 lea 10(%edx), %ecx
3038 jnz L(StrncpyFillTailWithZero)
3039# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3040 cmpb $1, (%eax)
3041 sbb $-1, %eax
3042# endif
3043# endif
3044 RETURN
3045
/* ExitTail11: copy 11 bytes from (%ecx) to (%edx) as an 8-byte move through
   %xmm0 plus a 32-bit move at offset 7 (overlapping by one byte), then
   return.  SAVE_RESULT_TAIL is defined outside this chunk; presumably it
   sets up the return value in %eax.  */
3046 .p2align 4
3047L(ExitTail11):
3048 movlpd (%ecx), %xmm0
3049 movl 7(%ecx), %eax
3050 movlpd %xmm0, (%edx)
3051 movl %eax, 7(%edx)
3052 SAVE_RESULT_TAIL (10)
3053# ifdef USE_AS_STRNCPY
/* Deduct the 11 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 11 (lea keeps the flags).  */
3054 sub $11, %ebx
3055 lea 11(%edx), %ecx
3056 jnz L(StrncpyFillTailWithZero)
3057# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3058 cmpb $1, (%eax)
3059 sbb $-1, %eax
3060# endif
3061# endif
3062 RETURN
3063
/* ExitTail12: copy 12 bytes from (%ecx) to (%edx) as an 8-byte move through
   %xmm0 plus a 32-bit move at offset 8, then return.  SAVE_RESULT_TAIL is
   defined outside this chunk; presumably it sets up the return value in
   %eax.  */
3064 .p2align 4
3065L(ExitTail12):
3066 movlpd (%ecx), %xmm0
3067 movl 8(%ecx), %eax
3068 movlpd %xmm0, (%edx)
3069 movl %eax, 8(%edx)
3070 SAVE_RESULT_TAIL (11)
3071# ifdef USE_AS_STRNCPY
/* Deduct the 12 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 12 (lea keeps the flags).  */
3072 sub $12, %ebx
3073 lea 12(%edx), %ecx
3074 jnz L(StrncpyFillTailWithZero)
3075# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3076 cmpb $1, (%eax)
3077 sbb $-1, %eax
3078# endif
3079# endif
3080 RETURN
3081
/* ExitTail13: copy 13 bytes from (%ecx) to (%edx) using two 8-byte moves at
   offsets 0 and 5 (overlapping by three bytes), then return.
   SAVE_RESULT_TAIL is defined outside this chunk; presumably it sets up the
   return value in %eax.  */
3082 .p2align 4
3083L(ExitTail13):
3084 movlpd (%ecx), %xmm0
3085 movlpd 5(%ecx), %xmm1
3086 movlpd %xmm0, (%edx)
3087 movlpd %xmm1, 5(%edx)
3088 SAVE_RESULT_TAIL (12)
3089# ifdef USE_AS_STRNCPY
/* Deduct the 13 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 13 (lea keeps the flags).  */
3090 sub $13, %ebx
3091 lea 13(%edx), %ecx
3092 jnz L(StrncpyFillTailWithZero)
3093# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3094 cmpb $1, (%eax)
3095 sbb $-1, %eax
3096# endif
3097# endif
3098 RETURN
3099
/* ExitTail14: copy 14 bytes from (%ecx) to (%edx) using two 8-byte moves at
   offsets 0 and 6 (overlapping by two bytes), then return.
   SAVE_RESULT_TAIL is defined outside this chunk; presumably it sets up the
   return value in %eax.  */
3100 .p2align 4
3101L(ExitTail14):
3102 movlpd (%ecx), %xmm0
3103 movlpd 6(%ecx), %xmm1
3104 movlpd %xmm0, (%edx)
3105 movlpd %xmm1, 6(%edx)
3106 SAVE_RESULT_TAIL (13)
3107# ifdef USE_AS_STRNCPY
/* Deduct the 14 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 14 (lea keeps the flags).  */
3108 sub $14, %ebx
3109 lea 14(%edx), %ecx
3110 jnz L(StrncpyFillTailWithZero)
3111# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3112 cmpb $1, (%eax)
3113 sbb $-1, %eax
3114# endif
3115# endif
3116 RETURN
3117
/* ExitTail15: copy 15 bytes from (%ecx) to (%edx) using two 8-byte moves at
   offsets 0 and 7 (overlapping by one byte), then return.  SAVE_RESULT_TAIL
   is defined outside this chunk; presumably it sets up the return value in
   %eax.
   NOTE(review): unlike most other ExitTailN blocks, there is no
   USE_AS_STPCPY "cmpb $1, (%eax) / sbb $-1, %eax" fix-up here.  That is only
   correct if this label is never reached with a non-zero 15th byte --
   confirm against the callers.  */
3118 .p2align 4
3119L(ExitTail15):
3120 movlpd (%ecx), %xmm0
3121 movlpd 7(%ecx), %xmm1
3122 movlpd %xmm0, (%edx)
3123 movlpd %xmm1, 7(%edx)
3124 SAVE_RESULT_TAIL (14)
3125# ifdef USE_AS_STRNCPY
/* Deduct the 15 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 15 (lea keeps the flags).  */
3126 sub $15, %ebx
3127 lea 15(%edx), %ecx
3128 jnz L(StrncpyFillTailWithZero)
3129# endif
3130 RETURN
3131
/* ExitTail16: copy 16 bytes from (%ecx) to (%edx) with one unaligned
   load/store pair through %xmm0, then return.  SAVE_RESULT_TAIL is defined
   outside this chunk; presumably it sets up the return value in %eax.  */
3132 .p2align 4
3133L(ExitTail16):
3134 movdqu (%ecx), %xmm0
3135 movdqu %xmm0, (%edx)
3136 SAVE_RESULT_TAIL (15)
3137# ifdef USE_AS_STRNCPY
/* Deduct the 16 bytes from %ebx; a non-zero remainder continues at
   L(StrncpyFillTailWithZero) with %ecx = %edx + 16 (lea keeps the flags).  */
3138 sub $16, %ebx
3139 lea 16(%edx), %ecx
3140 jnz L(StrncpyFillTailWithZero)
3141# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3142 cmpb $1, (%eax)
3143 sbb $-1, %eax
3144# endif
3145# endif
3146 RETURN
3147#endif
3148
3149#ifdef USE_AS_STRNCPY
/* CFI_PUSH is defined outside this chunk; presumably these lines only record
   for the unwinder that %esi and %edi are saved on the stack in the code
   that follows -- TODO confirm.  */
3150# ifndef USE_AS_STRCAT
3151 CFI_PUSH (%esi)
3152 CFI_PUSH (%edi)
3153# endif
/* StrncpyLeaveCase2OrCase3: dispatch on %eax.  Non-zero goes to
   L(Aligned64LeaveCase2); zero falls through to L(Aligned64LeaveCase3).  */
3154 .p2align 4
3155L(StrncpyLeaveCase2OrCase3):
3156 test %eax, %eax
3157 jnz L(Aligned64LeaveCase2)
3158
/* Aligned64LeaveCase3: reached when %eax was zero.  %ebx is re-biased by 48
   and then stepped down by 16; after each step that still leaves bytes, one
   more pending 16-byte chunk (%xmm4, %xmm5, %xmm6 in turn) is stored below
   %edx with an aligned store and %esi is advanced by 16.  Every exit goes to
   L(CopyFrom1To16BytesCase3).  */
3159L(Aligned64LeaveCase3):
3160 add $48, %ebx
3161 jle L(CopyFrom1To16BytesCase3)
3162 movaps %xmm4, -64(%edx)
3163 lea 16(%esi), %esi
3164 sub $16, %ebx
3165 jbe L(CopyFrom1To16BytesCase3)
3166 movaps %xmm5, -48(%edx)
3167 lea 16(%esi), %esi
3168 sub $16, %ebx
3169 jbe L(CopyFrom1To16BytesCase3)
3170 movaps %xmm6, -32(%edx)
3171 lea 16(%esi), %esi
/* Last step: no branch on the result, so lea is used instead of sub.  */
3172 lea -16(%ebx), %ebx
3173 jmp L(CopyFrom1To16BytesCase3)
3174
/* Aligned64LeaveCase2: reached when %eax was non-zero.  Each of %xmm4..%xmm7
   is compared byte-wise against %xmm0 (presumably zero on entry -- TODO
   confirm) and the match mask is moved to %eax.  For each chunk:
     - count exhausted (%ebx)  -> L(CopyFrom1To16BytesCase2OrCase3)
     - mask non-zero           -> L(CopyFrom1To16Bytes)
     - otherwise the previous chunk is stored below %edx and %esi += 16.
   %xmm0 can be reused as the comparand because the fall-through path is only
   taken when the previous mask was zero, i.e. %xmm0 is all-zero again.  */
3175L(Aligned64LeaveCase2):
3176 pcmpeqb %xmm4, %xmm0
3177 pmovmskb %xmm0, %eax
3178 add $48, %ebx
3179 jle L(CopyFrom1To16BytesCase2OrCase3)
3180 test %eax, %eax
3181 jnz L(CopyFrom1To16Bytes)
3182
3183 pcmpeqb %xmm5, %xmm0
3184 pmovmskb %xmm0, %eax
3185 movaps %xmm4, -64(%edx)
3186 lea 16(%esi), %esi
3187 sub $16, %ebx
3188 jbe L(CopyFrom1To16BytesCase2OrCase3)
3189 test %eax, %eax
3190 jnz L(CopyFrom1To16Bytes)
3191
3192 pcmpeqb %xmm6, %xmm0
3193 pmovmskb %xmm0, %eax
3194 movaps %xmm5, -48(%edx)
3195 lea 16(%esi), %esi
3196 sub $16, %ebx
3197 jbe L(CopyFrom1To16BytesCase2OrCase3)
3198 test %eax, %eax
3199 jnz L(CopyFrom1To16Bytes)
3200
/* Fourth chunk: the mask is computed but not tested here; control goes
   straight to L(CopyFrom1To16BytesCase2).  */
3201 pcmpeqb %xmm7, %xmm0
3202 pmovmskb %xmm0, %eax
3203 movaps %xmm6, -32(%edx)
3204 lea 16(%esi), %esi
3205 lea -16(%ebx), %ebx
3206 jmp L(CopyFrom1To16BytesCase2)
3207
3208/*--------------------------------------------------*/
/* StrncpyExit1Case2OrCase3: copy 15 bytes from (%ecx) to (%edx) (two 8-byte
   moves at offsets 0 and 7), set %esi = 15, then continue at
   L(CopyFrom1To16BytesCase2) if %eax is non-zero, else at
   L(CopyFrom1To16BytesCase3).  */
3209 .p2align 4
3210L(StrncpyExit1Case2OrCase3):
3211 movlpd (%ecx), %xmm0
3212 movlpd 7(%ecx), %xmm1
3213 movlpd %xmm0, (%edx)
3214 movlpd %xmm1, 7(%edx)
3215 mov $15, %esi
3216 test %eax, %eax
3217 jnz L(CopyFrom1To16BytesCase2)
3218 jmp L(CopyFrom1To16BytesCase3)
3219
/* StrncpyExit2Case2OrCase3: copy 14 bytes from (%ecx) to (%edx) (two 8-byte
   moves at offsets 0 and 6), set %esi = 14, then continue at
   L(CopyFrom1To16BytesCase2) if %eax is non-zero, else at
   L(CopyFrom1To16BytesCase3).  */
3220 .p2align 4
3221L(StrncpyExit2Case2OrCase3):
3222 movlpd (%ecx), %xmm0
3223 movlpd 6(%ecx), %xmm1
3224 movlpd %xmm0, (%edx)
3225 movlpd %xmm1, 6(%edx)
3226 mov $14, %esi
3227 test %eax, %eax
3228 jnz L(CopyFrom1To16BytesCase2)
3229 jmp L(CopyFrom1To16BytesCase3)
3230
/* StrncpyExit3Case2OrCase3: copy 13 bytes from (%ecx) to (%edx) (two 8-byte
   moves at offsets 0 and 5), set %esi = 13, then continue at
   L(CopyFrom1To16BytesCase2) if %eax is non-zero, else at
   L(CopyFrom1To16BytesCase3).  */
3231 .p2align 4
3232L(StrncpyExit3Case2OrCase3):
3233 movlpd (%ecx), %xmm0
3234 movlpd 5(%ecx), %xmm1
3235 movlpd %xmm0, (%edx)
3236 movlpd %xmm1, 5(%edx)
3237 mov $13, %esi
3238 test %eax, %eax
3239 jnz L(CopyFrom1To16BytesCase2)
3240 jmp L(CopyFrom1To16BytesCase3)
3241
/* StrncpyExit4Case2OrCase3: copy 12 bytes from (%ecx) to (%edx) (8 bytes via
   %xmm0 plus 4 bytes at offset 8, using %esi as scratch), set %esi = 12,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero, else at
   L(CopyFrom1To16BytesCase3).  */
3242 .p2align 4
3243L(StrncpyExit4Case2OrCase3):
3244 movlpd (%ecx), %xmm0
3245 movl 8(%ecx), %esi
3246 movlpd %xmm0, (%edx)
3247 movl %esi, 8(%edx)
3248 mov $12, %esi
3249 test %eax, %eax
3250 jnz L(CopyFrom1To16BytesCase2)
3251 jmp L(CopyFrom1To16BytesCase3)
3252
/* StrncpyExit5Case2OrCase3: copy 11 bytes from (%ecx) to (%edx) (8 bytes via
   %xmm0 plus 4 bytes at offset 7, using %esi as scratch), set %esi = 11,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero, else at
   L(CopyFrom1To16BytesCase3).  */
3253 .p2align 4
3254L(StrncpyExit5Case2OrCase3):
3255 movlpd (%ecx), %xmm0
3256 movl 7(%ecx), %esi
3257 movlpd %xmm0, (%edx)
3258 movl %esi, 7(%edx)
3259 mov $11, %esi
3260 test %eax, %eax
3261 jnz L(CopyFrom1To16BytesCase2)
3262 jmp L(CopyFrom1To16BytesCase3)
3263
/* StrncpyExit6Case2OrCase3: copy 10 bytes from (%ecx) to (%edx) (8 bytes via
   %xmm0 plus 4 bytes at offset 6, using %esi as scratch), set %esi = 10,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero, else at
   L(CopyFrom1To16BytesCase3).  */
3264 .p2align 4
3265L(StrncpyExit6Case2OrCase3):
3266 movlpd (%ecx), %xmm0
3267 movl 6(%ecx), %esi
3268 movlpd %xmm0, (%edx)
3269 movl %esi, 6(%edx)
3270 mov $10, %esi
3271 test %eax, %eax
3272 jnz L(CopyFrom1To16BytesCase2)
3273 jmp L(CopyFrom1To16BytesCase3)
3274
/* StrncpyExit7Case2OrCase3: copy 9 bytes from (%ecx) to (%edx) (8 bytes via
   %xmm0 plus 4 bytes at offset 5, using %esi as scratch), set %esi = 9,
   then continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero, else at
   L(CopyFrom1To16BytesCase3).  */
3275 .p2align 4
3276L(StrncpyExit7Case2OrCase3):
3277 movlpd (%ecx), %xmm0
3278 movl 5(%ecx), %esi
3279 movlpd %xmm0, (%edx)
3280 movl %esi, 5(%edx)
3281 mov $9, %esi
3282 test %eax, %eax
3283 jnz L(CopyFrom1To16BytesCase2)
3284 jmp L(CopyFrom1To16BytesCase3)
3285
/* StrncpyExit8Case2OrCase3: copy 8 bytes from (%ecx) to (%edx) via %xmm0,
   set %esi = 8, then continue at L(CopyFrom1To16BytesCase2) if %eax is
   non-zero, else at L(CopyFrom1To16BytesCase3).  */
3286 .p2align 4
3287L(StrncpyExit8Case2OrCase3):
3288 movlpd (%ecx), %xmm0
3289 movlpd %xmm0, (%edx)
3290 mov $8, %esi
3291 test %eax, %eax
3292 jnz L(CopyFrom1To16BytesCase2)
3293 jmp L(CopyFrom1To16BytesCase3)
3294
/* StrncpyExit9Case2OrCase3: copy 8 bytes from (%ecx) to (%edx) via %xmm0,
   set %esi = 7, then continue at L(CopyFrom1To16BytesCase2) if %eax is
   non-zero, else at L(CopyFrom1To16BytesCase3).
   NOTE(review): 8 bytes are stored but only 7 are recorded in %esi, unlike
   L(StrncpyExit10Case2OrCase3), which shifts the 8-byte window back instead.
   Presumably the 8th byte is rewritten by the continuation -- confirm it can
   never lie beyond the permitted length.  */
3295 .p2align 4
3296L(StrncpyExit9Case2OrCase3):
3297 movlpd (%ecx), %xmm0
3298 movlpd %xmm0, (%edx)
3299 mov $7, %esi
3300 test %eax, %eax
3301 jnz L(CopyFrom1To16BytesCase2)
3302 jmp L(CopyFrom1To16BytesCase3)
3303
/* StrncpyExit10Case2OrCase3: copy the 8 bytes starting one byte before
   (%ecx) to the matching place before (%edx), so offsets -1..6 are written;
   set %esi = 6, then continue at L(CopyFrom1To16BytesCase2) if %eax is
   non-zero, else at L(CopyFrom1To16BytesCase3).  */
3304 .p2align 4
3305L(StrncpyExit10Case2OrCase3):
3306 movlpd -1(%ecx), %xmm0
3307 movlpd %xmm0, -1(%edx)
3308 mov $6, %esi
3309 test %eax, %eax
3310 jnz L(CopyFrom1To16BytesCase2)
3311 jmp L(CopyFrom1To16BytesCase3)
3312
/* StrncpyExit11Case2OrCase3: copy the 8 bytes starting two bytes before
   (%ecx) to the matching place before (%edx), so offsets -2..5 are written;
   set %esi = 5, then continue at L(CopyFrom1To16BytesCase2) if %eax is
   non-zero, else at L(CopyFrom1To16BytesCase3).  */
3313 .p2align 4
3314L(StrncpyExit11Case2OrCase3):
3315 movlpd -2(%ecx), %xmm0
3316 movlpd %xmm0, -2(%edx)
3317 mov $5, %esi
3318 test %eax, %eax
3319 jnz L(CopyFrom1To16BytesCase2)
3320 jmp L(CopyFrom1To16BytesCase3)
3321
/* StrncpyExit12Case2OrCase3: copy 4 bytes from (%ecx) to (%edx) using %esi
   as scratch, set %esi = 4, then continue at L(CopyFrom1To16BytesCase2) if
   %eax is non-zero, else at L(CopyFrom1To16BytesCase3).  */
3322 .p2align 4
3323L(StrncpyExit12Case2OrCase3):
3324 movl (%ecx), %esi
3325 movl %esi, (%edx)
3326 mov $4, %esi
3327 test %eax, %eax
3328 jnz L(CopyFrom1To16BytesCase2)
3329 jmp L(CopyFrom1To16BytesCase3)
3330
/* StrncpyExit13Case2OrCase3: copy the 4 bytes starting one byte before
   (%ecx) to the matching place before (%edx) (offsets -1..2), using %esi as
   scratch; set %esi = 3, then continue at L(CopyFrom1To16BytesCase2) if
   %eax is non-zero, else at L(CopyFrom1To16BytesCase3).  */
3331 .p2align 4
3332L(StrncpyExit13Case2OrCase3):
3333 movl -1(%ecx), %esi
3334 movl %esi, -1(%edx)
3335 mov $3, %esi
3336 test %eax, %eax
3337 jnz L(CopyFrom1To16BytesCase2)
3338 jmp L(CopyFrom1To16BytesCase3)
3339
/* StrncpyExit14Case2OrCase3: copy the 4 bytes starting two bytes before
   (%ecx) to the matching place before (%edx) (offsets -2..1), using %esi as
   scratch; set %esi = 2, then continue at L(CopyFrom1To16BytesCase2) if
   %eax is non-zero, else at L(CopyFrom1To16BytesCase3).  */
3340 .p2align 4
3341L(StrncpyExit14Case2OrCase3):
3342 movl -2(%ecx), %esi
3343 movl %esi, -2(%edx)
3344 mov $2, %esi
3345 test %eax, %eax
3346 jnz L(CopyFrom1To16BytesCase2)
3347 jmp L(CopyFrom1To16BytesCase3)
3348
/* StrncpyExit15Case2OrCase3: copy the 4 bytes starting three bytes before
   (%ecx) to the matching place before (%edx) (offsets -3..0), using %esi as
   scratch; set %esi = 1, then continue at L(CopyFrom1To16BytesCase2) if
   %eax is non-zero, else at L(CopyFrom1To16BytesCase3).  */
3349 .p2align 4
3350L(StrncpyExit15Case2OrCase3):
3351 movl -3(%ecx), %esi
3352 movl %esi, -3(%edx)
3353 mov $1, %esi
3354 test %eax, %eax
3355 jnz L(CopyFrom1To16BytesCase2)
3356 jmp L(CopyFrom1To16BytesCase3)
3357
/* StrncpyLeave1: leave path for a source that sits 1 byte past a 16-byte
   boundary (the aligned load from 31(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $1; %xmm4 and %xmm5 are stored
   unchanged.  */
3358L(StrncpyLeave1):
3359 movaps %xmm2, %xmm3
3360 add $48, %ebx
3361 jle L(StrncpyExit1)
3362 palignr $1, %xmm1, %xmm2
3363 movaps %xmm2, (%edx)
3364 movaps 31(%ecx), %xmm2
3365 lea 16(%esi), %esi
3366 sub $16, %ebx
3367 jbe L(StrncpyExit1)
3368 palignr $1, %xmm3, %xmm2
3369 movaps %xmm2, 16(%edx)
3370 lea 16(%esi), %esi
3371 sub $16, %ebx
3372 jbe L(StrncpyExit1)
3373 movaps %xmm4, 32(%edx)
3374 lea 16(%esi), %esi
3375 sub $16, %ebx
3376 jbe L(StrncpyExit1)
3377 movaps %xmm5, 48(%edx)
3378 lea 16(%esi), %esi
3379 lea -16(%ebx), %ebx
/* StrncpyExit1: advance %edx and %ecx by %esi + 15, re-copy the 16 bytes that
   end at the new position with unaligned moves, clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3380L(StrncpyExit1):
3381 lea 15(%edx, %esi), %edx
3382 lea 15(%ecx, %esi), %ecx
3383 movdqu -16(%ecx), %xmm0
3384 xor %esi, %esi
3385 movdqu %xmm0, -16(%edx)
3386 jmp L(CopyFrom1To16BytesCase3)
3387
/* StrncpyLeave2: leave path for a source that sits 2 bytes past a 16-byte
   boundary (the aligned load from 30(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $2; %xmm4 and %xmm5 are stored
   unchanged.  */
3388L(StrncpyLeave2):
3389 movaps %xmm2, %xmm3
3390 add $48, %ebx
3391 jle L(StrncpyExit2)
3392 palignr $2, %xmm1, %xmm2
3393 movaps %xmm2, (%edx)
3394 movaps 30(%ecx), %xmm2
3395 lea 16(%esi), %esi
3396 sub $16, %ebx
3397 jbe L(StrncpyExit2)
3398 palignr $2, %xmm3, %xmm2
3399 movaps %xmm2, 16(%edx)
3400 lea 16(%esi), %esi
3401 sub $16, %ebx
3402 jbe L(StrncpyExit2)
3403 movaps %xmm4, 32(%edx)
3404 lea 16(%esi), %esi
3405 sub $16, %ebx
3406 jbe L(StrncpyExit2)
3407 movaps %xmm5, 48(%edx)
3408 lea 16(%esi), %esi
3409 lea -16(%ebx), %ebx
/* StrncpyExit2: advance %edx and %ecx by %esi + 14, re-copy the 16 bytes that
   end at the new position with unaligned moves, clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3410L(StrncpyExit2):
3411 lea 14(%edx, %esi), %edx
3412 lea 14(%ecx, %esi), %ecx
3413 movdqu -16(%ecx), %xmm0
3414 xor %esi, %esi
3415 movdqu %xmm0, -16(%edx)
3416 jmp L(CopyFrom1To16BytesCase3)
3417
/* StrncpyLeave3: leave path for a source that sits 3 bytes past a 16-byte
   boundary (the aligned load from 29(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $3; %xmm4 and %xmm5 are stored
   unchanged.  */
3418L(StrncpyLeave3):
3419 movaps %xmm2, %xmm3
3420 add $48, %ebx
3421 jle L(StrncpyExit3)
3422 palignr $3, %xmm1, %xmm2
3423 movaps %xmm2, (%edx)
3424 movaps 29(%ecx), %xmm2
3425 lea 16(%esi), %esi
3426 sub $16, %ebx
3427 jbe L(StrncpyExit3)
3428 palignr $3, %xmm3, %xmm2
3429 movaps %xmm2, 16(%edx)
3430 lea 16(%esi), %esi
3431 sub $16, %ebx
3432 jbe L(StrncpyExit3)
3433 movaps %xmm4, 32(%edx)
3434 lea 16(%esi), %esi
3435 sub $16, %ebx
3436 jbe L(StrncpyExit3)
3437 movaps %xmm5, 48(%edx)
3438 lea 16(%esi), %esi
3439 lea -16(%ebx), %ebx
/* StrncpyExit3: advance %edx and %ecx by %esi + 13, re-copy the 16 bytes that
   end at the new position with unaligned moves, clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3440L(StrncpyExit3):
3441 lea 13(%edx, %esi), %edx
3442 lea 13(%ecx, %esi), %ecx
3443 movdqu -16(%ecx), %xmm0
3444 xor %esi, %esi
3445 movdqu %xmm0, -16(%edx)
3446 jmp L(CopyFrom1To16BytesCase3)
3447
/* StrncpyLeave4: leave path for a source that sits 4 bytes past a 16-byte
   boundary (the aligned load from 28(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $4; %xmm4 and %xmm5 are stored
   unchanged.  */
3448L(StrncpyLeave4):
3449 movaps %xmm2, %xmm3
3450 add $48, %ebx
3451 jle L(StrncpyExit4)
3452 palignr $4, %xmm1, %xmm2
3453 movaps %xmm2, (%edx)
3454 movaps 28(%ecx), %xmm2
3455 lea 16(%esi), %esi
3456 sub $16, %ebx
3457 jbe L(StrncpyExit4)
3458 palignr $4, %xmm3, %xmm2
3459 movaps %xmm2, 16(%edx)
3460 lea 16(%esi), %esi
3461 sub $16, %ebx
3462 jbe L(StrncpyExit4)
3463 movaps %xmm4, 32(%edx)
3464 lea 16(%esi), %esi
3465 sub $16, %ebx
3466 jbe L(StrncpyExit4)
3467 movaps %xmm5, 48(%edx)
3468 lea 16(%esi), %esi
3469 lea -16(%ebx), %ebx
/* StrncpyExit4: advance %edx and %ecx by %esi + 12, copy the 12 bytes that
   end at the new position (8 via %xmm0 + 4 via %eax, which is clobbered),
   clear %esi and continue at L(CopyFrom1To16BytesCase3).  */
3470L(StrncpyExit4):
3471 lea 12(%edx, %esi), %edx
3472 lea 12(%ecx, %esi), %ecx
3473 movlpd -12(%ecx), %xmm0
3474 movl -4(%ecx), %eax
3475 movlpd %xmm0, -12(%edx)
3476 movl %eax, -4(%edx)
3477 xor %esi, %esi
3478 jmp L(CopyFrom1To16BytesCase3)
3479
/* StrncpyLeave5: leave path for a source that sits 5 bytes past a 16-byte
   boundary (the aligned load from 27(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $5; %xmm4 and %xmm5 are stored
   unchanged.  */
3480L(StrncpyLeave5):
3481 movaps %xmm2, %xmm3
3482 add $48, %ebx
3483 jle L(StrncpyExit5)
3484 palignr $5, %xmm1, %xmm2
3485 movaps %xmm2, (%edx)
3486 movaps 27(%ecx), %xmm2
3487 lea 16(%esi), %esi
3488 sub $16, %ebx
3489 jbe L(StrncpyExit5)
3490 palignr $5, %xmm3, %xmm2
3491 movaps %xmm2, 16(%edx)
3492 lea 16(%esi), %esi
3493 sub $16, %ebx
3494 jbe L(StrncpyExit5)
3495 movaps %xmm4, 32(%edx)
3496 lea 16(%esi), %esi
3497 sub $16, %ebx
3498 jbe L(StrncpyExit5)
3499 movaps %xmm5, 48(%edx)
3500 lea 16(%esi), %esi
3501 lea -16(%ebx), %ebx
/* StrncpyExit5: advance %edx and %ecx by %esi + 11, copy the 11 bytes that
   end at the new position (8 via %xmm0 + an overlapping 4 via %eax, which is
   clobbered), clear %esi and continue at L(CopyFrom1To16BytesCase3).  */
3502L(StrncpyExit5):
3503 lea 11(%edx, %esi), %edx
3504 lea 11(%ecx, %esi), %ecx
3505 movlpd -11(%ecx), %xmm0
3506 movl -4(%ecx), %eax
3507 movlpd %xmm0, -11(%edx)
3508 movl %eax, -4(%edx)
3509 xor %esi, %esi
3510 jmp L(CopyFrom1To16BytesCase3)
3511
/* StrncpyLeave6: leave path for a source that sits 6 bytes past a 16-byte
   boundary (the aligned load from 26(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $6; %xmm4 and %xmm5 are stored
   unchanged.  */
3512L(StrncpyLeave6):
3513 movaps %xmm2, %xmm3
3514 add $48, %ebx
3515 jle L(StrncpyExit6)
3516 palignr $6, %xmm1, %xmm2
3517 movaps %xmm2, (%edx)
3518 movaps 26(%ecx), %xmm2
3519 lea 16(%esi), %esi
3520 sub $16, %ebx
3521 jbe L(StrncpyExit6)
3522 palignr $6, %xmm3, %xmm2
3523 movaps %xmm2, 16(%edx)
3524 lea 16(%esi), %esi
3525 sub $16, %ebx
3526 jbe L(StrncpyExit6)
3527 movaps %xmm4, 32(%edx)
3528 lea 16(%esi), %esi
3529 sub $16, %ebx
3530 jbe L(StrncpyExit6)
3531 movaps %xmm5, 48(%edx)
3532 lea 16(%esi), %esi
3533 lea -16(%ebx), %ebx
/* StrncpyExit6: advance %edx and %ecx by %esi + 10, copy the 10 bytes that
   end at the new position (8 via %xmm0 + 2 via %ax, which is clobbered),
   clear %esi and continue at L(CopyFrom1To16BytesCase3).  */
3534L(StrncpyExit6):
3535 lea 10(%edx, %esi), %edx
3536 lea 10(%ecx, %esi), %ecx
3537
3538 movlpd -10(%ecx), %xmm0
3539 movw -2(%ecx), %ax
3540 movlpd %xmm0, -10(%edx)
3541 movw %ax, -2(%edx)
3542 xor %esi, %esi
3543 jmp L(CopyFrom1To16BytesCase3)
3544
/* StrncpyLeave7: leave path for a source that sits 7 bytes past a 16-byte
   boundary (the aligned load from 25(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $7; %xmm4 and %xmm5 are stored
   unchanged.  */
3545L(StrncpyLeave7):
3546 movaps %xmm2, %xmm3
3547 add $48, %ebx
3548 jle L(StrncpyExit7)
3549 palignr $7, %xmm1, %xmm2
3550 movaps %xmm2, (%edx)
3551 movaps 25(%ecx), %xmm2
3552 lea 16(%esi), %esi
3553 sub $16, %ebx
3554 jbe L(StrncpyExit7)
3555 palignr $7, %xmm3, %xmm2
3556 movaps %xmm2, 16(%edx)
3557 lea 16(%esi), %esi
3558 sub $16, %ebx
3559 jbe L(StrncpyExit7)
3560 movaps %xmm4, 32(%edx)
3561 lea 16(%esi), %esi
3562 sub $16, %ebx
3563 jbe L(StrncpyExit7)
3564 movaps %xmm5, 48(%edx)
3565 lea 16(%esi), %esi
3566 lea -16(%ebx), %ebx
/* StrncpyExit7: advance %edx and %ecx by %esi + 9, copy the 9 bytes that end
   at the new position (8 via %xmm0 + 1 via %ah, which is clobbered), clear
   %esi and continue at L(CopyFrom1To16BytesCase3).  */
3567L(StrncpyExit7):
3568 lea 9(%edx, %esi), %edx
3569 lea 9(%ecx, %esi), %ecx
3570
3571 movlpd -9(%ecx), %xmm0
3572 movb -1(%ecx), %ah
3573 movlpd %xmm0, -9(%edx)
3574 movb %ah, -1(%edx)
3575 xor %esi, %esi
3576 jmp L(CopyFrom1To16BytesCase3)
3577
/* StrncpyLeave8: leave path for a source that sits 8 bytes past a 16-byte
   boundary (the aligned load from 24(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $8; %xmm4 and %xmm5 are stored
   unchanged.  */
3578L(StrncpyLeave8):
3579 movaps %xmm2, %xmm3
3580 add $48, %ebx
3581 jle L(StrncpyExit8)
3582 palignr $8, %xmm1, %xmm2
3583 movaps %xmm2, (%edx)
3584 movaps 24(%ecx), %xmm2
3585 lea 16(%esi), %esi
3586 sub $16, %ebx
3587 jbe L(StrncpyExit8)
3588 palignr $8, %xmm3, %xmm2
3589 movaps %xmm2, 16(%edx)
3590 lea 16(%esi), %esi
3591 sub $16, %ebx
3592 jbe L(StrncpyExit8)
3593 movaps %xmm4, 32(%edx)
3594 lea 16(%esi), %esi
3595 sub $16, %ebx
3596 jbe L(StrncpyExit8)
3597 movaps %xmm5, 48(%edx)
3598 lea 16(%esi), %esi
3599 lea -16(%ebx), %ebx
/* StrncpyExit8: advance %edx and %ecx by %esi + 8, copy the 8 bytes that end
   at the new position via %xmm0, clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3600L(StrncpyExit8):
3601 lea 8(%edx, %esi), %edx
3602 lea 8(%ecx, %esi), %ecx
3603 movlpd -8(%ecx), %xmm0
3604 movlpd %xmm0, -8(%edx)
3605 xor %esi, %esi
3606 jmp L(CopyFrom1To16BytesCase3)
3607
/* StrncpyLeave9: leave path for a source that sits 9 bytes past a 16-byte
   boundary (the aligned load from 23(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $9; %xmm4 and %xmm5 are stored
   unchanged.  */
3608L(StrncpyLeave9):
3609 movaps %xmm2, %xmm3
3610 add $48, %ebx
3611 jle L(StrncpyExit9)
3612 palignr $9, %xmm1, %xmm2
3613 movaps %xmm2, (%edx)
3614 movaps 23(%ecx), %xmm2
3615 lea 16(%esi), %esi
3616 sub $16, %ebx
3617 jbe L(StrncpyExit9)
3618 palignr $9, %xmm3, %xmm2
3619 movaps %xmm2, 16(%edx)
3620 lea 16(%esi), %esi
3621 sub $16, %ebx
3622 jbe L(StrncpyExit9)
3623 movaps %xmm4, 32(%edx)
3624 lea 16(%esi), %esi
3625 sub $16, %ebx
3626 jbe L(StrncpyExit9)
3627 movaps %xmm5, 48(%edx)
3628 lea 16(%esi), %esi
3629 lea -16(%ebx), %ebx
/* StrncpyExit9: advance %edx and %ecx by %esi + 7, then copy the 8 bytes that
   end at the new position via %xmm0.  That is one byte more than the
   7-byte advance, so the window reaches back one byte.  Clear %esi and
   continue at L(CopyFrom1To16BytesCase3).  */
3630L(StrncpyExit9):
3631 lea 7(%edx, %esi), %edx
3632 lea 7(%ecx, %esi), %ecx
3633
3634 movlpd -8(%ecx), %xmm0
3635 movlpd %xmm0, -8(%edx)
3636 xor %esi, %esi
3637 jmp L(CopyFrom1To16BytesCase3)
3638
/* StrncpyLeave10: leave path for a source that sits 10 bytes past a 16-byte
   boundary (the aligned load from 22(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $10; %xmm4 and %xmm5 are stored
   unchanged.  */
3639L(StrncpyLeave10):
3640 movaps %xmm2, %xmm3
3641 add $48, %ebx
3642 jle L(StrncpyExit10)
3643 palignr $10, %xmm1, %xmm2
3644 movaps %xmm2, (%edx)
3645 movaps 22(%ecx), %xmm2
3646 lea 16(%esi), %esi
3647 sub $16, %ebx
3648 jbe L(StrncpyExit10)
3649 palignr $10, %xmm3, %xmm2
3650 movaps %xmm2, 16(%edx)
3651 lea 16(%esi), %esi
3652 sub $16, %ebx
3653 jbe L(StrncpyExit10)
3654 movaps %xmm4, 32(%edx)
3655 lea 16(%esi), %esi
3656 sub $16, %ebx
3657 jbe L(StrncpyExit10)
3658 movaps %xmm5, 48(%edx)
3659 lea 16(%esi), %esi
3660 lea -16(%ebx), %ebx
/* StrncpyExit10: advance %edx and %ecx by %esi + 6, then copy the 8 bytes
   that end at the new position via %xmm0.  That is two bytes more than the
   6-byte advance, so the window reaches back two bytes.  Clear %esi and
   continue at L(CopyFrom1To16BytesCase3).  */
3661L(StrncpyExit10):
3662 lea 6(%edx, %esi), %edx
3663 lea 6(%ecx, %esi), %ecx
3664
3665 movlpd -8(%ecx), %xmm0
3666 movlpd %xmm0, -8(%edx)
3667 xor %esi, %esi
3668 jmp L(CopyFrom1To16BytesCase3)
3669
/* StrncpyLeave11: leave path for a source that sits 11 bytes past a 16-byte
   boundary (the aligned load from 21(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $11; %xmm4 and %xmm5 are stored
   unchanged.  */
3670L(StrncpyLeave11):
3671 movaps %xmm2, %xmm3
3672 add $48, %ebx
3673 jle L(StrncpyExit11)
3674 palignr $11, %xmm1, %xmm2
3675 movaps %xmm2, (%edx)
3676 movaps 21(%ecx), %xmm2
3677 lea 16(%esi), %esi
3678 sub $16, %ebx
3679 jbe L(StrncpyExit11)
3680 palignr $11, %xmm3, %xmm2
3681 movaps %xmm2, 16(%edx)
3682 lea 16(%esi), %esi
3683 sub $16, %ebx
3684 jbe L(StrncpyExit11)
3685 movaps %xmm4, 32(%edx)
3686 lea 16(%esi), %esi
3687 sub $16, %ebx
3688 jbe L(StrncpyExit11)
3689 movaps %xmm5, 48(%edx)
3690 lea 16(%esi), %esi
3691 lea -16(%ebx), %ebx
/* StrncpyExit11: advance %edx and %ecx by %esi + 5, copy the 5 bytes that
   end at the new position (4 via %esi as scratch + 1 via %ah, which is
   clobbered), clear %esi and continue at L(CopyFrom1To16BytesCase3).  */
3692L(StrncpyExit11):
3693 lea 5(%edx, %esi), %edx
3694 lea 5(%ecx, %esi), %ecx
3695 movl -5(%ecx), %esi
3696 movb -1(%ecx), %ah
3697 movl %esi, -5(%edx)
3698 movb %ah, -1(%edx)
3699 xor %esi, %esi
3700 jmp L(CopyFrom1To16BytesCase3)
3701
/* StrncpyLeave12: leave path for a source that sits 12 bytes past a 16-byte
   boundary (the aligned load from 20(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $12; %xmm4 and %xmm5 are stored
   unchanged.  */
3702L(StrncpyLeave12):
3703 movaps %xmm2, %xmm3
3704 add $48, %ebx
3705 jle L(StrncpyExit12)
3706 palignr $12, %xmm1, %xmm2
3707 movaps %xmm2, (%edx)
3708 movaps 20(%ecx), %xmm2
3709 lea 16(%esi), %esi
3710 sub $16, %ebx
3711 jbe L(StrncpyExit12)
3712 palignr $12, %xmm3, %xmm2
3713 movaps %xmm2, 16(%edx)
3714 lea 16(%esi), %esi
3715 sub $16, %ebx
3716 jbe L(StrncpyExit12)
3717 movaps %xmm4, 32(%edx)
3718 lea 16(%esi), %esi
3719 sub $16, %ebx
3720 jbe L(StrncpyExit12)
3721 movaps %xmm5, 48(%edx)
3722 lea 16(%esi), %esi
3723 lea -16(%ebx), %ebx
/* StrncpyExit12: advance %edx and %ecx by %esi + 4, copy the 4 bytes that end
   at the new position via %eax (clobbered), clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3724L(StrncpyExit12):
3725 lea 4(%edx, %esi), %edx
3726 lea 4(%ecx, %esi), %ecx
3727 movl -4(%ecx), %eax
3728 movl %eax, -4(%edx)
3729 xor %esi, %esi
3730 jmp L(CopyFrom1To16BytesCase3)
3731
/* StrncpyLeave13: leave path for a source that sits 13 bytes past a 16-byte
   boundary (the aligned load from 19(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $13; %xmm4 and %xmm5 are stored
   unchanged.  */
3732L(StrncpyLeave13):
3733 movaps %xmm2, %xmm3
3734 add $48, %ebx
3735 jle L(StrncpyExit13)
3736 palignr $13, %xmm1, %xmm2
3737 movaps %xmm2, (%edx)
3738 movaps 19(%ecx), %xmm2
3739 lea 16(%esi), %esi
3740 sub $16, %ebx
3741 jbe L(StrncpyExit13)
3742 palignr $13, %xmm3, %xmm2
3743 movaps %xmm2, 16(%edx)
3744 lea 16(%esi), %esi
3745 sub $16, %ebx
3746 jbe L(StrncpyExit13)
3747 movaps %xmm4, 32(%edx)
3748 lea 16(%esi), %esi
3749 sub $16, %ebx
3750 jbe L(StrncpyExit13)
3751 movaps %xmm5, 48(%edx)
3752 lea 16(%esi), %esi
3753 lea -16(%ebx), %ebx
/* StrncpyExit13: advance %edx and %ecx by %esi + 3, then copy the 4 bytes
   that end at the new position via %eax (clobbered).  That is one byte more
   than the 3-byte advance, so the window reaches back one byte.  Clear %esi
   and continue at L(CopyFrom1To16BytesCase3).  */
3754L(StrncpyExit13):
3755 lea 3(%edx, %esi), %edx
3756 lea 3(%ecx, %esi), %ecx
3757
3758 movl -4(%ecx), %eax
3759 movl %eax, -4(%edx)
3760 xor %esi, %esi
3761 jmp L(CopyFrom1To16BytesCase3)
3762
/* StrncpyLeave14: leave path for a source that sits 14 bytes past a 16-byte
   boundary (the aligned load from 18(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $14; %xmm4 and %xmm5 are stored
   unchanged.  */
3763L(StrncpyLeave14):
3764 movaps %xmm2, %xmm3
3765 add $48, %ebx
3766 jle L(StrncpyExit14)
3767 palignr $14, %xmm1, %xmm2
3768 movaps %xmm2, (%edx)
3769 movaps 18(%ecx), %xmm2
3770 lea 16(%esi), %esi
3771 sub $16, %ebx
3772 jbe L(StrncpyExit14)
3773 palignr $14, %xmm3, %xmm2
3774 movaps %xmm2, 16(%edx)
3775 lea 16(%esi), %esi
3776 sub $16, %ebx
3777 jbe L(StrncpyExit14)
3778 movaps %xmm4, 32(%edx)
3779 lea 16(%esi), %esi
3780 sub $16, %ebx
3781 jbe L(StrncpyExit14)
3782 movaps %xmm5, 48(%edx)
3783 lea 16(%esi), %esi
3784 lea -16(%ebx), %ebx
/* StrncpyExit14: advance %edx and %ecx by %esi + 2, copy the 2 bytes that end
   at the new position via %ax (clobbered), clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3785L(StrncpyExit14):
3786 lea 2(%edx, %esi), %edx
3787 lea 2(%ecx, %esi), %ecx
3788 movw -2(%ecx), %ax
3789 movw %ax, -2(%edx)
3790 xor %esi, %esi
3791 jmp L(CopyFrom1To16BytesCase3)
3792
/* StrncpyLeave15: leave path for a source that sits 15 bytes past a 16-byte
   boundary (the aligned load from 17(%ecx) requires that).  %ebx is
   re-biased by 48 and then stepped down by 16; every step that still leaves
   bytes stores one more 16-byte chunk to (%edx) and adds 16 to %esi.  The
   first two chunks are rebuilt with palignr $15; %xmm4 and %xmm5 are stored
   unchanged.  */
3793L(StrncpyLeave15):
3794 movaps %xmm2, %xmm3
3795 add $48, %ebx
3796 jle L(StrncpyExit15)
3797 palignr $15, %xmm1, %xmm2
3798 movaps %xmm2, (%edx)
3799 movaps 17(%ecx), %xmm2
3800 lea 16(%esi), %esi
3801 sub $16, %ebx
3802 jbe L(StrncpyExit15)
3803 palignr $15, %xmm3, %xmm2
3804 movaps %xmm2, 16(%edx)
3805 lea 16(%esi), %esi
3806 sub $16, %ebx
3807 jbe L(StrncpyExit15)
3808 movaps %xmm4, 32(%edx)
3809 lea 16(%esi), %esi
3810 sub $16, %ebx
3811 jbe L(StrncpyExit15)
3812 movaps %xmm5, 48(%edx)
3813 lea 16(%esi), %esi
3814 lea -16(%ebx), %ebx
/* StrncpyExit15: advance %edx and %ecx by %esi + 1, copy the single byte that
   ends at the new position via %ah (clobbered), clear %esi and continue at
   L(CopyFrom1To16BytesCase3).  */
3815L(StrncpyExit15):
3816 lea 1(%edx, %esi), %edx
3817 lea 1(%ecx, %esi), %ecx
3818 movb -1(%ecx), %ah
3819 movb %ah, -1(%edx)
3820 xor %esi, %esi
3821 jmp L(CopyFrom1To16BytesCase3)
3822#endif
3823
3824#if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
3825# ifdef USE_AS_STRNCPY
/* CFI_POP is defined outside this chunk; presumably these lines only tell
   the unwinder that %esi and %edi are no longer saved on the stack in the
   code that follows -- TODO confirm.  */
3826 CFI_POP (%esi)
3827 CFI_POP (%edi)
3828
/* ExitTail0: nothing to copy; return %edx in %eax.  */
3829 .p2align 4
3830L(ExitTail0):
3831 movl %edx, %eax
3832 RETURN
3833
/* StrncpyExit15Bytes: short copy when the count in %ebx is at most 15.
   Counts of 12 or less go to L(StrncpyExit12Bytes).  Otherwise source bytes
   8..13 are tested for zero in order, and the count is compared with 13 and
   14 along the way; the first hit leaves through the matching
   L(ExitTailN).  If nothing hits, 15 bytes are copied inline.
   Bytes 0..7 are not tested here -- presumably the caller has already ruled
   out a zero among them (TODO confirm).  */
3834 .p2align 4
3835L(StrncpyExit15Bytes):
3836 cmp $12, %ebx
3837 jbe L(StrncpyExit12Bytes)
3838 cmpb $0, 8(%ecx)
3839 jz L(ExitTail9)
3840 cmpb $0, 9(%ecx)
3841 jz L(ExitTail10)
3842 cmpb $0, 10(%ecx)
3843 jz L(ExitTail11)
3844 cmpb $0, 11(%ecx)
3845 jz L(ExitTail12)
3846 cmp $13, %ebx
3847 je L(ExitTail13)
3848 cmpb $0, 12(%ecx)
3849 jz L(ExitTail13)
3850 cmp $14, %ebx
3851 je L(ExitTail14)
3852 cmpb $0, 13(%ecx)
3853 jz L(ExitTail14)
/* Copy 15 bytes with two 8-byte moves that overlap by one byte.  */
3854 movlpd (%ecx), %xmm0
3855 movlpd 7(%ecx), %xmm1
3856 movlpd %xmm0, (%edx)
3857 movlpd %xmm1, 7(%edx)
3858# ifdef USE_AS_STPCPY
/* Return %edx + 14 if the 15th byte is zero, else %edx + 15
   (sbb $-1 adds 1 - CF, and cmpb sets CF only for a zero byte).  */
3859 lea 14(%edx), %eax
3860 cmpb $1, (%eax)
3861 sbb $-1, %eax
3862# else
3863 movl %edx, %eax
3864# endif
3865 RETURN
3866
/* StrncpyExit12Bytes: short copy for a count in %ebx of at most 12 (reached
   from L(StrncpyExit15Bytes) in this chunk).  The count is compared with 9,
   10 and 11 and source bytes 8..10 are tested for zero in between; the first
   hit leaves through the matching L(ExitTailN).  If nothing hits, 12 bytes
   are copied inline.  SAVE_RESULT_TAIL is defined outside this chunk;
   presumably it sets up the return value in %eax.  */
3867 .p2align 4
3868L(StrncpyExit12Bytes):
3869 cmp $9, %ebx
3870 je L(ExitTail9)
3871 cmpb $0, 8(%ecx)
3872 jz L(ExitTail9)
3873 cmp $10, %ebx
3874 je L(ExitTail10)
3875 cmpb $0, 9(%ecx)
3876 jz L(ExitTail10)
3877 cmp $11, %ebx
3878 je L(ExitTail11)
3879 cmpb $0, 10(%ecx)
3880 jz L(ExitTail11)
3881 movlpd (%ecx), %xmm0
3882 movl 8(%ecx), %eax
3883 movlpd %xmm0, (%edx)
3884 movl %eax, 8(%edx)
3885 SAVE_RESULT_TAIL (11)
3886# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3887 cmpb $1, (%eax)
3888 sbb $-1, %eax
3889# endif
3890 RETURN
3891
/* StrncpyExit8Bytes: short copy when the count in %ebx is at most 8.
   Counts of 4 or less go to L(StrncpyExit4Bytes).  Otherwise source bytes
   0..3 are tested for zero, then the count is compared with 5, 6 and 7 with
   bytes 4..6 tested in between; the first hit leaves through the matching
   L(ExitTailN).  If nothing hits, 8 bytes are copied inline.  */
3892 .p2align 4
3893L(StrncpyExit8Bytes):
3894 cmp $4, %ebx
3895 jbe L(StrncpyExit4Bytes)
3896 cmpb $0, (%ecx)
3897 jz L(ExitTail1)
3898 cmpb $0, 1(%ecx)
3899 jz L(ExitTail2)
3900 cmpb $0, 2(%ecx)
3901 jz L(ExitTail3)
3902 cmpb $0, 3(%ecx)
3903 jz L(ExitTail4)
3904
3905 cmp $5, %ebx
3906 je L(ExitTail5)
3907 cmpb $0, 4(%ecx)
3908 jz L(ExitTail5)
3909 cmp $6, %ebx
3910 je L(ExitTail6)
3911 cmpb $0, 5(%ecx)
3912 jz L(ExitTail6)
3913 cmp $7, %ebx
3914 je L(ExitTail7)
3915 cmpb $0, 6(%ecx)
3916 jz L(ExitTail7)
3917 movlpd (%ecx), %xmm0
3918 movlpd %xmm0, (%edx)
3919# ifdef USE_AS_STPCPY
/* Return %edx + 7 if the 8th byte is zero, else %edx + 8
   (sbb $-1 adds 1 - CF, and cmpb sets CF only for a zero byte).  */
3920 lea 7(%edx), %eax
3921 cmpb $1, (%eax)
3922 sbb $-1, %eax
3923# else
3924 movl %edx, %eax
3925# endif
3926 RETURN
3927
/* StrncpyExit4Bytes: short copy for a count in %ebx of at most 4 (reached
   from L(StrncpyExit8Bytes) in this chunk).  A zero count goes to
   L(ExitTail0).  Otherwise the count is compared with 1, 2 and 3 with source
   bytes 0..2 tested for zero in between; the first hit leaves through the
   matching L(ExitTailN).  If nothing hits, 4 bytes are copied inline.
   SAVE_RESULT_TAIL is defined outside this chunk; presumably it sets up the
   return value in %eax.  */
3928 .p2align 4
3929L(StrncpyExit4Bytes):
3930 test %ebx, %ebx
3931 jz L(ExitTail0)
3932 cmp $1, %ebx
3933 je L(ExitTail1)
3934 cmpb $0, (%ecx)
3935 jz L(ExitTail1)
3936 cmp $2, %ebx
3937 je L(ExitTail2)
3938 cmpb $0, 1(%ecx)
3939 jz L(ExitTail2)
3940 cmp $3, %ebx
3941 je L(ExitTail3)
3942 cmpb $0, 2(%ecx)
3943 jz L(ExitTail3)
3944 movl (%ecx), %eax
3945 movl %eax, (%edx)
3946 SAVE_RESULT_TAIL (3)
3947# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero (CF from cmpb, then
   sbb $-1 = %eax + 1 - CF).  */
3948 cmpb $1, (%eax)
3949 sbb $-1, %eax
3950# endif
3951 RETURN
3952# endif
3953
3954END (STRCPY)
3955#endif