blob: d9b6129d0fac5ed9ff0666a4c31628a57ae5a203 [file] [log] [blame]
Liubov Dmitrieva 0a49066 2012-01-17 12:55:46 +0400
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* L(label) pastes the ".L" prefix onto LABEL, giving an assembler-local
   (non-exported) symbol name.  Defined only when the build has not already
   supplied its own L().  */
#ifndef L
# define L(label)       .L##label
#endif

/* Fallback spellings for the CFI helper macros used throughout this file.
   Each one expands to the assembler directive of the same name and is
   defined only when no earlier definition exists.  */
#ifndef cfi_startproc
# define cfi_startproc  .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc    .cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)       .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)       .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)     .cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state     .cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state      .cfi_restore_state
#endif

/* ENTRY(name): open a function.  Marks NAME as a global symbol of type
   @function, aligns it with .p2align 4 (16 bytes), emits the label and
   starts its CFI region.  Defined only if the build has no ENTRY yet.  */
#ifndef ENTRY
# define ENTRY(name) \
        .type name, @function; \
        .globl name; \
        .p2align 4; \
name: \
        cfi_startproc
#endif

/* END(name): close the CFI region opened by ENTRY and set the symbol size
   to the distance from NAME to the current location.  */
#ifndef END
# define END(name) \
        cfi_endproc; \
        .size name, .-name
#endif

/* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG.  The CFA
   offset grows by 4 and REG is recorded as saved at relative offset 0.
   Emits no machine instruction.  */
#define CFI_PUSH(REG) \
        cfi_adjust_cfa_offset (4); \
        cfi_rel_offset (REG, 0)

/* CFI_POP(REG): inverse of CFI_PUSH.  The CFA offset shrinks by 4 and REG
   is marked as restored.  Emits no machine instruction.  */
#define CFI_POP(REG) \
        cfi_adjust_cfa_offset (-4); \
        cfi_restore (REG)

/* PUSH/POP: the real pushl/popl paired with the matching CFI update, so
   the unwind description stays in step with the stack.  */
#define PUSH(REG)       pushl REG; CFI_PUSH (REG)
#define POP(REG)        popl REG; CFI_POP (REG)

/* Name of the exported symbol.  It can be overridden by defining STRCAT
   before this point.  */
#ifndef STRCAT
# define STRCAT strcat
#endif

/* Argument displacements, used as DISP(%esp).
     PARMS  4
     STR1   PARMS+4 = 8   -- read right after PUSH (%edi)
     STR2   STR1+4  = 12  -- read while only %edi is pushed
   NOTE(review): this matches stack-passed arguments sitting above the
   return address with one 4-byte register save below it; confirm against
   the target calling convention.  */
#define PARMS   4
#define STR1    PARMS+4
#define STR2    STR1+4

/* LEN (USE_AS_STRNCAT builds only) is read after the additional
   PUSH (%ebx), hence STR2+8 rather than STR2+4.  */
#ifdef USE_AS_STRNCAT
# define LEN    STR2+8
#endif

/* Not tested anywhere in this file; presumably consumed by the .S files
   included below -- confirm against them.  */
#define USE_AS_STRCAT

/* STRCAT (first, second) -- plus a third argument read into %ebx when
   USE_AS_STRNCAT is defined.

   Register roles established in this file:
     %edi  first argument; copied to %eax before every RETURN1
     %ecx  second argument (address bytes are copied from)
     %edx  %edi + %eax, the address the first copied byte is stored to
     %ebx  third argument (USE_AS_STRNCAT only)

   %eax at L(StrcpyAtom) comes from the included sse2-strlen-atom.S, which
   is not visible here -- presumably the length of the string at %edi;
   confirm against that file.  */
        .section .text.ssse3,"ax",@progbits
ENTRY (STRCAT)
        PUSH    (%edi)
        mov     STR1(%esp), %edi
        mov     %edi, %edx

/* RETURN is defined for the included file; it transfers control to
   L(StrcpyAtom) below (presumably that file's exit path -- confirm).  */
#define RETURN  jmp L(StrcpyAtom)
#include "sse2-strlen-atom.S"

L(StrcpyAtom):
        mov     STR2(%esp), %ecx
        lea     (%edi, %eax), %edx
#ifdef USE_AS_STRNCAT
        PUSH    (%ebx)
        mov     LEN(%esp), %ebx
        test    %ebx, %ebx
        jz      L(StrncatExit0)         /* count == 0 */
        cmp     $8, %ebx
        jbe     L(StrncpyExit8Bytes)    /* 1 <= count <= 8 */
#endif
        /* Probe the first source bytes for a zero byte.  A zero at index
           K dispatches to L(Exit<K+1>), which copies K+1 bytes.  In the
           strncat build the count is at least 9 here, covering the nine
           probes below.  */
        cmpb    $0, (%ecx)
        jz      L(Exit1)
        cmpb    $0, 1(%ecx)
        jz      L(Exit2)
        cmpb    $0, 2(%ecx)
        jz      L(Exit3)
        cmpb    $0, 3(%ecx)
        jz      L(Exit4)
        cmpb    $0, 4(%ecx)
        jz      L(Exit5)
        cmpb    $0, 5(%ecx)
        jz      L(Exit6)
        cmpb    $0, 6(%ecx)
        jz      L(Exit7)
        cmpb    $0, 7(%ecx)
        jz      L(Exit8)
        cmpb    $0, 8(%ecx)
        jz      L(Exit9)
#ifdef USE_AS_STRNCAT
        cmp     $16, %ebx
        jb      L(StrncpyExit15Bytes)   /* 9 <= count <= 15 */
#endif
        cmpb    $0, 9(%ecx)
        jz      L(Exit10)
        cmpb    $0, 10(%ecx)
        jz      L(Exit11)
        cmpb    $0, 11(%ecx)
        jz      L(Exit12)
        cmpb    $0, 12(%ecx)
        jz      L(Exit13)
        cmpb    $0, 13(%ecx)
        jz      L(Exit14)
        cmpb    $0, 14(%ecx)
        jz      L(Exit15)
        cmpb    $0, 15(%ecx)
        jz      L(Exit16)
#ifdef USE_AS_STRNCAT
        cmp     $16, %ebx
        je      L(StrncatExit16)        /* count == 16, no zero in 16 bytes */

/* RETURN1: restore the saved registers and return.  The CFI_PUSH
   annotations after `ret' emit no code; they re-arm the unwind state for
   whatever code follows the macro, which again runs with the registers
   pushed.  %edi is pushed first and %ebx second (see above), so the
   annotations are issued in that same order -- the reverse order would
   record the two save slots swapped in the unwind table.  */
# define RETURN1        POP (%ebx); POP (%edi); ret; \
        CFI_PUSH (%edi); CFI_PUSH (%ebx)
# define USE_AS_STRNCPY
#else
# define RETURN1        POP(%edi); ret; CFI_PUSH(%edi)
#endif
#include "ssse3-strcpy-atom.S"

/* L(CopyFrom1To16Bytes): final copy of 1..16 bytes.
   On entry %esi holds an offset that is added to both %edx and %ecx, and
   a saved %esi is on the stack (it is popped here).  %al / %ah select how
   many bytes to copy: bit K of %al set -> L(Exit<K+1>); %al == 0 ->
   L(ExitHigh) decodes %ah.  If only bit 7 of %al is set, 8 bytes are
   copied inline.
   NOTE(review): %eax is presumably a per-byte zero mask produced by the
   included ssse3-strcpy-atom.S -- confirm against that file.  */
        .p2align 4
L(CopyFrom1To16Bytes):
        add     %esi, %edx
        add     %esi, %ecx

        POP     (%esi)
        test    %al, %al
        jz      L(ExitHigh)
        test    $0x01, %al
        jnz     L(Exit1)
        test    $0x02, %al
        jnz     L(Exit2)
        test    $0x04, %al
        jnz     L(Exit3)
        test    $0x08, %al
        jnz     L(Exit4)
        test    $0x10, %al
        jnz     L(Exit5)
        test    $0x20, %al
        jnz     L(Exit6)
        test    $0x40, %al
        jnz     L(Exit7)
        /* No bit 0..6 set in a non-zero %al: copy 8 bytes.  */
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movl    %edi, %eax
        RETURN1

/* L(ExitHigh): continuation of L(CopyFrom1To16Bytes) for %al == 0.
   Bit K of %ah set -> L(Exit<K+9>).  If none of bits 0..6 is set, 16
   bytes are copied inline.  Returns %edi in %eax.  */
        .p2align 4
L(ExitHigh):
        test    $0x01, %ah
        jnz     L(Exit9)
        test    $0x02, %ah
        jnz     L(Exit10)
        test    $0x04, %ah
        jnz     L(Exit11)
        test    $0x08, %ah
        jnz     L(Exit12)
        test    $0x10, %ah
        jnz     L(Exit13)
        test    $0x20, %ah
        jnz     L(Exit14)
        test    $0x40, %ah
        jnz     L(Exit15)
        movlpd  (%ecx), %xmm0
        movlpd  8(%ecx), %xmm1
        movlpd  %xmm0, (%edx)
        movlpd  %xmm1, 8(%edx)
        movl    %edi, %eax
        RETURN1

/* Exit stubs for 1..8 bytes.

   L(ExitN)         copy exactly N bytes from (%ecx) to (%edx), then return
                    %edi in %eax.
   L(StrncatExitN)  first store %bh at N(%edx), then fall into L(ExitN).
                    Every jump to these labels in this file follows
                    `cmp $N, %ebx; je', so %ebx == N and %bh is 0: the
                    store writes the terminating zero after the N bytes.

   Sizes without a single matching move use two overlapping moves.  */
        .p2align 4
L(StrncatExit1):
        movb    %bh, 1(%edx)
L(Exit1):
        movb    (%ecx), %al
        movb    %al, (%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit2):
        movb    %bh, 2(%edx)
L(Exit2):
        movw    (%ecx), %ax
        movw    %ax, (%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit3):
        movb    %bh, 3(%edx)
L(Exit3):
        movw    (%ecx), %ax
        movw    %ax, (%edx)
        movb    2(%ecx), %al
        movb    %al, 2(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit4):
        movb    %bh, 4(%edx)
L(Exit4):
        movl    (%ecx), %eax
        movl    %eax, (%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit5):
        movb    %bh, 5(%edx)
L(Exit5):
        movl    (%ecx), %eax
        movl    %eax, (%edx)
        movb    4(%ecx), %al
        movb    %al, 4(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit6):
        movb    %bh, 6(%edx)
L(Exit6):
        movl    (%ecx), %eax
        movl    %eax, (%edx)
        movw    4(%ecx), %ax
        movw    %ax, 4(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit7):
        movb    %bh, 7(%edx)
L(Exit7):
        movl    (%ecx), %eax            /* bytes 0..3 */
        movl    %eax, (%edx)
        movl    3(%ecx), %eax           /* bytes 3..6, overlapping byte 3 */
        movl    %eax, 3(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit8):
        movb    %bh, 8(%edx)
L(Exit8):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movl    %edi, %eax
        RETURN1

/* Exit stubs for 9..16 bytes.

   L(ExitN)         copy exactly N bytes from (%ecx) to (%edx), then return
                    %edi in %eax.  The first 8 bytes go through %xmm0; the
                    rest uses a byte/word/dword move or a second 8-byte
                    move that overlaps the first.
   L(StrncatExitN)  first store %bh at N(%edx), then fall into L(ExitN).
                    Every jump to these labels in this file follows
                    `cmp $N, %ebx; je', so %ebx == N and %bh is 0: the
                    store writes the terminating zero after the N bytes.  */
        .p2align 4
L(StrncatExit9):
        movb    %bh, 9(%edx)
L(Exit9):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movb    8(%ecx), %al
        movb    %al, 8(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit10):
        movb    %bh, 10(%edx)
L(Exit10):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movw    8(%ecx), %ax
        movw    %ax, 8(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit11):
        movb    %bh, 11(%edx)
L(Exit11):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movl    7(%ecx), %eax           /* bytes 7..10, overlapping byte 7 */
        movl    %eax, 7(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit12):
        movb    %bh, 12(%edx)
L(Exit12):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movl    8(%ecx), %eax
        movl    %eax, 8(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit13):
        movb    %bh, 13(%edx)
L(Exit13):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movlpd  5(%ecx), %xmm0          /* bytes 5..12 */
        movlpd  %xmm0, 5(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit14):
        movb    %bh, 14(%edx)
L(Exit14):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movlpd  6(%ecx), %xmm0          /* bytes 6..13 */
        movlpd  %xmm0, 6(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit15):
        movb    %bh, 15(%edx)
L(Exit15):
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movlpd  7(%ecx), %xmm0          /* bytes 7..14 */
        movlpd  %xmm0, 7(%edx)
        movl    %edi, %eax
        RETURN1

        .p2align 4
L(StrncatExit16):
        movb    %bh, 16(%edx)
L(Exit16):
        movlpd  (%ecx), %xmm0
        movlpd  8(%ecx), %xmm1
        movlpd  %xmm0, (%edx)
        movlpd  %xmm1, 8(%edx)
        movl    %edi, %eax
        RETURN1

#ifdef USE_AS_STRNCPY

/* No instruction is emitted here: the code below runs with a saved %esi
   on the stack (it is removed by POP (%esi)), and this records that for
   the unwinder.  */
        CFI_PUSH(%esi)

/* L(CopyFrom1To16BytesCase2): final block when both the %eax mask and the
   count in %ebx have to be honoured.
   On entry %esi holds an offset to add to %ecx and %edx, and %ebx is
   biased by -16 (16 is added back first).  Whichever comes first wins:
   mask bit K set -> L(Exit<K+1>); %ebx == K -> L(StrncatExit<K>).
   NOTE(review): the meaning of the %eax mask is defined by the included
   ssse3-strcpy-atom.S -- presumably one bit per zero byte; confirm.  */
        .p2align 4
L(CopyFrom1To16BytesCase2):
        add     $16, %ebx
        add     %esi, %ecx
        lea     (%esi, %edx), %esi      /* park the final %edx in %esi */
        lea     -9(%ebx), %edx          /* edx = count - 9 */
        and     $1<<7, %dh              /* dh = 0x80 iff bit 15 of (count - 9) is
                                           set, i.e. count < 9 for small counts */
        or      %al, %dh                /* ZF = 1 iff that bit is clear and %al == 0 */
        lea     (%esi), %edx            /* edx = parked value; lea keeps the flags */
        POP     (%esi)
        jz      L(ExitHighCase2)        /* still testing the ZF from `or' */

        test    $0x01, %al
        jnz     L(Exit1)
        cmp     $1, %ebx
        je      L(StrncatExit1)
        test    $0x02, %al
        jnz     L(Exit2)
        cmp     $2, %ebx
        je      L(StrncatExit2)
        test    $0x04, %al
        jnz     L(Exit3)
        cmp     $3, %ebx
        je      L(StrncatExit3)
        test    $0x08, %al
        jnz     L(Exit4)
        cmp     $4, %ebx
        je      L(StrncatExit4)
        test    $0x10, %al
        jnz     L(Exit5)
        cmp     $5, %ebx
        je      L(StrncatExit5)
        test    $0x20, %al
        jnz     L(Exit6)
        cmp     $6, %ebx
        je      L(StrncatExit6)
        test    $0x40, %al
        jnz     L(Exit7)
        cmp     $7, %ebx
        je      L(StrncatExit7)
        /* Copy 8 bytes, then store a zero at index 7 if the byte just
           written there is already zero, otherwise at index 8.  */
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        lea     7(%edx), %eax
        cmpb    $1, (%eax)              /* CF = 1 iff byte 7 is zero */
        sbb     $-1, %eax               /* eax += 1 - CF */
        xor     %cl, %cl                /* zero via %cl rather than %bh; no
                                           `cmp $N, %ebx; je' pins %ebx here */
        movb    %cl, (%eax)
        movl    %edi, %eax
        RETURN1

/* L(ExitHighCase2): continuation of L(CopyFrom1To16BytesCase2), taken
   when %al == 0 and bit 15 of (%ebx - 9) is clear.  For positions 9..15,
   whichever comes first wins: bit K of %ah set -> L(Exit<K+9>);
   %ebx == N -> L(StrncatExit<N>).  If nothing matches, 16 bytes are
   copied with no extra terminator store.  Returns %edi in %eax.  */
        .p2align 4
L(ExitHighCase2):
        test    $0x01, %ah
        jnz     L(Exit9)
        cmp     $9, %ebx
        je      L(StrncatExit9)
        test    $0x02, %ah
        jnz     L(Exit10)
        cmp     $10, %ebx
        je      L(StrncatExit10)
        test    $0x04, %ah
        jnz     L(Exit11)
        cmp     $11, %ebx
        je      L(StrncatExit11)
        test    $0x8, %ah
        jnz     L(Exit12)
        cmp     $12, %ebx
        je      L(StrncatExit12)
        test    $0x10, %ah
        jnz     L(Exit13)
        cmp     $13, %ebx
        je      L(StrncatExit13)
        test    $0x20, %ah
        jnz     L(Exit14)
        cmp     $14, %ebx
        je      L(StrncatExit14)
        test    $0x40, %ah
        jnz     L(Exit15)
        cmp     $15, %ebx
        je      L(StrncatExit15)
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movlpd  8(%ecx), %xmm1
        movlpd  %xmm1, 8(%edx)
        movl    %edi, %eax
        RETURN1

/* No instruction is emitted here: the two entry points below run with a
   saved %esi on the stack, and this records that for the unwinder.  */
        CFI_PUSH(%esi)

/* L(CopyFrom1To16BytesCase2OrCase3): a non-zero %eax mask goes to Case2;
   a zero mask falls through into Case3.  */
L(CopyFrom1To16BytesCase2OrCase3):
        test    %eax, %eax
        jnz     L(CopyFrom1To16BytesCase2)

/* L(CopyFrom1To16BytesCase3): only the count in %ebx limits the copy.
   %ebx is biased by -16 on entry (16 is added back), and %esi holds an
   offset to add to %edx and %ecx.  Counts 1..7 dispatch to
   L(StrncatExit<N>), counts above 8 go to L(ExitHighCase3), and the
   remaining case copies 8 bytes and stores %bh at index 8.
   NOTE(review): the fall-through assumes %ebx == 8, so that %bh is zero;
   a count of 0 reaching here would also take this path -- confirm the
   included caller never does that.  */
        .p2align 4
L(CopyFrom1To16BytesCase3):
        add     $16, %ebx
        add     %esi, %edx
        add     %esi, %ecx

        POP     (%esi)

        cmp     $8, %ebx
        ja      L(ExitHighCase3)
        cmp     $1, %ebx
        je      L(StrncatExit1)
        cmp     $2, %ebx
        je      L(StrncatExit2)
        cmp     $3, %ebx
        je      L(StrncatExit3)
        cmp     $4, %ebx
        je      L(StrncatExit4)
        cmp     $5, %ebx
        je      L(StrncatExit5)
        cmp     $6, %ebx
        je      L(StrncatExit6)
        cmp     $7, %ebx
        je      L(StrncatExit7)
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movb    %bh, 8(%edx)
        movl    %edi, %eax
        RETURN1

/* L(ExitHighCase3): continuation of L(CopyFrom1To16BytesCase3) for
   %ebx > 8.  Counts 9..15 dispatch to L(StrncatExit<N>); otherwise 16
   bytes are copied and %bh is stored at index 16.
   NOTE(review): the fall-through assumes %ebx == 16, so that %bh is zero
   -- confirm the included caller never arrives with a larger count.  */
        .p2align 4
L(ExitHighCase3):
        cmp     $9, %ebx
        je      L(StrncatExit9)
        cmp     $10, %ebx
        je      L(StrncatExit10)
        cmp     $11, %ebx
        je      L(StrncatExit11)
        cmp     $12, %ebx
        je      L(StrncatExit12)
        cmp     $13, %ebx
        je      L(StrncatExit13)
        cmp     $14, %ebx
        je      L(StrncatExit14)
        cmp     $15, %ebx
        je      L(StrncatExit15)
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        movlpd  8(%ecx), %xmm1
        movlpd  %xmm1, 8(%edx)
        movb    %bh, 16(%edx)
        movl    %edi, %eax
        RETURN1

/* L(StrncatExit0): taken from L(StrcpyAtom) when the count in %ebx is
   zero.  Nothing is written; %edi is returned in %eax.  */
        .p2align 4
L(StrncatExit0):
        movl    %edi, %eax
        RETURN1

/* L(StrncpyExit15Bytes): bounded tail for 9 <= %ebx <= 15.
   It is entered from L(StrcpyAtom) after source bytes 0..8 have been
   checked and found non-zero.  For each following position the count is
   tested first (%ebx == N -> L(StrncatExit<N>)), then the source byte
   (zero at index N -> L(Exit<N+1>)).  When every test fails %ebx is 15:
   15 bytes are copied and a zero is stored at index 14 if the byte just
   written there is zero, otherwise at index 15.  */
        .p2align 4
L(StrncpyExit15Bytes):
        cmp     $9, %ebx
        je      L(StrncatExit9)
        cmpb    $0, 9(%ecx)
        jz      L(Exit10)
        cmp     $10, %ebx
        je      L(StrncatExit10)
        cmpb    $0, 10(%ecx)
        jz      L(Exit11)
        cmp     $11, %ebx
        je      L(StrncatExit11)
        cmpb    $0, 11(%ecx)
        jz      L(Exit12)
        cmp     $12, %ebx
        je      L(StrncatExit12)
        cmpb    $0, 12(%ecx)
        jz      L(Exit13)
        cmp     $13, %ebx
        je      L(StrncatExit13)
        cmpb    $0, 13(%ecx)
        jz      L(Exit14)
        cmp     $14, %ebx
        je      L(StrncatExit14)
        movlpd  (%ecx), %xmm0           /* bytes 0..7 */
        movlpd  %xmm0, (%edx)
        movlpd  7(%ecx), %xmm0          /* bytes 7..14 */
        movlpd  %xmm0, 7(%edx)
        lea     14(%edx), %eax
        cmpb    $1, (%eax)              /* CF = 1 iff byte 14 is zero */
        sbb     $-1, %eax               /* eax += 1 - CF */
        movb    %bh, (%eax)             /* %bh == 0 because %ebx == 15 */
        movl    %edi, %eax
        RETURN1

/* L(StrncpyExit8Bytes): bounded tail for 1 <= %ebx <= 8, entered from
   L(StrcpyAtom).  For each position the source byte is tested first
   (zero at index K -> L(Exit<K+1>)), then the count (%ebx == N ->
   L(StrncatExit<N>)).  When every test fails %ebx is 8: 8 bytes are
   copied and a zero is stored at index 7 if the byte just written there
   is zero, otherwise at index 8.  */
        .p2align 4
L(StrncpyExit8Bytes):
        cmpb    $0, (%ecx)
        jz      L(Exit1)
        cmp     $1, %ebx
        je      L(StrncatExit1)
        cmpb    $0, 1(%ecx)
        jz      L(Exit2)
        cmp     $2, %ebx
        je      L(StrncatExit2)
        cmpb    $0, 2(%ecx)
        jz      L(Exit3)
        cmp     $3, %ebx
        je      L(StrncatExit3)
        cmpb    $0, 3(%ecx)
        jz      L(Exit4)
        cmp     $4, %ebx
        je      L(StrncatExit4)
        cmpb    $0, 4(%ecx)
        jz      L(Exit5)
        cmp     $5, %ebx
        je      L(StrncatExit5)
        cmpb    $0, 5(%ecx)
        jz      L(Exit6)
        cmp     $6, %ebx
        je      L(StrncatExit6)
        cmpb    $0, 6(%ecx)
        jz      L(Exit7)
        cmp     $7, %ebx
        je      L(StrncatExit7)
        movlpd  (%ecx), %xmm0
        movlpd  %xmm0, (%edx)
        lea     7(%edx), %eax
        cmpb    $1, (%eax)              /* CF = 1 iff byte 7 is zero */
        sbb     $-1, %eax               /* eax += 1 - CF */
        movb    %bh, (%eax)             /* %bh == 0 because %ebx == 8 */
        movl    %edi, %eax
        RETURN1

#endif
END (STRCAT)