/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Exported symbol name; a build may predefine MEMCMP to rename the routine. */
#ifndef MEMCMP
# define MEMCMP		ssse3_memcmp3_new
#endif

/* L(label) expands to a ".L"-prefixed label name. */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) emits .p2align n, i.e. alignment to a 2^n-byte boundary. */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/*
 * Thin wrappers over the assembler's .cfi_* directives.  Each one is only
 * defined here when the including build has not already supplied it.
 */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state	.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state	.cfi_restore_state
#endif

/*
 * ENTRY(name): declare NAME as a global function symbol, align it to
 * 16 bytes, place the label and open the CFI region.
 */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/* CFI bookkeeping for one 4-byte push: CFA grows by 4, REG is saved at offset 0. */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI bookkeeping for one 4-byte pop: CFA shrinks by 4, REG is marked restored. */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the matching CFI annotations above. */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/*
 * Stack offsets of the three arguments, relative to %esp at function entry
 * (used before any register is pushed): BLK1 = 4, BLK2 = 8, LEN = 12.
 */
#define PARMS		4
#define BLK1		PARMS
#define BLK2		BLK1+4
#define LEN		BLK2+4
/* Epilogue for paths that have %ebx, %esi and %edi pushed (in that order). */
#define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
/* Same epilogue, then re-establish the remembered CFI state for the code that follows. */
#define RETURN		RETURN_END; cfi_restore_state; cfi_remember_state

/*
 * MEMCMP (blk1, blk2, len) -- SSSE3 buffer comparison, 32-bit x86, AT&T syntax.
 * Arguments are read from the stack at BLK1/BLK2/LEN(%esp); the result is
 * returned in %eax.  Presumably memcmp() semantics: the one-byte path in
 * L(less1bytes) returns -1/0/1; the longer paths finish in code that is
 * outside this part of the file.
 *
 * Entry dispatch:
 *   len >= 48  -> L(48bytesormore)
 *   len <= 1   -> L(less1bytes)
 *   otherwise  -> L(less48bytes), entered with %ebx pushed, %ecx = len and
 *                 %eax/%edx advanced by len (pointing just past each buffer).
 */
	.section .text.ssse3,"ax",@progbits
ENTRY (MEMCMP)
	movl	LEN(%esp), %ecx
	movl	BLK1(%esp), %eax
	cmp	$48, %ecx
	movl	BLK2(%esp), %edx	/* mov leaves the cmp flags intact */
	jae	L(48bytesormore)
	cmp	$1, %ecx
	jbe	L(less1bytes)		/* flags stay live for the jb at L(less1bytes) */
	PUSH (%ebx)
	add	%ecx, %edx
	add	%ecx, %eax
	jmp	L(less48bytes)

/*
 * L(less1bytes): len is 0 or 1.  Reached straight from the entry code with
 * nothing pushed and with the flags of "cmp $1, %ecx" still valid.
 * Returns 0 for len == 0 or equal bytes, otherwise 1 / -1 according to the
 * unsigned comparison of the single byte of blk1 against blk2.
 */
	CFI_POP (%ebx)			/* CFI only: %ebx is not on the stack on this path */
	ALIGN (4)
L(less1bytes):
	jb	L(zero)			/* len == 0 */
	movb	(%eax), %cl
	cmp	(%edx), %cl
	je	L(zero)
	mov	$1, %eax		/* mov keeps the flags for the ja below */
	ja	L(1bytesend)		/* blk1 byte is above blk2 byte: return 1 */
	neg	%eax			/* otherwise return -1 */
L(1bytesend):
	ret

/* L(zero): return 0.  Reached from L(less1bytes) with nothing pushed. */
	ALIGN (4)
L(zero):
	xor	%eax, %eax		/* zeroing idiom; flags are dead before ret */
	ret

/*
 * L(48bytesormore): len >= 48.  In: %eax = blk1, %edx = blk2, %ecx = len.
 *
 *  1. Save %ebx/%esi/%edi and remember that CFI state for later sections.
 *  2. Compare the first 16 bytes with unaligned loads; any difference goes
 *     to L(less16bytes).
 *  3. Round %edi (blk1 + 16) down to a 16-byte boundary, move %esi back by
 *     the same amount and add that amount to %ecx.
 *  4. Let N = %esi & 15.  N == 0 goes to L(shr_0); otherwise %esi is
 *     rounded down as well and control goes to L(shr_N) with %edx = N.
 */
	ALIGN (4)
L(48bytesormore):
	PUSH (%ebx)
	PUSH (%esi)
	PUSH (%edi)
	cfi_remember_state
	movdqu	(%eax), %xmm3
	movdqu	(%edx), %xmm0
	movl	%eax, %edi
	movl	%edx, %esi
	pcmpeqb	%xmm0, %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%edi), %edi

	sub	$0xffff, %edx		/* zero only if all 16 bytes matched */
	lea	16(%esi), %esi		/* lea keeps the flags for the jnz */
	jnz	L(less16bytes)
	mov	%edi, %edx
	and	$0xf, %edx
	xor	%edx, %edi		/* clear the low 4 bits: %edi now 16-byte aligned */
	sub	%edx, %esi
	add	%edx, %ecx
	mov	%esi, %edx
	and	$0xf, %edx
	jz	L(shr_0)
	xor	%edx, %esi		/* round %esi down; %edx keeps N (1..15) */

	/* N is non-zero here, so only 1..15 need to be dispatched. */
	cmp	$8, %edx
	jae	L(next_unaligned_table)
	cmp	$1, %edx
	je	L(shr_1)
	cmp	$2, %edx
	je	L(shr_2)
	cmp	$3, %edx
	je	L(shr_3)
	cmp	$4, %edx
	je	L(shr_4)
	cmp	$5, %edx
	je	L(shr_5)
	cmp	$6, %edx
	je	L(shr_6)
	jmp	L(shr_7)

/*
 * L(next_unaligned_table): second half of the alignment dispatch, entered
 * from L(48bytesormore) with %edx = N in 8..15.  Jumps to L(shr_N).
 */
	ALIGN (4)
L(next_unaligned_table):
	cmp	$8, %edx
	je	L(shr_8)
	cmp	$9, %edx
	je	L(shr_9)
	cmp	$10, %edx
	je	L(shr_10)
	cmp	$11, %edx
	je	L(shr_11)
	cmp	$12, %edx
	je	L(shr_12)
	cmp	$13, %edx
	je	L(shr_13)
	cmp	$14, %edx
	je	L(shr_14)
	jmp	L(shr_15)

/*
 * L(shr_0): both %edi and %esi are 16-byte aligned, so no byte shifting is
 * needed.  Short path for %ecx < 80: compare 32 bytes with aligned loads,
 * then let L(less48bytes) finish.  %ecx >= 80 goes to L(shr_0_gobble).
 */
	ALIGN (4)
L(shr_0):
	cmp	$80, %ecx
	jae	L(shr_0_gobble)
	lea	-48(%ecx), %ecx
	xor	%eax, %eax		/* shift amount 0; presumably read by L(exit) (not in view) */
	movaps	(%esi), %xmm1
	pcmpeqb	(%edi), %xmm1
	movaps	16(%esi), %xmm2
	pcmpeqb	16(%edi), %xmm2
	pand	%xmm1, %xmm2
	pmovmskb	%xmm2, %edx
	add	$32, %edi
	add	$32, %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)

	/* All equal: advance both pointers by %ecx, as the entry path does. */
	lea	(%ecx, %edi,1), %eax
	lea	(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_0_gobble): looping variant of L(shr_0), taken when %ecx >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 bytes is already issued.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_0_gobble):
	lea	-48(%ecx), %ecx
	movdqa	(%esi), %xmm0
	xor	%eax, %eax		/* shift amount 0; presumably read by L(exit) (not in view) */
	pcmpeqb	(%edi), %xmm0
	sub	$32, %ecx
	movdqa	16(%esi), %xmm2
	pcmpeqb	16(%edi), %xmm2
L(shr_0_gobble_loop):
	pand	%xmm0, %xmm2
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm2, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */
	movdqa	32(%esi), %xmm0
	movdqa	48(%esi), %xmm2
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	pcmpeqb	32(%edi), %xmm0
	pcmpeqb	48(%edi), %xmm2
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	jz	L(shr_0_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */

	pand	%xmm0, %xmm2
	cmp	$0, %ecx
	jge	L(shr_0_gobble_loop_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_0_gobble_loop_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm2, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)
	lea	(%ecx, %edi,1), %eax
	lea	(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_1): blk2 sits 1 byte past a 16-byte boundary (%edx = 1 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_1):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 1; presumably read by L(exit) (not in view) */
	jae	L(shr_1_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 1(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$1, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$1, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+1 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	1(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_1_gobble): looping variant of L(shr_1), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $1 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_1_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$1, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$1, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_1_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$1, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$1, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_1_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_1_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_1_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	1(%ecx, %esi,1), %edx	/* +1 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_2): blk2 sits 2 bytes past a 16-byte boundary (%edx = 2 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_2):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 2; presumably read by L(exit) (not in view) */
	jae	L(shr_2_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 2(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$2, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$2, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+2 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	2(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_2_gobble): looping variant of L(shr_2), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $2 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_2_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$2, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$2, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_2_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$2, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$2, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_2_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_2_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_2_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	2(%ecx, %esi,1), %edx	/* +2 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_3): blk2 sits 3 bytes past a 16-byte boundary (%edx = 3 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_3):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 3; presumably read by L(exit) (not in view) */
	jae	L(shr_3_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 3(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$3, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$3, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+3 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	3(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_3_gobble): looping variant of L(shr_3), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $3 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_3_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$3, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$3, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_3_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$3, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$3, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_3_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_3_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_3_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	3(%ecx, %esi,1), %edx	/* +3 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_4): blk2 sits 4 bytes past a 16-byte boundary (%edx = 4 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_4):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 4; presumably read by L(exit) (not in view) */
	jae	L(shr_4_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 4(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$4, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$4, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+4 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	4(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_4_gobble): looping variant of L(shr_4), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $4 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_4_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$4, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$4, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_4_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$4, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$4, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_4_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_4_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_4_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	4(%ecx, %esi,1), %edx	/* +4 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_5): blk2 sits 5 bytes past a 16-byte boundary (%edx = 5 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_5):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 5; presumably read by L(exit) (not in view) */
	jae	L(shr_5_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 5(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$5, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$5, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+5 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	5(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_5_gobble): looping variant of L(shr_5), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $5 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_5_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$5, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$5, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_5_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$5, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$5, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_5_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_5_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_5_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	5(%ecx, %esi,1), %edx	/* +5 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_6): blk2 sits 6 bytes past a 16-byte boundary (%edx = 6 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_6):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 6; presumably read by L(exit) (not in view) */
	jae	L(shr_6_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 6(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$6, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$6, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+6 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	6(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_6_gobble): looping variant of L(shr_6), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $6 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_6_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$6, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$6, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_6_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$6, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$6, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_6_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_6_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_6_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	6(%ecx, %esi,1), %edx	/* +6 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_7): blk2 sits 7 bytes past a 16-byte boundary (%edx = 7 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_7):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 7; presumably read by L(exit) (not in view) */
	jae	L(shr_7_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 7(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$7, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$7, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+7 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	7(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_7_gobble): looping variant of L(shr_7), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $7 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_7_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$7, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$7, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_7_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$7, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$7, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_7_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_7_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_7_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	7(%ecx, %esi,1), %edx	/* +7 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_8): blk2 sits 8 bytes past a 16-byte boundary (%edx = 8 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_8):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 8; presumably read by L(exit) (not in view) */
	jae	L(shr_8_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 8(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$8, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$8, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+8 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	8(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_8_gobble): looping variant of L(shr_8), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $8 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_8_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$8, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$8, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_8_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$8, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$8, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_8_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_8_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_8_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	8(%ecx, %esi,1), %edx	/* +8 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_9): blk2 sits 9 bytes past a 16-byte boundary (%edx = 9 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_9):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 9; presumably read by L(exit) (not in view) */
	jae	L(shr_9_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 9(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$9, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$9, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+9 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	9(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_9_gobble): looping variant of L(shr_9), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $9 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_9_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$9, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$9, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_9_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$9, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$9, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_9_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_9_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_9_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	9(%ecx, %esi,1), %edx	/* +9 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_10): blk2 sits 10 bytes past a 16-byte boundary (%edx = 10 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_10):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 10; presumably read by L(exit) (not in view) */
	jae	L(shr_10_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 10(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$10, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$10, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+10 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	10(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_10_gobble): looping variant of L(shr_10), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $10 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_10_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$10, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$10, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_10_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$10, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$10, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_10_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_10_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_10_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	10(%ecx, %esi,1), %edx	/* +10 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_11): blk2 sits 11 bytes past a 16-byte boundary (%edx = 11 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_11):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 11; presumably read by L(exit) (not in view) */
	jae	L(shr_11_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 11(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$11, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$11, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+11 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	11(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_11_gobble): looping variant of L(shr_11), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $11 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_11_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$11, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$11, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_11_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$11, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$11, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_11_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_11_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_11_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	11(%ecx, %esi,1), %edx	/* +11 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_12): blk2 sits 12 bytes past a 16-byte boundary (%edx = 12 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_12):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 12; presumably read by L(exit) (not in view) */
	jae	L(shr_12_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 12(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$12, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$12, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+12 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	12(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_12_gobble): looping variant of L(shr_12), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $12 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_12_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$12, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$12, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_12_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$12, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$12, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_12_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_12_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_12_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	12(%ecx, %esi,1), %edx	/* +12 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_13): blk2 sits 13 bytes past a 16-byte boundary (%edx = 13 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_13):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 13; presumably read by L(exit) (not in view) */
	jae	L(shr_13_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 13(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$13, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$13, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+13 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	13(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_13_gobble): looping variant of L(shr_13), taken when %ecx was >= 80.
 * Each pass tests the 32 bytes compared on the previous pass while the
 * compare of the next 32 is already issued; blk2 data is rebuilt with
 * palignr $13 from aligned loads.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_13_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$13, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$13, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_13_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* sets CF once the count runs out */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1		/* keep the first-half result */

	movdqa	64(%esi), %xmm3
	palignr	$13, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF only if mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$13, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_13_gobble_loop)	/* SSE ops and lea leave the sbb flags intact */
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_13_gobble_next)
	inc	%edx			/* undo the borrow that sbb folded in */
	add	$32, %ecx
L(shr_13_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Previous 32 bytes matched: check the pair that was compared ahead. */
	pmovmskb	%xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	13(%ecx, %esi,1), %edx	/* +13 restores blk2's true offset */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

/*
 * L(shr_14): blk2 sits 14 bytes past a 16-byte boundary (%edx = 14 on entry,
 * %esi already rounded down, %edi aligned).  Short path for %ecx < 80:
 * compare 32 bytes, then let L(less48bytes) finish.
 */
	cfi_restore_state		/* back to the CFI state with 3 registers pushed */
	cfi_remember_state
	ALIGN (4)
L(shr_14):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags untouched */
	mov	%edx, %eax		/* eax = 14; presumably read by L(exit) (not in view) */
	jae	L(shr_14_gobble)

	/* palignr stitches two aligned loads into the 16 bytes at 14(%esi). */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep 16(%esi) for the second stitch */
	palignr	$14, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	movdqa	32(%esi), %xmm3
	palignr	$14, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx		/* non-zero: some byte in the 32 differed */
	jnz	L(exit)
	/* All equal: advance both pointers by %ecx (+14 restores blk2's true offset). */
	lea	(%ecx, %edi,1), %eax
	lea	14(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

Bruce Beare124a5422010-10-11 12:24:41 -07001377 cfi_restore_state
1378 cfi_remember_state
Bruce Beare8ff1a272010-03-04 11:03:37 -08001379 ALIGN (4)
/*
 * L(shr_14_gobble): 32-bytes-per-pass compare loop for a source stream
 * that starts 14 bytes into the 16-byte aligned block at %esi.
 * movdqa/palignr/pcmpeqb are used with memory operands, so %esi and %edi
 * both have to be 16-byte aligned here.  palignr $14 splices two adjacent
 * aligned %esi blocks into the 16 bytes that start at offset 14, and
 * pcmpeqb turns them into a byte-equality mask against the block at %edi.
 * %ecx is the running byte counter; %edx receives the compare result;
 * %eax was set by L(shr_14) and is consumed by L(exit).
 */
1380L(shr_14_gobble):
1381 sub $32, %ecx
/* xmm0 = equality mask of 14(%esi)..29(%esi) against 0(%edi)..15(%edi). */
1382 movdqa 16(%esi), %xmm0
1383 palignr $14, (%esi), %xmm0
1384 pcmpeqb (%edi), %xmm0
1385
/* xmm3 = equality mask of the following 16 bytes. */
1386 movdqa 32(%esi), %xmm3
1387 palignr $14, 16(%esi), %xmm3
1388 pcmpeqb 16(%edi), %xmm3
1389
1390L(shr_14_gobble_loop):
/* Fold both masks together; the sub sets CF when the counter borrows. */
1391 pand %xmm0, %xmm3
1392 sub $32, %ecx
1393 pmovmskb %xmm3, %edx
/* Keep the first-half mask in xmm1: L(exit) reads it to pick the half. */
1394 movdqa %xmm0, %xmm1
1395
/* Begin comparing the next 32 bytes; the SSE instructions and lea used
   from here to the jz do not modify EFLAGS. */
1396 movdqa 64(%esi), %xmm3
1397 palignr $14,48(%esi), %xmm3
/* edx = mask - 0xffff - CF: zero only when all 32 bytes matched and the
   counter did not borrow.  The jz at the end of the loop tests this ZF. */
1398 sbb $0xffff, %edx
1399 movdqa 48(%esi), %xmm0
1400 palignr $14,32(%esi), %xmm0
1401 pcmpeqb 32(%edi), %xmm0
1402 lea 32(%esi), %esi
1403 pcmpeqb 48(%edi), %xmm3
1404
1405 lea 32(%edi), %edi
1406 jz L(shr_14_gobble_loop)
/* Loop left: combine the masks of the pair prepared in the last pass. */
1407 pand %xmm0, %xmm3
1408
/* A negative counter means the sbb also subtracted the borrow: undo that
   in %edx and give the 32 bytes back to %ecx. */
1409 cmp $0, %ecx
1410 jge L(shr_14_gobble_next)
1411 inc %edx
1412 add $32, %ecx
1413L(shr_14_gobble_next):
/* Non-zero %edx: the pair checked inside the loop contained a mismatch. */
1414 test %edx, %edx
1415 jnz L(exit)
1416
/* Check the pair prepared in the last pass, stepping the pointers past it
   first (L(exit) is entered with the pointers 32 bytes past the pair). */
1417 pmovmskb %xmm3, %edx
1418 movdqa %xmm0, %xmm1
1419 lea 32(%edi), %edi
1420 lea 32(%esi), %esi
1421 sub $0xffff, %edx
1422 jnz L(exit)
1423
/* All equal so far: %eax = %edi + %ecx and %edx = %esi + 14 + %ecx, the
   pointers L(less48bytes) and its handlers read from at negative offsets. */
1424 lea (%ecx, %edi,1), %eax
1425 lea 14(%ecx, %esi,1), %edx
/* POP is a macro defined outside this chunk. */
1426 POP (%edi)
1427 POP (%esi)
1428 jmp L(less48bytes)
1429
Bruce Beare124a5422010-10-11 12:24:41 -07001430 cfi_restore_state
1431 cfi_remember_state
Bruce Beare8ff1a272010-03-04 11:03:37 -08001432 ALIGN (4)
/*
 * L(shr_15): compare path for a source stream that starts 15 bytes into
 * the 16-byte aligned block at %esi (%esi and %edi are used as aligned
 * memory operands of movdqa/palignr/pcmpeqb).  Large counts branch to the
 * loop version; otherwise one 32-byte step is compared here and the rest
 * is handed to L(less48bytes).
 */
1433L(shr_15):
/* The jae below still sees the flags of this cmp (lea and mov leave EFLAGS
   alone): an unsigned count of 80 or more takes L(shr_15_gobble). */
1434 cmp $80, %ecx
1435 lea -48(%ecx), %ecx
/* Save %edx in %eax; L(exit) later adds %eax to %esi.
   NOTE(review): presumably %edx holds the 15-byte misalignment computed
   by the dispatcher outside this chunk -- confirm. */
1436 mov %edx, %eax
1437 jae L(shr_15_gobble)
1438
/* xmm1 = equality mask of 15(%esi)..30(%esi) against 0(%edi)..15(%edi);
   xmm2 keeps the raw 16(%esi) block for the second splice. */
1439 movdqa 16(%esi), %xmm1
1440 movdqa %xmm1, %xmm2
1441 palignr $15, (%esi), %xmm1
1442 pcmpeqb (%edi), %xmm1
1443
/* xmm3 = equality mask of the following 16 bytes. */
1444 movdqa 32(%esi), %xmm3
1445 palignr $15, %xmm2, %xmm3
1446 pcmpeqb 16(%edi), %xmm3
1447
/* edx = combined 16-bit mask - 0xffff: zero only if all 32 bytes matched.
   The pointers are stepped past the pair before L(exit) may be taken. */
1448 pand %xmm1, %xmm3
1449 pmovmskb %xmm3, %edx
1450 lea 32(%edi), %edi
1451 lea 32(%esi), %esi
1452 sub $0xffff, %edx
1453 jnz L(exit)
/* All equal: %eax = %edi + %ecx and %edx = %esi + 15 + %ecx, the pointers
   L(less48bytes) and its handlers read from at negative offsets. */
1454 lea (%ecx, %edi,1), %eax
1455 lea 15(%ecx, %esi,1), %edx
/* POP is a macro defined outside this chunk. */
1456 POP (%edi)
1457 POP (%esi)
1458 jmp L(less48bytes)
1459
Bruce Beare124a5422010-10-11 12:24:41 -07001460 cfi_restore_state
1461 cfi_remember_state
Bruce Beare8ff1a272010-03-04 11:03:37 -08001462 ALIGN (4)
/*
 * L(shr_15_gobble): 32-bytes-per-pass compare loop for a source stream
 * that starts 15 bytes into the 16-byte aligned block at %esi.
 * movdqa/palignr/pcmpeqb are used with memory operands, so %esi and %edi
 * both have to be 16-byte aligned here.  palignr $15 splices two adjacent
 * aligned %esi blocks into the 16 bytes that start at offset 15, and
 * pcmpeqb turns them into a byte-equality mask against the block at %edi.
 * %ecx is the running byte counter; %edx receives the compare result;
 * %eax was set by L(shr_15) and is consumed by L(exit).
 */
1463L(shr_15_gobble):
1464 sub $32, %ecx
/* xmm0 = equality mask of 15(%esi)..30(%esi) against 0(%edi)..15(%edi). */
1465 movdqa 16(%esi), %xmm0
1466 palignr $15, (%esi), %xmm0
1467 pcmpeqb (%edi), %xmm0
1468
/* xmm3 = equality mask of the following 16 bytes. */
1469 movdqa 32(%esi), %xmm3
1470 palignr $15, 16(%esi), %xmm3
1471 pcmpeqb 16(%edi), %xmm3
1472
1473L(shr_15_gobble_loop):
/* Fold both masks together; the sub sets CF when the counter borrows. */
1474 pand %xmm0, %xmm3
1475 sub $32, %ecx
1476 pmovmskb %xmm3, %edx
/* Keep the first-half mask in xmm1: L(exit) reads it to pick the half. */
1477 movdqa %xmm0, %xmm1
1478
/* Begin comparing the next 32 bytes; the SSE instructions and lea used
   from here to the jz do not modify EFLAGS. */
1479 movdqa 64(%esi), %xmm3
1480 palignr $15,48(%esi), %xmm3
/* edx = mask - 0xffff - CF: zero only when all 32 bytes matched and the
   counter did not borrow.  The jz at the end of the loop tests this ZF. */
1481 sbb $0xffff, %edx
1482 movdqa 48(%esi), %xmm0
1483 palignr $15,32(%esi), %xmm0
1484 pcmpeqb 32(%edi), %xmm0
1485 lea 32(%esi), %esi
1486 pcmpeqb 48(%edi), %xmm3
1487
1488 lea 32(%edi), %edi
1489 jz L(shr_15_gobble_loop)
/* Loop left: combine the masks of the pair prepared in the last pass. */
1490 pand %xmm0, %xmm3
1491
/* A negative counter means the sbb also subtracted the borrow: undo that
   in %edx and give the 32 bytes back to %ecx. */
1492 cmp $0, %ecx
1493 jge L(shr_15_gobble_next)
1494 inc %edx
1495 add $32, %ecx
1496L(shr_15_gobble_next):
/* Non-zero %edx: the pair checked inside the loop contained a mismatch. */
1497 test %edx, %edx
1498 jnz L(exit)
1499
/* Check the pair prepared in the last pass, stepping the pointers past it
   first (L(exit) is entered with the pointers 32 bytes past the pair). */
1500 pmovmskb %xmm3, %edx
1501 movdqa %xmm0, %xmm1
1502 lea 32(%edi), %edi
1503 lea 32(%esi), %esi
1504 sub $0xffff, %edx
1505 jnz L(exit)
1506
/* All equal so far: %eax = %edi + %ecx and %edx = %esi + 15 + %ecx, the
   pointers L(less48bytes) and its handlers read from at negative offsets. */
1507 lea (%ecx, %edi,1), %eax
1508 lea 15(%ecx, %esi,1), %edx
/* POP is a macro defined outside this chunk. */
1509 POP (%edi)
1510 POP (%esi)
1511 jmp L(less48bytes)
1512
Bruce Beare124a5422010-10-11 12:24:41 -07001513 cfi_restore_state
1514 cfi_remember_state
Bruce Beare8ff1a272010-03-04 11:03:37 -08001515 ALIGN (4)
/*
 * L(exit): a 32-byte compare step found a mismatch; locate the byte and
 * return the difference.  State at the jumps from the shr_13/14/15 paths:
 *   xmm1      = byte-equality mask of the first 16 bytes of the pair
 *   %edx      = (combined 16-bit equality mask) - 0xffff, non-zero
 *   %esi/%edi = already advanced 32 bytes past the start of the pair
 *   %eax      = value saved on entry to the shr_N path
 * Because an equality mask has 1 = "equal", mask - 0xffff has its lowest
 * set bit (within the low 16 bits) at the index of the first differing
 * byte, which is what the bit tests below look for.
 * NOTE(review): L(less16bytes) has no jump to it inside this chunk;
 * presumably it is also entered from code outside it.
 */
1516L(exit):
1517 pmovmskb %xmm1, %ebx
1518 sub $0xffff, %ebx
/* First half entirely equal: the mismatch is in the second half, whose
   start is 16 bytes behind the current pointers -- keep %edx as is. */
1519 jz L(first16bytes)
/* Otherwise step back to the first half and use its own mask. */
1520 lea -16(%esi), %esi
1521 lea -16(%edi), %edi
1522 mov %ebx, %edx
1523L(first16bytes):
/* %esi is the aligned-down pointer; adding %eax moves it onto the bytes
   that were actually compared against %edi. */
1524 add %eax, %esi
1525L(less16bytes):
/* Low byte zero: the first difference is in bytes 8..15 of the block. */
1526 test %dl, %dl
1527 jz L(next_24_bytes)
1528
/* Lowest set bit k of %dl selects L(Byte16+k), which reads at k-16. */
1529 test $0x01, %dl
1530 jnz L(Byte16)
1531
1532 test $0x02, %dl
1533 jnz L(Byte17)
1534
1535 test $0x04, %dl
1536 jnz L(Byte18)
1537
1538 test $0x08, %dl
1539 jnz L(Byte19)
1540
1541 test $0x10, %dl
1542 jnz L(Byte20)
1543
1544 test $0x20, %dl
1545 jnz L(Byte21)
1546
1547 test $0x40, %dl
1548 jnz L(Byte22)
/* %dl is non-zero with bits 0..6 clear, so bit 7 is set: byte at -9. */
1549L(Byte23):
/* Result = byte at -9(%edi) minus byte at -9(%esi), both zero-extended. */
1550 movzbl -9(%edi), %eax
1551 movzbl -9(%esi), %edx
1552 sub %edx, %eax
/* RETURN is a macro defined outside this chunk. */
1553 RETURN
1554
1555 ALIGN (4)
/*
 * L(Byte16)..L(Byte22): return handlers selected by the bit tests in
 * L(less16bytes) and L(next_24_bytes).  Each one zero-extends the byte at
 * a fixed negative offset (-16 .. -10) from %edi and from %esi and leaves
 * "%edi byte minus %esi byte" in %eax.  RETURN is a macro defined outside
 * this chunk.
 */
1556L(Byte16):
1557 movzbl -16(%edi), %eax
1558 movzbl -16(%esi), %edx
1559 sub %edx, %eax
1560 RETURN
1561
1562 ALIGN (4)
1563L(Byte17):
1564 movzbl -15(%edi), %eax
1565 movzbl -15(%esi), %edx
1566 sub %edx, %eax
1567 RETURN
1568
1569 ALIGN (4)
1570L(Byte18):
1571 movzbl -14(%edi), %eax
1572 movzbl -14(%esi), %edx
1573 sub %edx, %eax
1574 RETURN
1575
1576 ALIGN (4)
1577L(Byte19):
1578 movzbl -13(%edi), %eax
1579 movzbl -13(%esi), %edx
1580 sub %edx, %eax
1581 RETURN
1582
1583 ALIGN (4)
1584L(Byte20):
1585 movzbl -12(%edi), %eax
1586 movzbl -12(%esi), %edx
1587 sub %edx, %eax
1588 RETURN
1589
1590 ALIGN (4)
1591L(Byte21):
1592 movzbl -11(%edi), %eax
1593 movzbl -11(%esi), %edx
1594 sub %edx, %eax
1595 RETURN
1596
1597 ALIGN (4)
1598L(Byte22):
1599 movzbl -10(%edi), %eax
1600 movzbl -10(%esi), %edx
1601 sub %edx, %eax
1602 RETURN
1603
1604 ALIGN (4)
/*
 * L(next_24_bytes): reached from L(less16bytes) when %dl is zero, i.e. the
 * first differing byte is in the upper half of the 16-byte block.  Both
 * pointers are moved forward by 8 so the L(Byte16)..L(Byte22) handlers
 * (which read at -16..-10) can be reused, this time keyed on the bits of
 * %dh.
 */
1605L(next_24_bytes):
1606 lea 8(%edi), %edi
1607 lea 8(%esi), %esi
1608 test $0x01, %dh
1609 jnz L(Byte16)
1610
1611 test $0x02, %dh
1612 jnz L(Byte17)
1613
1614 test $0x04, %dh
1615 jnz L(Byte18)
1616
1617 test $0x08, %dh
1618 jnz L(Byte19)
1619
1620 test $0x10, %dh
1621 jnz L(Byte20)
1622
1623 test $0x20, %dh
1624 jnz L(Byte21)
1625
1626 test $0x40, %dh
1627 jnz L(Byte22)
1628
/* Bits 0..6 of %dh are clear: execution falls through the alignment
   padding into L(Byte31), which uses the byte at -9. */
1629 ALIGN (4)
1630L(Byte31):
/* Result = byte at -9(%edi) minus byte at -9(%esi), both zero-extended. */
1631 movzbl -9(%edi), %eax
1632 movzbl -9(%esi), %edx
1633 sub %edx, %eax
/* RETURN_END and CFI_PUSH are macros defined outside this chunk.
   NOTE(review): CFI_PUSH presumably re-declares %ebx as pushed for the
   unwind info of the code that follows -- confirm. */
1634 RETURN_END
1635 CFI_PUSH (%ebx)
1636
1637 ALIGN (4)
/*
 * L(more8bytes): second level of the tail dispatch on the count in %ecx.
 * Unsigned counts of 16 or more go on to L(more16bytes); 8..14 jump to the
 * matching L(Nbytes) handler and whatever is left takes L(15bytes).
 */
1638L(more8bytes):
1639 cmp $16, %ecx
1640 jae L(more16bytes)
1641 cmp $8, %ecx
1642 je L(8bytes)
1643 cmp $9, %ecx
1644 je L(9bytes)
1645 cmp $10, %ecx
1646 je L(10bytes)
1647 cmp $11, %ecx
1648 je L(11bytes)
1649 cmp $12, %ecx
1650 je L(12bytes)
1651 cmp $13, %ecx
1652 je L(13bytes)
1653 cmp $14, %ecx
1654 je L(14bytes)
1655 jmp L(15bytes)
1656
1657 ALIGN (4)
/*
 * L(more16bytes): tail dispatch for counts of at least 16 in %ecx.
 * Unsigned counts of 24 or more go on to L(more24bytes); 16..22 jump to
 * the matching L(Nbytes) handler and whatever is left takes L(23bytes).
 */
1658L(more16bytes):
1659 cmp $24, %ecx
1660 jae L(more24bytes)
1661 cmp $16, %ecx
1662 je L(16bytes)
1663 cmp $17, %ecx
1664 je L(17bytes)
1665 cmp $18, %ecx
1666 je L(18bytes)
1667 cmp $19, %ecx
1668 je L(19bytes)
1669 cmp $20, %ecx
1670 je L(20bytes)
1671 cmp $21, %ecx
1672 je L(21bytes)
1673 cmp $22, %ecx
1674 je L(22bytes)
1675 jmp L(23bytes)
1676
1677 ALIGN (4)
/*
 * L(more24bytes): tail dispatch for counts of at least 24 in %ecx.
 * Unsigned counts of 32 or more go on to L(more32bytes); 24..30 jump to
 * the matching L(Nbytes) handler and whatever is left takes L(31bytes).
 */
1678L(more24bytes):
1679 cmp $32, %ecx
1680 jae L(more32bytes)
1681 cmp $24, %ecx
1682 je L(24bytes)
1683 cmp $25, %ecx
1684 je L(25bytes)
1685 cmp $26, %ecx
1686 je L(26bytes)
1687 cmp $27, %ecx
1688 je L(27bytes)
1689 cmp $28, %ecx
1690 je L(28bytes)
1691 cmp $29, %ecx
1692 je L(29bytes)
1693 cmp $30, %ecx
1694 je L(30bytes)
1695 jmp L(31bytes)
1696
1697 ALIGN (4)
/*
 * L(more32bytes): tail dispatch for counts of at least 32 in %ecx.
 * Unsigned counts of 40 or more go on to L(more40bytes); 32..38 jump to
 * the matching L(Nbytes) handler and whatever is left takes L(39bytes).
 */
1698L(more32bytes):
1699 cmp $40, %ecx
1700 jae L(more40bytes)
1701 cmp $32, %ecx
1702 je L(32bytes)
1703 cmp $33, %ecx
1704 je L(33bytes)
1705 cmp $34, %ecx
1706 je L(34bytes)
1707 cmp $35, %ecx
1708 je L(35bytes)
1709 cmp $36, %ecx
1710 je L(36bytes)
1711 cmp $37, %ecx
1712 je L(37bytes)
1713 cmp $38, %ecx
1714 je L(38bytes)
1715 jmp L(39bytes)
1716
1717 ALIGN (4)
/*
 * L(more40bytes): last level of the tail dispatch, for counts of at least
 * 40 in %ecx.  40..46 jump to the matching L(Nbytes) handler; every other
 * value takes L(47bytes).
 * NOTE(review): no upper bound is checked here, so any count above 46
 * lands in L(47bytes) -- presumably callers guarantee a count below 48.
 */
1718L(more40bytes):
1719 cmp $40, %ecx
1720 je L(40bytes)
1721 cmp $41, %ecx
1722 je L(41bytes)
1723 cmp $42, %ecx
1724 je L(42bytes)
1725 cmp $43, %ecx
1726 je L(43bytes)
1727 cmp $44, %ecx
1728 je L(44bytes)
1729 cmp $45, %ecx
1730 je L(45bytes)
1731 cmp $46, %ecx
1732 je L(46bytes)
1733 jmp L(47bytes)
1734
1735 ALIGN (4)
/*
 * L(less48bytes): entry of the tail dispatch.  %ecx is the number of bytes
 * left; %eax and %edx are the pointers the L(Nbytes) handlers read from at
 * negative offsets (the shr_N paths set them to pointer + %ecx before
 * jumping here).  Unsigned counts of 8 or more go to L(more8bytes); 2..6
 * jump to the matching handler and every other value takes L(7bytes).
 * NOTE(review): counts 0 and 1 have no test of their own and therefore
 * also reach L(7bytes); whether they can occur, and whether re-reading 7
 * bytes is then valid, depends on the callers -- confirm.
 */
1736L(less48bytes):
1737 cmp $8, %ecx
1738 jae L(more8bytes)
1739 cmp $2, %ecx
1740 je L(2bytes)
1741 cmp $3, %ecx
1742 je L(3bytes)
1743 cmp $4, %ecx
1744 je L(4bytes)
1745 cmp $5, %ecx
1746 je L(5bytes)
1747 cmp $6, %ecx
1748 je L(6bytes)
1749 jmp L(7bytes)
1750
1751
1752 ALIGN (4)
/*
 * L(44bytes) .. L(4bytes): tail compare for counts that are a multiple of
 * 4.  Every load uses a negative offset, so %eax and %edx point one past
 * the last byte to compare in each buffer.  Each label compares one dword
 * and falls through into the next, so entering at L(Nbytes) compares the
 * final N bytes.  A mismatching dword pair is left in %ecx (from %eax) and
 * %ebx (from %edx) for L(find_diff).  Returns 0 in %eax when all match.
 */
1753L(44bytes):
1754 mov -44(%eax), %ecx
1755 mov -44(%edx), %ebx
1756 cmp %ebx, %ecx
1757 jne L(find_diff)
1758L(40bytes):
1759 mov -40(%eax), %ecx
1760 mov -40(%edx), %ebx
1761 cmp %ebx, %ecx
1762 jne L(find_diff)
1763L(36bytes):
1764 mov -36(%eax), %ecx
1765 mov -36(%edx), %ebx
1766 cmp %ebx, %ecx
1767 jne L(find_diff)
1768L(32bytes):
1769 mov -32(%eax), %ecx
1770 mov -32(%edx), %ebx
1771 cmp %ebx, %ecx
1772 jne L(find_diff)
1773L(28bytes):
1774 mov -28(%eax), %ecx
1775 mov -28(%edx), %ebx
1776 cmp %ebx, %ecx
1777 jne L(find_diff)
1778L(24bytes):
1779 mov -24(%eax), %ecx
1780 mov -24(%edx), %ebx
1781 cmp %ebx, %ecx
1782 jne L(find_diff)
1783L(20bytes):
1784 mov -20(%eax), %ecx
1785 mov -20(%edx), %ebx
1786 cmp %ebx, %ecx
1787 jne L(find_diff)
1788L(16bytes):
1789 mov -16(%eax), %ecx
1790 mov -16(%edx), %ebx
1791 cmp %ebx, %ecx
1792 jne L(find_diff)
1793L(12bytes):
1794 mov -12(%eax), %ecx
1795 mov -12(%edx), %ebx
1796 cmp %ebx, %ecx
1797 jne L(find_diff)
1798L(8bytes):
1799 mov -8(%eax), %ecx
1800 mov -8(%edx), %ebx
1801 cmp %ebx, %ecx
1802 jne L(find_diff)
1803L(4bytes):
1804 mov -4(%eax), %ecx
1805 mov -4(%edx), %ebx
1806 cmp %ebx, %ecx
/* mov leaves EFLAGS alone, so the jne still tests the cmp above; %eax = 0
   is the return value for "equal" (L(end) overwrites it on a mismatch). */
1807 mov $0, %eax
1808 jne L(find_diff)
/* POP and CFI_PUSH are macros defined outside this chunk.
   NOTE(review): CFI_PUSH after the ret presumably restores the unwind
   description (%ebx still pushed) for the code that follows -- confirm. */
1809 POP (%ebx)
1810 ret
1811 CFI_PUSH (%ebx)
1812
1813 ALIGN (4)
/*
 * L(45bytes) .. L(5bytes): tail compare for counts of the form 4k+1.
 * %eax and %edx point one past the last byte to compare in each buffer.
 * Each label compares one dword and falls through into the next; after
 * L(5bytes) the single remaining byte at -1 is compared on its own.
 * A mismatching dword pair is left in %ecx (from %eax) and %ebx (from
 * %edx) for L(find_diff).  Returns 0 in %eax when everything matches.
 */
1814L(45bytes):
1815 mov -45(%eax), %ecx
1816 mov -45(%edx), %ebx
1817 cmp %ebx, %ecx
1818 jne L(find_diff)
1819L(41bytes):
1820 mov -41(%eax), %ecx
1821 mov -41(%edx), %ebx
1822 cmp %ebx, %ecx
1823 jne L(find_diff)
1824L(37bytes):
1825 mov -37(%eax), %ecx
1826 mov -37(%edx), %ebx
1827 cmp %ebx, %ecx
1828 jne L(find_diff)
1829L(33bytes):
1830 mov -33(%eax), %ecx
1831 mov -33(%edx), %ebx
1832 cmp %ebx, %ecx
1833 jne L(find_diff)
1834L(29bytes):
1835 mov -29(%eax), %ecx
1836 mov -29(%edx), %ebx
1837 cmp %ebx, %ecx
1838 jne L(find_diff)
1839L(25bytes):
1840 mov -25(%eax), %ecx
1841 mov -25(%edx), %ebx
1842 cmp %ebx, %ecx
1843 jne L(find_diff)
1844L(21bytes):
1845 mov -21(%eax), %ecx
1846 mov -21(%edx), %ebx
1847 cmp %ebx, %ecx
1848 jne L(find_diff)
1849L(17bytes):
1850 mov -17(%eax), %ecx
1851 mov -17(%edx), %ebx
1852 cmp %ebx, %ecx
1853 jne L(find_diff)
1854L(13bytes):
1855 mov -13(%eax), %ecx
1856 mov -13(%edx), %ebx
1857 cmp %ebx, %ecx
1858 jne L(find_diff)
1859L(9bytes):
1860 mov -9(%eax), %ecx
1861 mov -9(%edx), %ebx
1862 cmp %ebx, %ecx
1863 jne L(find_diff)
1864L(5bytes):
1865 mov -5(%eax), %ecx
1866 mov -5(%edx), %ebx
1867 cmp %ebx, %ecx
1868 jne L(find_diff)
/* Last byte: compare it directly; on a mismatch L(end) turns the flags of
   this cmp into +1/-1.  The mov in between does not modify EFLAGS and
   provides the 0 returned when the bytes are equal. */
1869 movzbl -1(%eax), %ecx
1870 cmp -1(%edx), %cl
1871 mov $0, %eax
1872 jne L(end)
/* POP and CFI_PUSH are macros defined outside this chunk.
   NOTE(review): CFI_PUSH after the ret presumably restores the unwind
   description (%ebx still pushed) for the code that follows -- confirm. */
1873 POP (%ebx)
1874 ret
1875 CFI_PUSH (%ebx)
1876
1877 ALIGN (4)
/*
 * L(46bytes) .. L(2bytes): tail compare for counts of the form 4k+2.
 * %eax and %edx point one past the last byte to compare in each buffer.
 * Each label compares one dword and falls through into the next; L(2bytes)
 * finishes with the last two bytes, compared one at a time in address
 * order.  A mismatching dword pair is left in %ecx (from %eax) and %ebx
 * (from %edx) for L(find_diff).  Returns 0 in %eax when everything matches.
 */
1878L(46bytes):
1879 mov -46(%eax), %ecx
1880 mov -46(%edx), %ebx
1881 cmp %ebx, %ecx
1882 jne L(find_diff)
1883L(42bytes):
1884 mov -42(%eax), %ecx
1885 mov -42(%edx), %ebx
1886 cmp %ebx, %ecx
1887 jne L(find_diff)
1888L(38bytes):
1889 mov -38(%eax), %ecx
1890 mov -38(%edx), %ebx
1891 cmp %ebx, %ecx
1892 jne L(find_diff)
1893L(34bytes):
1894 mov -34(%eax), %ecx
1895 mov -34(%edx), %ebx
1896 cmp %ebx, %ecx
1897 jne L(find_diff)
1898L(30bytes):
1899 mov -30(%eax), %ecx
1900 mov -30(%edx), %ebx
1901 cmp %ebx, %ecx
1902 jne L(find_diff)
1903L(26bytes):
1904 mov -26(%eax), %ecx
1905 mov -26(%edx), %ebx
1906 cmp %ebx, %ecx
1907 jne L(find_diff)
1908L(22bytes):
1909 mov -22(%eax), %ecx
1910 mov -22(%edx), %ebx
1911 cmp %ebx, %ecx
1912 jne L(find_diff)
1913L(18bytes):
1914 mov -18(%eax), %ecx
1915 mov -18(%edx), %ebx
1916 cmp %ebx, %ecx
1917 jne L(find_diff)
1918L(14bytes):
1919 mov -14(%eax), %ecx
1920 mov -14(%edx), %ebx
1921 cmp %ebx, %ecx
1922 jne L(find_diff)
1923L(10bytes):
1924 mov -10(%eax), %ecx
1925 mov -10(%edx), %ebx
1926 cmp %ebx, %ecx
1927 jne L(find_diff)
1928L(6bytes):
1929 mov -6(%eax), %ecx
1930 mov -6(%edx), %ebx
1931 cmp %ebx, %ecx
1932 jne L(find_diff)
1933L(2bytes):
/* Load the last two bytes of each buffer: %cl/%bl hold the byte at -2,
   %ch/%bh the byte at -1. */
1934 movzwl -2(%eax), %ecx
1935 movzwl -2(%edx), %ebx
1936 cmp %bl, %cl
1937 jne L(end)
1938 cmp %bh, %ch
/* mov does not modify EFLAGS; it provides the 0 returned when equal. */
1939 mov $0, %eax
1940 jne L(end)
/* POP and CFI_PUSH are macros defined outside this chunk.
   NOTE(review): CFI_PUSH after the ret presumably restores the unwind
   description (%ebx still pushed) for the code that follows -- confirm. */
1941 POP (%ebx)
1942 ret
1943 CFI_PUSH (%ebx)
1944
1945 ALIGN (4)
/*
 * L(47bytes) .. L(3bytes): tail compare for counts of the form 4k+3.
 * %eax and %edx point one past the last byte to compare in each buffer.
 * Each label compares one dword and falls through into the next; L(3bytes)
 * finishes with the last three bytes.  A mismatching dword pair is left in
 * %ecx (from %eax) and %ebx (from %edx) for L(find_diff).  Returns 0 in
 * %eax when everything matches.
 */
1946L(47bytes):
1947 movl -47(%eax), %ecx
1948 movl -47(%edx), %ebx
1949 cmp %ebx, %ecx
1950 jne L(find_diff)
1951L(43bytes):
1952 movl -43(%eax), %ecx
1953 movl -43(%edx), %ebx
1954 cmp %ebx, %ecx
1955 jne L(find_diff)
1956L(39bytes):
1957 movl -39(%eax), %ecx
1958 movl -39(%edx), %ebx
1959 cmp %ebx, %ecx
1960 jne L(find_diff)
1961L(35bytes):
1962 movl -35(%eax), %ecx
1963 movl -35(%edx), %ebx
1964 cmp %ebx, %ecx
1965 jne L(find_diff)
1966L(31bytes):
1967 movl -31(%eax), %ecx
1968 movl -31(%edx), %ebx
1969 cmp %ebx, %ecx
1970 jne L(find_diff)
1971L(27bytes):
1972 movl -27(%eax), %ecx
1973 movl -27(%edx), %ebx
1974 cmp %ebx, %ecx
1975 jne L(find_diff)
1976L(23bytes):
1977 movl -23(%eax), %ecx
1978 movl -23(%edx), %ebx
1979 cmp %ebx, %ecx
1980 jne L(find_diff)
1981L(19bytes):
1982 movl -19(%eax), %ecx
1983 movl -19(%edx), %ebx
1984 cmp %ebx, %ecx
1985 jne L(find_diff)
1986L(15bytes):
1987 movl -15(%eax), %ecx
1988 movl -15(%edx), %ebx
1989 cmp %ebx, %ecx
1990 jne L(find_diff)
1991L(11bytes):
1992 movl -11(%eax), %ecx
1993 movl -11(%edx), %ebx
1994 cmp %ebx, %ecx
1995 jne L(find_diff)
1996L(7bytes):
1997 movl -7(%eax), %ecx
1998 movl -7(%edx), %ebx
1999 cmp %ebx, %ecx
2000 jne L(find_diff)
2001L(3bytes):
/* Bytes at -3 and -2: the low byte (lower address) is compared first; once
   it is known equal, the 16-bit compare is decided by the byte at -2. */
2002 movzwl -3(%eax), %ecx
2003 movzwl -3(%edx), %ebx
2004 cmpb %bl, %cl
2005 jne L(end)
2006 cmp %bx, %cx
2007 jne L(end)
/* Final byte at -1.  %eax is no longer needed as a pointer, so it is
   reused for the byte and then for the 0 result; the mov between cmpb and
   jne does not modify EFLAGS. */
2008 movzbl -1(%eax), %eax
2009 cmpb -1(%edx), %al
2010 mov $0, %eax
2011 jne L(end)
/* POP and CFI_PUSH are macros defined outside this chunk.
   NOTE(review): CFI_PUSH after the ret presumably restores the unwind
   description (%ebx still pushed) for the code that follows -- confirm. */
2012 POP (%ebx)
2013 ret
2014 CFI_PUSH (%ebx)
2015
2016 ALIGN (4)
/*
 * L(find_diff): %ecx and %ebx hold two dwords that compared unequal (the
 * L(Nbytes) handlers load %ecx via %eax and %ebx via %edx).  x86 loads are
 * little-endian, so the byte at the lowest address is the low byte of
 * each register; the bytes are therefore checked from the low end upward
 * and the first unequal pair decides the result.
 *
 * L(end): entered with EFLAGS from an unsigned compare of the deciding
 * pair.  Returns +1 in %eax when the %ecx-side value is above the
 * %ebx-side value, otherwise -1.
 */
2017L(find_diff):
/* Byte 0, then bytes 0..1 (decided by byte 1 once byte 0 is equal). */
2018 cmpb %bl, %cl
2019 jne L(end)
2020 cmp %bx, %cx
2021 jne L(end)
/* Bring bytes 2..3 down and repeat; the last cmp falls into L(end). */
2022 shr $16,%ecx
2023 shr $16,%ebx
2024 cmp %bl, %cl
2025 jne L(end)
2026 cmp %bx, %cx
2027L(end):
/* POP is a macro defined outside this chunk.  NOTE(review): the ja below
   relies on POP and mov leaving the compare flags intact -- true for a
   plain pop, confirm for the macro's expansion. */
2028 POP (%ebx)
2029 mov $1, %eax
2030 ja L(bigger)
2031 neg %eax
2032L(bigger):
2033 ret
2034
2035END (MEMCMP)