blob: 17662fae6b3ce5d8abcb947e15ca43dd8efeb7a7 [file] [log] [blame]
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86.S"
18
/* Symbol name under which the routine below is emitted. */
19#define MEMCMP __memcmp16
20
/* C-level contract of the routine. `count` is a number of 16-bit elements;
   the code doubles it on entry to obtain a byte length. */
21/* int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count); */
22
/* L(name) expands to .Lname, i.e. an assembler-local label. */
23#ifndef L
24# define L(label) .L##label
25#endif
26
/* Unwind bookkeeping for one 4-byte push: the CFA offset grows by 4 and REG is
   recorded as saved at offset 0 from the stack pointer. The CFI_* helpers are
   not defined in this file (presumably asm_support_x86.S - TODO confirm). */
27#define CFI_PUSH(REG) \
28 CFI_ADJUST_CFA_OFFSET(4); \
29 CFI_REL_OFFSET(REG, 0)
30
/* Unwind bookkeeping for one 4-byte pop: the CFA offset shrinks by 4 and REG
   is marked as holding its original value again. */
31#define CFI_POP(REG) \
32 CFI_ADJUST_CFA_OFFSET(-4); \
33 CFI_RESTORE(REG)
34
/* Real push/pop instructions paired with the matching unwind annotation. */
35#define PUSH(REG) pushl REG; CFI_PUSH (REG)
36#define POP(REG) popl REG; CFI_POP (REG)
37
/* Byte offsets of the three stack arguments relative to %esp as used at
   function entry, before any PUSH: s0 at 4, s1 at 8, count at 12.
   BLK2 and LEN expand to unparenthesized sums, so they are only safe where a
   plain additive displacement is expected, as in `LEN(%esp)`. */
38#define PARMS 4
39#define BLK1 PARMS
40#define BLK2 BLK1+4
41#define LEN BLK2+4
/* RETURN_END: epilogue for code running with %ebx, %esi and %edi pushed
   (popped in reverse order), followed by ret. */
42#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
/* RETURN: same epilogue, then the unwind state is reset to the remembered
   snapshot (and remembered again) for whatever code follows the ret. */
43#define RETURN RETURN_END; CFI_RESTORE_STATE; CFI_REMEMBER_STATE
44
/*
 * Function entry.
 * Stack arguments: BLK1(%esp) = s0, BLK2(%esp) = s1, LEN(%esp) = count.
 * Result is returned in %eax: 0 when count is 0, otherwise produced by the
 * code reached through L(less48bytes) or L(48bytesormore).
 */
45DEFINE_FUNCTION MEMCMP
46 movl LEN(%esp), %ecx
47
/* Turn the element count into a byte length; the shift sets ZF for a zero
   result, in which case there is nothing to compare. */
48 shl $1, %ecx
49 jz L(zero)
50
/* %eax = s0, %edx = s1. movl does not modify flags, so the jae below still
   acts on `cmp $48, %ecx`. */
51 movl BLK1(%esp), %eax
52 cmp $48, %ecx
53 movl BLK2(%esp), %edx
54 jae L(48bytesormore)
55
/* Fewer than 48 bytes: only %ebx is saved. Both pointers are advanced to
   one-past-the-end because the tail code addresses words with negative
   offsets from %eax/%edx; %ecx keeps the byte length for the dispatch. */
56 PUSH (%ebx)
57 add %ecx, %edx
58 add %ecx, %eax
59 jmp L(less48bytes)
60
/* Unwind annotation only (no instruction): the code that follows is reached
   without %ebx pushed. */
61 CFI_POP (%ebx)
62
63 .p2align 4
/* count == 0: report equality. */
64L(zero):
65 xor %eax, %eax
66 ret
67
/*
 * L(48bytesormore): inputs of at least 48 bytes.
 * In:   %eax = s0, %edx = s1, %ecx = length in bytes.
 * Does: saves %ebx/%esi/%edi, compares the first 16 bytes with unaligned
 *       loads, then aligns the s0 cursor (%edi) down to 16 bytes and
 *       dispatches on the resulting misalignment of the s1 cursor (%esi).
 * Out (to L(shr_N)): %edi = 16-byte aligned s0 cursor, %esi = s1 cursor
 *       rounded down to 16 bytes, %edx = N = bytes removed from %esi by that
 *       rounding, %ecx = byte length plus the bytes %edi was moved back.
 * Out (to L(less16bytes)): %edi/%esi point 16 bytes past the compared block,
 *       %edx = pmovmskb equality mask minus 0xffff (non-zero).
 * NOTE(review): only even N have dedicated handlers; any other non-zero value
 *       ends up in L(shr_14). This presumably relies on both pointers being
 *       2-byte aligned - confirm against callers.
 */
    .p2align 4
L(48bytesormore):
    PUSH      (%ebx)
    PUSH      (%esi)
    PUSH      (%edi)
    /* Snapshot of the unwind state with all three registers pushed; the
       CFI_RESTORE_STATE directives further down return to it. */
    CFI_REMEMBER_STATE
    movdqu    (%eax), %xmm3
    movdqu    (%edx), %xmm0
    movl      %eax, %edi
    movl      %edx, %esi
    pcmpeqb   %xmm0, %xmm3             /* 0xff in every byte lane that matches */
    pmovmskb  %xmm3, %edx              /* one mask bit per byte lane */
    lea       16(%edi), %edi

    sub       $0xffff, %edx            /* zero iff all 16 bytes matched */
    lea       16(%esi), %esi           /* lea keeps the flags set by sub */
    jnz       L(less16bytes)

    /* Round %edi down to a 16-byte boundary; move %esi back by the same
       amount and add it to the remaining length so no byte is skipped. */
    mov       %edi, %edx
    and       $0xf, %edx
    xor       %edx, %edi
    sub       %edx, %esi
    add       %edx, %ecx

    /* %edx = misalignment of %esi. Zero means both cursors are aligned. */
    mov       %esi, %edx
    and       $0xf, %edx
    jz        L(shr_0)
    xor       %edx, %esi               /* round %esi down; %edx keeps the shift */

    /* %edx is non-zero here (the zero case left via jz above), so no check
       for 0 is needed before selecting the shift-specific handler. */
    cmp       $2, %edx
    je        L(shr_2)
    cmp       $4, %edx
    je        L(shr_4)
    cmp       $6, %edx
    je        L(shr_6)
    cmp       $8, %edx
    je        L(shr_8)
    cmp       $10, %edx
    je        L(shr_10)
    cmp       $12, %edx
    je        L(shr_12)
    jmp       L(shr_14)
110
/*
 * L(shr_0): both cursors are 16-byte aligned relative to each other, so the
 * s1 side (%esi) is loaded with aligned moves and compared against (%edi).
 * In:  %edi/%esi = cursors, %ecx = length counter set up before the dispatch.
 * %eax is cleared because L(exit) adds %eax to %esi.
 */
111 .p2align 4
112L(shr_0):
/* Counter >= 80: use the 32-bytes-per-iteration loop instead. */
113 cmp $80, %ecx
114 jae L(shr_0_gobble)
115 lea -48(%ecx), %ecx
116 xor %eax, %eax
/* Compare two 16-byte blocks; %xmm1 keeps the result for the first block
   (L(exit) inspects it), %xmm2 ends up holding the AND of both results. */
117 movaps (%esi), %xmm1
118 pcmpeqb (%edi), %xmm1
119 movaps 16(%esi), %xmm2
120 pcmpeqb 16(%edi), %xmm2
121 pand %xmm1, %xmm2
122 pmovmskb %xmm2, %edx
123 add $32, %edi
124 add $32, %esi
/* Zero iff all 32 bytes matched. */
125 sub $0xffff, %edx
126 jnz L(exit)
127
/* No mismatch: %eax/%edx = cursor + remaining bytes (end pointers), drop
   %edi/%esi from the stack and let the short-tail code finish with %ecx as
   the remaining byte count. %ebx stays pushed. */
128 lea (%ecx, %edi,1), %eax
129 lea (%ecx, %esi,1), %edx
130 POP (%edi)
131 POP (%esi)
132 jmp L(less48bytes)
133
/* Unwind annotations: back to the three-registers-pushed state. */
134 CFI_RESTORE_STATE
135 CFI_REMEMBER_STATE
136 .p2align 4
/* Long-input variant: the first 32-byte chunk is compared here, the loop then
   tests one chunk per iteration while pre-computing the next one. */
137L(shr_0_gobble):
138 lea -48(%ecx), %ecx
139 movdqa (%esi), %xmm0
140 xor %eax, %eax
141 pcmpeqb (%edi), %xmm0
142 sub $32, %ecx
143 movdqa 16(%esi), %xmm2
144 pcmpeqb 16(%edi), %xmm2
145L(shr_0_gobble_loop):
146 pand %xmm0, %xmm2
/* CF is set when the counter goes below zero; sbb below folds it in. */
147 sub $32, %ecx
148 pmovmskb %xmm2, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
149 movdqa %xmm0, %xmm1
150 movdqa 32(%esi), %xmm0
151 movdqa 48(%esi), %xmm2
/* %edx = mask - 0xffff - CF: zero only if the chunk matched AND the counter
   did not underflow. None of the instructions up to jz change the flags. */
152 sbb $0xffff, %edx
153 pcmpeqb 32(%edi), %xmm0
154 pcmpeqb 48(%edi), %xmm2
155 lea 32(%edi), %edi
156 lea 32(%esi), %esi
157 jz L(shr_0_gobble_loop)
158
159 pand %xmm0, %xmm2
/* If the counter underflowed, undo the borrow that sbb subtracted from %edx
   and give the 32 bytes back to %ecx. */
160 cmp $0, %ecx
161 jge L(shr_0_gobble_loop_next)
162 inc %edx
163 add $32, %ecx
164L(shr_0_gobble_loop_next):
/* Non-zero %edx now means a real mismatch in the chunk just tested. */
165 test %edx, %edx
166 jnz L(exit)
167
/* Test the chunk that was pre-computed on the way out of the loop. */
168 pmovmskb %xmm2, %edx
169 movdqa %xmm0, %xmm1
170 lea 32(%edi), %edi
171 lea 32(%esi), %esi
172 sub $0xffff, %edx
173 jnz L(exit)
/* Everything so far matched: hand the remaining %ecx bytes to the tail code
   with %eax/%edx as end pointers. */
174 lea (%ecx, %edi,1), %eax
175 lea (%ecx, %esi,1), %edx
176 POP (%edi)
177 POP (%esi)
178 jmp L(less48bytes)
179
/*
 * L(shr_2): the s1 cursor was rounded down by 2 bytes (%edx = 2 on entry).
 * Each 16-byte s1 block is rebuilt from two aligned loads with
 * `palignr $2` (bytes 2..17 of the 32-byte window) and compared with the
 * aligned s0 block at (%edi).
 */
/* Unwind annotations: back to the three-registers-pushed state. */
180 CFI_RESTORE_STATE
181 CFI_REMEMBER_STATE
182 .p2align 4
183L(shr_2):
/* lea and mov leave the flags alone, so jae still tests `cmp $80, %ecx`.
   %eax = shift; L(exit) adds it back to %esi. */
184 cmp $80, %ecx
185 lea -48(%ecx), %ecx
186 mov %edx, %eax
187 jae L(shr_2_gobble)
188
/* First 16 bytes; %xmm2 keeps the raw block at 16(%esi) for the next step. */
189 movdqa 16(%esi), %xmm1
190 movdqa %xmm1, %xmm2
191 palignr $2,(%esi), %xmm1
192 pcmpeqb (%edi), %xmm1
193
/* Second 16 bytes. */
194 movdqa 32(%esi), %xmm3
195 palignr $2,%xmm2, %xmm3
196 pcmpeqb 16(%edi), %xmm3
197
198 pand %xmm1, %xmm3
199 pmovmskb %xmm3, %edx
200 lea 32(%edi), %edi
201 lea 32(%esi), %esi
/* Zero iff all 32 bytes matched. */
202 sub $0xffff, %edx
203 jnz L(exit)
/* End pointers for the tail code; the s1 one re-adds the 2-byte shift. */
204 lea (%ecx, %edi,1), %eax
205 lea 2(%ecx, %esi,1), %edx
206 POP (%edi)
207 POP (%esi)
208 jmp L(less48bytes)
209
210 CFI_RESTORE_STATE
211 CFI_REMEMBER_STATE
212 .p2align 4
/* Long-input variant: 32 bytes per iteration, next chunk pre-computed. */
213L(shr_2_gobble):
214 sub $32, %ecx
215 movdqa 16(%esi), %xmm0
216 palignr $2,(%esi), %xmm0
217 pcmpeqb (%edi), %xmm0
218
219 movdqa 32(%esi), %xmm3
220 palignr $2,16(%esi), %xmm3
221 pcmpeqb 16(%edi), %xmm3
222
223L(shr_2_gobble_loop):
224 pand %xmm0, %xmm3
/* CF is set when the counter goes below zero; sbb below folds it in. */
225 sub $32, %ecx
226 pmovmskb %xmm3, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
227 movdqa %xmm0, %xmm1
228
229 movdqa 64(%esi), %xmm3
230 palignr $2,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF; the SSE moves/compares and lea that follow do
   not touch the flags, so jz still reflects this result. */
231 sbb $0xffff, %edx
232 movdqa 48(%esi), %xmm0
233 palignr $2,32(%esi), %xmm0
234 pcmpeqb 32(%edi), %xmm0
235 lea 32(%esi), %esi
236 pcmpeqb 48(%edi), %xmm3
237
238 lea 32(%edi), %edi
239 jz L(shr_2_gobble_loop)
240 pand %xmm0, %xmm3
241
/* Counter underflow: undo the borrow in %edx and restore 32 bytes to %ecx. */
242 cmp $0, %ecx
243 jge L(shr_2_gobble_next)
244 inc %edx
245 add $32, %ecx
246L(shr_2_gobble_next):
247 test %edx, %edx
248 jnz L(exit)
249
/* Test the chunk pre-computed on the way out of the loop. */
250 pmovmskb %xmm3, %edx
251 movdqa %xmm0, %xmm1
252 lea 32(%edi), %edi
253 lea 32(%esi), %esi
254 sub $0xffff, %edx
255 jnz L(exit)
256
/* All matched so far: end pointers (s1 one re-adds the shift) for the tail. */
257 lea (%ecx, %edi,1), %eax
258 lea 2(%ecx, %esi,1), %edx
259 POP (%edi)
260 POP (%esi)
261 jmp L(less48bytes)
262
/*
 * L(shr_4): the s1 cursor was rounded down by 4 bytes (%edx = 4 on entry).
 * Each 16-byte s1 block is rebuilt from two aligned loads with
 * `palignr $4` and compared with the aligned s0 block at (%edi).
 */
/* Unwind annotations: back to the three-registers-pushed state. */
263 CFI_RESTORE_STATE
264 CFI_REMEMBER_STATE
265 .p2align 4
266L(shr_4):
/* lea and mov leave the flags alone, so jae still tests `cmp $80, %ecx`.
   %eax = shift; L(exit) adds it back to %esi. */
267 cmp $80, %ecx
268 lea -48(%ecx), %ecx
269 mov %edx, %eax
270 jae L(shr_4_gobble)
271
/* First 16 bytes; %xmm2 keeps the raw block at 16(%esi) for the next step. */
272 movdqa 16(%esi), %xmm1
273 movdqa %xmm1, %xmm2
274 palignr $4,(%esi), %xmm1
275 pcmpeqb (%edi), %xmm1
276
/* Second 16 bytes. */
277 movdqa 32(%esi), %xmm3
278 palignr $4,%xmm2, %xmm3
279 pcmpeqb 16(%edi), %xmm3
280
281 pand %xmm1, %xmm3
282 pmovmskb %xmm3, %edx
283 lea 32(%edi), %edi
284 lea 32(%esi), %esi
/* Zero iff all 32 bytes matched. */
285 sub $0xffff, %edx
286 jnz L(exit)
/* End pointers for the tail code; the s1 one re-adds the 4-byte shift. */
287 lea (%ecx, %edi,1), %eax
288 lea 4(%ecx, %esi,1), %edx
289 POP (%edi)
290 POP (%esi)
291 jmp L(less48bytes)
292
293 CFI_RESTORE_STATE
294 CFI_REMEMBER_STATE
295 .p2align 4
/* Long-input variant: 32 bytes per iteration, next chunk pre-computed. */
296L(shr_4_gobble):
297 sub $32, %ecx
298 movdqa 16(%esi), %xmm0
299 palignr $4,(%esi), %xmm0
300 pcmpeqb (%edi), %xmm0
301
302 movdqa 32(%esi), %xmm3
303 palignr $4,16(%esi), %xmm3
304 pcmpeqb 16(%edi), %xmm3
305
306L(shr_4_gobble_loop):
307 pand %xmm0, %xmm3
/* CF is set when the counter goes below zero; sbb below folds it in. */
308 sub $32, %ecx
309 pmovmskb %xmm3, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
310 movdqa %xmm0, %xmm1
311
312 movdqa 64(%esi), %xmm3
313 palignr $4,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF; nothing between here and jz changes the flags. */
314 sbb $0xffff, %edx
315 movdqa 48(%esi), %xmm0
316 palignr $4,32(%esi), %xmm0
317 pcmpeqb 32(%edi), %xmm0
318 lea 32(%esi), %esi
319 pcmpeqb 48(%edi), %xmm3
320
321 lea 32(%edi), %edi
322 jz L(shr_4_gobble_loop)
323 pand %xmm0, %xmm3
324
/* Counter underflow: undo the borrow in %edx and restore 32 bytes to %ecx. */
325 cmp $0, %ecx
326 jge L(shr_4_gobble_next)
327 inc %edx
328 add $32, %ecx
329L(shr_4_gobble_next):
330 test %edx, %edx
331 jnz L(exit)
332
/* Test the chunk pre-computed on the way out of the loop. */
333 pmovmskb %xmm3, %edx
334 movdqa %xmm0, %xmm1
335 lea 32(%edi), %edi
336 lea 32(%esi), %esi
337 sub $0xffff, %edx
338 jnz L(exit)
339
/* All matched so far: end pointers (s1 one re-adds the shift) for the tail. */
340 lea (%ecx, %edi,1), %eax
341 lea 4(%ecx, %esi,1), %edx
342 POP (%edi)
343 POP (%esi)
344 jmp L(less48bytes)
345
/*
 * L(shr_6): the s1 cursor was rounded down by 6 bytes (%edx = 6 on entry).
 * Each 16-byte s1 block is rebuilt from two aligned loads with
 * `palignr $6` and compared with the aligned s0 block at (%edi).
 */
/* Unwind annotations: back to the three-registers-pushed state. */
346 CFI_RESTORE_STATE
347 CFI_REMEMBER_STATE
348 .p2align 4
349L(shr_6):
/* lea and mov leave the flags alone, so jae still tests `cmp $80, %ecx`.
   %eax = shift; L(exit) adds it back to %esi. */
350 cmp $80, %ecx
351 lea -48(%ecx), %ecx
352 mov %edx, %eax
353 jae L(shr_6_gobble)
354
/* First 16 bytes; %xmm2 keeps the raw block at 16(%esi) for the next step. */
355 movdqa 16(%esi), %xmm1
356 movdqa %xmm1, %xmm2
357 palignr $6,(%esi), %xmm1
358 pcmpeqb (%edi), %xmm1
359
/* Second 16 bytes. */
360 movdqa 32(%esi), %xmm3
361 palignr $6,%xmm2, %xmm3
362 pcmpeqb 16(%edi), %xmm3
363
364 pand %xmm1, %xmm3
365 pmovmskb %xmm3, %edx
366 lea 32(%edi), %edi
367 lea 32(%esi), %esi
/* Zero iff all 32 bytes matched. */
368 sub $0xffff, %edx
369 jnz L(exit)
/* End pointers for the tail code; the s1 one re-adds the 6-byte shift. */
370 lea (%ecx, %edi,1), %eax
371 lea 6(%ecx, %esi,1), %edx
372 POP (%edi)
373 POP (%esi)
374 jmp L(less48bytes)
375
376 CFI_RESTORE_STATE
377 CFI_REMEMBER_STATE
378 .p2align 4
/* Long-input variant: 32 bytes per iteration, next chunk pre-computed. */
379L(shr_6_gobble):
380 sub $32, %ecx
381 movdqa 16(%esi), %xmm0
382 palignr $6,(%esi), %xmm0
383 pcmpeqb (%edi), %xmm0
384
385 movdqa 32(%esi), %xmm3
386 palignr $6,16(%esi), %xmm3
387 pcmpeqb 16(%edi), %xmm3
388
389L(shr_6_gobble_loop):
390 pand %xmm0, %xmm3
/* CF is set when the counter goes below zero; sbb below folds it in. */
391 sub $32, %ecx
392 pmovmskb %xmm3, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
393 movdqa %xmm0, %xmm1
394
395 movdqa 64(%esi), %xmm3
396 palignr $6,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF; nothing between here and jz changes the flags. */
397 sbb $0xffff, %edx
398 movdqa 48(%esi), %xmm0
399 palignr $6,32(%esi), %xmm0
400 pcmpeqb 32(%edi), %xmm0
401 lea 32(%esi), %esi
402 pcmpeqb 48(%edi), %xmm3
403
404 lea 32(%edi), %edi
405 jz L(shr_6_gobble_loop)
406 pand %xmm0, %xmm3
407
/* Counter underflow: undo the borrow in %edx and restore 32 bytes to %ecx. */
408 cmp $0, %ecx
409 jge L(shr_6_gobble_next)
410 inc %edx
411 add $32, %ecx
412L(shr_6_gobble_next):
413 test %edx, %edx
414 jnz L(exit)
415
/* Test the chunk pre-computed on the way out of the loop. */
416 pmovmskb %xmm3, %edx
417 movdqa %xmm0, %xmm1
418 lea 32(%edi), %edi
419 lea 32(%esi), %esi
420 sub $0xffff, %edx
421 jnz L(exit)
422
/* All matched so far: end pointers (s1 one re-adds the shift) for the tail. */
423 lea (%ecx, %edi,1), %eax
424 lea 6(%ecx, %esi,1), %edx
425 POP (%edi)
426 POP (%esi)
427 jmp L(less48bytes)
428
/*
 * L(shr_8): the s1 cursor was rounded down by 8 bytes (%edx = 8 on entry).
 * Each 16-byte s1 block is rebuilt from two aligned loads with
 * `palignr $8` and compared with the aligned s0 block at (%edi).
 */
/* Unwind annotations: back to the three-registers-pushed state. */
429 CFI_RESTORE_STATE
430 CFI_REMEMBER_STATE
431 .p2align 4
432L(shr_8):
/* lea and mov leave the flags alone, so jae still tests `cmp $80, %ecx`.
   %eax = shift; L(exit) adds it back to %esi. */
433 cmp $80, %ecx
434 lea -48(%ecx), %ecx
435 mov %edx, %eax
436 jae L(shr_8_gobble)
437
/* First 16 bytes; %xmm2 keeps the raw block at 16(%esi) for the next step. */
438 movdqa 16(%esi), %xmm1
439 movdqa %xmm1, %xmm2
440 palignr $8,(%esi), %xmm1
441 pcmpeqb (%edi), %xmm1
442
/* Second 16 bytes. */
443 movdqa 32(%esi), %xmm3
444 palignr $8,%xmm2, %xmm3
445 pcmpeqb 16(%edi), %xmm3
446
447 pand %xmm1, %xmm3
448 pmovmskb %xmm3, %edx
449 lea 32(%edi), %edi
450 lea 32(%esi), %esi
/* Zero iff all 32 bytes matched. */
451 sub $0xffff, %edx
452 jnz L(exit)
/* End pointers for the tail code; the s1 one re-adds the 8-byte shift. */
453 lea (%ecx, %edi,1), %eax
454 lea 8(%ecx, %esi,1), %edx
455 POP (%edi)
456 POP (%esi)
457 jmp L(less48bytes)
458
459 CFI_RESTORE_STATE
460 CFI_REMEMBER_STATE
461 .p2align 4
/* Long-input variant: 32 bytes per iteration, next chunk pre-computed. */
462L(shr_8_gobble):
463 sub $32, %ecx
464 movdqa 16(%esi), %xmm0
465 palignr $8,(%esi), %xmm0
466 pcmpeqb (%edi), %xmm0
467
468 movdqa 32(%esi), %xmm3
469 palignr $8,16(%esi), %xmm3
470 pcmpeqb 16(%edi), %xmm3
471
472L(shr_8_gobble_loop):
473 pand %xmm0, %xmm3
/* CF is set when the counter goes below zero; sbb below folds it in. */
474 sub $32, %ecx
475 pmovmskb %xmm3, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
476 movdqa %xmm0, %xmm1
477
478 movdqa 64(%esi), %xmm3
479 palignr $8,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF; nothing between here and jz changes the flags. */
480 sbb $0xffff, %edx
481 movdqa 48(%esi), %xmm0
482 palignr $8,32(%esi), %xmm0
483 pcmpeqb 32(%edi), %xmm0
484 lea 32(%esi), %esi
485 pcmpeqb 48(%edi), %xmm3
486
487 lea 32(%edi), %edi
488 jz L(shr_8_gobble_loop)
489 pand %xmm0, %xmm3
490
/* Counter underflow: undo the borrow in %edx and restore 32 bytes to %ecx. */
491 cmp $0, %ecx
492 jge L(shr_8_gobble_next)
493 inc %edx
494 add $32, %ecx
495L(shr_8_gobble_next):
496 test %edx, %edx
497 jnz L(exit)
498
/* Test the chunk pre-computed on the way out of the loop. */
499 pmovmskb %xmm3, %edx
500 movdqa %xmm0, %xmm1
501 lea 32(%edi), %edi
502 lea 32(%esi), %esi
503 sub $0xffff, %edx
504 jnz L(exit)
505
/* All matched so far: end pointers (s1 one re-adds the shift) for the tail. */
506 lea (%ecx, %edi,1), %eax
507 lea 8(%ecx, %esi,1), %edx
508 POP (%edi)
509 POP (%esi)
510 jmp L(less48bytes)
511
/*
 * L(shr_10): the s1 cursor was rounded down by 10 bytes (%edx = 10 on entry).
 * Each 16-byte s1 block is rebuilt from two aligned loads with
 * `palignr $10` and compared with the aligned s0 block at (%edi).
 */
/* Unwind annotations: back to the three-registers-pushed state. */
512 CFI_RESTORE_STATE
513 CFI_REMEMBER_STATE
514 .p2align 4
515L(shr_10):
/* lea and mov leave the flags alone, so jae still tests `cmp $80, %ecx`.
   %eax = shift; L(exit) adds it back to %esi. */
516 cmp $80, %ecx
517 lea -48(%ecx), %ecx
518 mov %edx, %eax
519 jae L(shr_10_gobble)
520
/* First 16 bytes; %xmm2 keeps the raw block at 16(%esi) for the next step. */
521 movdqa 16(%esi), %xmm1
522 movdqa %xmm1, %xmm2
523 palignr $10, (%esi), %xmm1
524 pcmpeqb (%edi), %xmm1
525
/* Second 16 bytes. */
526 movdqa 32(%esi), %xmm3
527 palignr $10,%xmm2, %xmm3
528 pcmpeqb 16(%edi), %xmm3
529
530 pand %xmm1, %xmm3
531 pmovmskb %xmm3, %edx
532 lea 32(%edi), %edi
533 lea 32(%esi), %esi
/* Zero iff all 32 bytes matched. */
534 sub $0xffff, %edx
535 jnz L(exit)
/* End pointers for the tail code; the s1 one re-adds the 10-byte shift. */
536 lea (%ecx, %edi,1), %eax
537 lea 10(%ecx, %esi,1), %edx
538 POP (%edi)
539 POP (%esi)
540 jmp L(less48bytes)
541
542 CFI_RESTORE_STATE
543 CFI_REMEMBER_STATE
544 .p2align 4
/* Long-input variant: 32 bytes per iteration, next chunk pre-computed. */
545L(shr_10_gobble):
546 sub $32, %ecx
547 movdqa 16(%esi), %xmm0
548 palignr $10, (%esi), %xmm0
549 pcmpeqb (%edi), %xmm0
550
551 movdqa 32(%esi), %xmm3
552 palignr $10, 16(%esi), %xmm3
553 pcmpeqb 16(%edi), %xmm3
554
555L(shr_10_gobble_loop):
556 pand %xmm0, %xmm3
/* CF is set when the counter goes below zero; sbb below folds it in. */
557 sub $32, %ecx
558 pmovmskb %xmm3, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
559 movdqa %xmm0, %xmm1
560
561 movdqa 64(%esi), %xmm3
562 palignr $10,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF; nothing between here and jz changes the flags. */
563 sbb $0xffff, %edx
564 movdqa 48(%esi), %xmm0
565 palignr $10,32(%esi), %xmm0
566 pcmpeqb 32(%edi), %xmm0
567 lea 32(%esi), %esi
568 pcmpeqb 48(%edi), %xmm3
569
570 lea 32(%edi), %edi
571 jz L(shr_10_gobble_loop)
572 pand %xmm0, %xmm3
573
/* Counter underflow: undo the borrow in %edx and restore 32 bytes to %ecx. */
574 cmp $0, %ecx
575 jge L(shr_10_gobble_next)
576 inc %edx
577 add $32, %ecx
578L(shr_10_gobble_next):
579 test %edx, %edx
580 jnz L(exit)
581
/* Test the chunk pre-computed on the way out of the loop. */
582 pmovmskb %xmm3, %edx
583 movdqa %xmm0, %xmm1
584 lea 32(%edi), %edi
585 lea 32(%esi), %esi
586 sub $0xffff, %edx
587 jnz L(exit)
588
/* All matched so far: end pointers (s1 one re-adds the shift) for the tail. */
589 lea (%ecx, %edi,1), %eax
590 lea 10(%ecx, %esi,1), %edx
591 POP (%edi)
592 POP (%esi)
593 jmp L(less48bytes)
594
/*
 * L(shr_12): the s1 cursor was rounded down by 12 bytes (%edx = 12 on entry).
 * Each 16-byte s1 block is rebuilt from two aligned loads with
 * `palignr $12` and compared with the aligned s0 block at (%edi).
 */
/* Unwind annotations: back to the three-registers-pushed state. */
595 CFI_RESTORE_STATE
596 CFI_REMEMBER_STATE
597 .p2align 4
598L(shr_12):
/* lea and mov leave the flags alone, so jae still tests `cmp $80, %ecx`.
   %eax = shift; L(exit) adds it back to %esi. */
599 cmp $80, %ecx
600 lea -48(%ecx), %ecx
601 mov %edx, %eax
602 jae L(shr_12_gobble)
603
/* First 16 bytes; %xmm2 keeps the raw block at 16(%esi) for the next step. */
604 movdqa 16(%esi), %xmm1
605 movdqa %xmm1, %xmm2
606 palignr $12, (%esi), %xmm1
607 pcmpeqb (%edi), %xmm1
608
/* Second 16 bytes. */
609 movdqa 32(%esi), %xmm3
610 palignr $12, %xmm2, %xmm3
611 pcmpeqb 16(%edi), %xmm3
612
613 pand %xmm1, %xmm3
614 pmovmskb %xmm3, %edx
615 lea 32(%edi), %edi
616 lea 32(%esi), %esi
/* Zero iff all 32 bytes matched. */
617 sub $0xffff, %edx
618 jnz L(exit)
/* End pointers for the tail code; the s1 one re-adds the 12-byte shift. */
619 lea (%ecx, %edi,1), %eax
620 lea 12(%ecx, %esi,1), %edx
621 POP (%edi)
622 POP (%esi)
623 jmp L(less48bytes)
624
625 CFI_RESTORE_STATE
626 CFI_REMEMBER_STATE
627 .p2align 4
/* Long-input variant: 32 bytes per iteration, next chunk pre-computed. */
628L(shr_12_gobble):
629 sub $32, %ecx
630 movdqa 16(%esi), %xmm0
631 palignr $12, (%esi), %xmm0
632 pcmpeqb (%edi), %xmm0
633
634 movdqa 32(%esi), %xmm3
635 palignr $12, 16(%esi), %xmm3
636 pcmpeqb 16(%edi), %xmm3
637
638L(shr_12_gobble_loop):
639 pand %xmm0, %xmm3
/* CF is set when the counter goes below zero; sbb below folds it in. */
640 sub $32, %ecx
641 pmovmskb %xmm3, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
642 movdqa %xmm0, %xmm1
643
644 movdqa 64(%esi), %xmm3
645 palignr $12,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF; nothing between here and jz changes the flags. */
646 sbb $0xffff, %edx
647 movdqa 48(%esi), %xmm0
648 palignr $12,32(%esi), %xmm0
649 pcmpeqb 32(%edi), %xmm0
650 lea 32(%esi), %esi
651 pcmpeqb 48(%edi), %xmm3
652
653 lea 32(%edi), %edi
654 jz L(shr_12_gobble_loop)
655 pand %xmm0, %xmm3
656
/* Counter underflow: undo the borrow in %edx and restore 32 bytes to %ecx. */
657 cmp $0, %ecx
658 jge L(shr_12_gobble_next)
659 inc %edx
660 add $32, %ecx
661L(shr_12_gobble_next):
662 test %edx, %edx
663 jnz L(exit)
664
/* Test the chunk pre-computed on the way out of the loop. */
665 pmovmskb %xmm3, %edx
666 movdqa %xmm0, %xmm1
667 lea 32(%edi), %edi
668 lea 32(%esi), %esi
669 sub $0xffff, %edx
670 jnz L(exit)
671
/* All matched so far: end pointers (s1 one re-adds the shift) for the tail. */
672 lea (%ecx, %edi,1), %eax
673 lea 12(%ecx, %esi,1), %edx
674 POP (%edi)
675 POP (%esi)
676 jmp L(less48bytes)
677
/*
 * L(shr_14): reached for a 14-byte rounding of the s1 cursor, and also as the
 * fall-through of the shift dispatch for any value it has no dedicated
 * compare for. Each 16-byte s1 block is rebuilt from two aligned loads with
 * `palignr $14` and compared with the aligned s0 block at (%edi).
 */
/* Unwind annotations: back to the three-registers-pushed state. */
678 CFI_RESTORE_STATE
679 CFI_REMEMBER_STATE
680 .p2align 4
681L(shr_14):
/* lea and mov leave the flags alone, so jae still tests `cmp $80, %ecx`.
   %eax = shift as passed in %edx; L(exit) adds it back to %esi. */
682 cmp $80, %ecx
683 lea -48(%ecx), %ecx
684 mov %edx, %eax
685 jae L(shr_14_gobble)
686
/* First 16 bytes; %xmm2 keeps the raw block at 16(%esi) for the next step. */
687 movdqa 16(%esi), %xmm1
688 movdqa %xmm1, %xmm2
689 palignr $14, (%esi), %xmm1
690 pcmpeqb (%edi), %xmm1
691
/* Second 16 bytes. */
692 movdqa 32(%esi), %xmm3
693 palignr $14, %xmm2, %xmm3
694 pcmpeqb 16(%edi), %xmm3
695
696 pand %xmm1, %xmm3
697 pmovmskb %xmm3, %edx
698 lea 32(%edi), %edi
699 lea 32(%esi), %esi
/* Zero iff all 32 bytes matched. */
700 sub $0xffff, %edx
701 jnz L(exit)
/* End pointers for the tail code; the s1 one re-adds the 14-byte shift. */
702 lea (%ecx, %edi,1), %eax
703 lea 14(%ecx, %esi,1), %edx
704 POP (%edi)
705 POP (%esi)
706 jmp L(less48bytes)
707
708 CFI_RESTORE_STATE
709 CFI_REMEMBER_STATE
710 .p2align 4
/* Long-input variant: 32 bytes per iteration, next chunk pre-computed. */
711L(shr_14_gobble):
712 sub $32, %ecx
713 movdqa 16(%esi), %xmm0
714 palignr $14, (%esi), %xmm0
715 pcmpeqb (%edi), %xmm0
716
717 movdqa 32(%esi), %xmm3
718 palignr $14, 16(%esi), %xmm3
719 pcmpeqb 16(%edi), %xmm3
720
721L(shr_14_gobble_loop):
722 pand %xmm0, %xmm3
/* CF is set when the counter goes below zero; sbb below folds it in. */
723 sub $32, %ecx
724 pmovmskb %xmm3, %edx
/* Keep the first-half result of the chunk under test for L(exit). */
725 movdqa %xmm0, %xmm1
726
727 movdqa 64(%esi), %xmm3
728 palignr $14,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF; nothing between here and jz changes the flags. */
729 sbb $0xffff, %edx
730 movdqa 48(%esi), %xmm0
731 palignr $14,32(%esi), %xmm0
732 pcmpeqb 32(%edi), %xmm0
733 lea 32(%esi), %esi
734 pcmpeqb 48(%edi), %xmm3
735
736 lea 32(%edi), %edi
737 jz L(shr_14_gobble_loop)
738 pand %xmm0, %xmm3
739
/* Counter underflow: undo the borrow in %edx and restore 32 bytes to %ecx. */
740 cmp $0, %ecx
741 jge L(shr_14_gobble_next)
742 inc %edx
743 add $32, %ecx
744L(shr_14_gobble_next):
745 test %edx, %edx
746 jnz L(exit)
747
/* Test the chunk pre-computed on the way out of the loop. */
748 pmovmskb %xmm3, %edx
749 movdqa %xmm0, %xmm1
750 lea 32(%edi), %edi
751 lea 32(%esi), %esi
752 sub $0xffff, %edx
753 jnz L(exit)
754
/* All matched so far: end pointers (s1 one re-adds the shift) for the tail. */
755 lea (%ecx, %edi,1), %eax
756 lea 14(%ecx, %esi,1), %edx
757 POP (%edi)
758 POP (%esi)
759 jmp L(less48bytes)
760
/*
 * L(exit) / L(less16bytes): locate the first differing 16-bit word inside a
 * 16-byte block and return the difference of the two words.
 *
 * L(exit) in:
 *   %xmm1    = pcmpeqb result for the first 16 bytes of the 32-byte chunk
 *   %edx     = pmovmskb of the combined (ANDed) result minus 0xffff, non-zero
 *   %edi/%esi = cursors already advanced past the 32-byte chunk
 *   %eax     = byte shift that was removed from %esi (0 for shr_0)
 * L(less16bytes) in:
 *   %edx     = (equality mask - 0xffff) for the 16 bytes ending at
 *              %edi / %esi; non-zero
 *
 * Because %edx is the negated set of mismatching lanes, its lowest set bit is
 * the lowest mismatching byte; the tests below narrow that bit down to one of
 * the eight words. Result in %eax = zero-extended s0 word - s1 word.
 * Runs with %ebx, %esi and %edi pushed; RETURN / RETURN_END pops them.
 */
    CFI_RESTORE_STATE
    CFI_REMEMBER_STATE
    .p2align 4
L(exit):
    pmovmskb  %xmm1, %ebx
    sub       $0xffff, %ebx            /* zero iff the first 16 bytes matched */
    jz        L(first16bytes)
    /* Mismatch is in the first half: step both cursors back onto it and use
       its own mask. */
    lea       -16(%esi), %esi
    lea       -16(%edi), %edi
    mov       %ebx, %edx

L(first16bytes):
    add       %eax, %esi               /* undo the alignment shift on s1 */
L(less16bytes):
    test      %dl, %dl                 /* any mismatch in bytes 0..7? */
    jz        L(next_four_words)
    test      $15, %dl                 /* bytes 0..3 (words 0 and 1)? */
    jz        L(second_two_words)
    test      $3, %dl                  /* bytes 0..1 (word 0)? */
    jz        L(second_word)
    movzwl    -16(%edi), %eax
    movzwl    -16(%esi), %ebx
    subl      %ebx, %eax
    RETURN

    .p2align 4
L(second_word):
    movzwl    -14(%edi), %eax
    movzwl    -14(%esi), %ebx
    subl      %ebx, %eax
    RETURN

    .p2align 4
L(second_two_words):
    test      $63, %dl                 /* bytes 4..5 (word 2)? */
    jz        L(fourth_word)
    movzwl    -12(%edi), %eax
    movzwl    -12(%esi), %ebx
    subl      %ebx, %eax
    RETURN

    .p2align 4
L(fourth_word):
    movzwl    -10(%edi), %eax
    movzwl    -10(%esi), %ebx
    subl      %ebx, %eax
    RETURN

    .p2align 4
L(next_four_words):
    test      $15, %dh                 /* bytes 8..11 (words 4 and 5)? */
    jz        L(fourth_two_words)
    test      $3, %dh                  /* bytes 8..9 (word 4)? */
    jz        L(sixth_word)
    movzwl    -8(%edi), %eax
    movzwl    -8(%esi), %ebx
    subl      %ebx, %eax
    RETURN

    .p2align 4
L(sixth_word):
    movzwl    -6(%edi), %eax
    movzwl    -6(%esi), %ebx
    subl      %ebx, %eax
    RETURN

    .p2align 4
L(fourth_two_words):
    test      $63, %dh                 /* bytes 12..13 (word 6)? */
    jz        L(eighth_word)
    movzwl    -4(%edi), %eax
    movzwl    -4(%esi), %ebx
    subl      %ebx, %eax
    RETURN

    .p2align 4
L(eighth_word):
    movzwl    -2(%edi), %eax
    movzwl    -2(%esi), %ebx
    subl      %ebx, %eax
    /* RETURN_END rather than RETURN: the code after this point runs with only
       %ebx pushed, so the unwind state must NOT be reset to the
       three-registers snapshot here. After the three POPs the recorded CFA
       offset is back at its entry value and the CFI_PUSH below describes
       exactly one saved register. */
    RETURN_END


    CFI_PUSH (%ebx)
845
/*
 * Tail dispatch. Entered at L(less48bytes) with %ecx = bytes still to compare,
 * %eax/%edx = one-past-the-end pointers of s0/s1 and %ebx pushed.
 * The compare chains below select the L(<n>bytes) entry of the word ladder
 * that matches %ecx; each group tests three even sizes explicitly and sends
 * every other value of its 8-byte range to the largest entry of the group.
 */
846 .p2align 4
/* %ecx in [8, 48): split at 16. */
847L(more8bytes):
848 cmp $16, %ecx
849 jae L(more16bytes)
850 cmp $8, %ecx
851 je L(8bytes)
852 cmp $10, %ecx
853 je L(10bytes)
854 cmp $12, %ecx
855 je L(12bytes)
856 jmp L(14bytes)
857
858 .p2align 4
/* %ecx >= 16: split at 24. */
859L(more16bytes):
860 cmp $24, %ecx
861 jae L(more24bytes)
862 cmp $16, %ecx
863 je L(16bytes)
864 cmp $18, %ecx
865 je L(18bytes)
866 cmp $20, %ecx
867 je L(20bytes)
868 jmp L(22bytes)
869
870 .p2align 4
/* %ecx >= 24: split at 32. */
871L(more24bytes):
872 cmp $32, %ecx
873 jae L(more32bytes)
874 cmp $24, %ecx
875 je L(24bytes)
876 cmp $26, %ecx
877 je L(26bytes)
878 cmp $28, %ecx
879 je L(28bytes)
880 jmp L(30bytes)
881
882 .p2align 4
/* %ecx >= 32: split at 40. */
883L(more32bytes):
884 cmp $40, %ecx
885 jae L(more40bytes)
886 cmp $32, %ecx
887 je L(32bytes)
888 cmp $34, %ecx
889 je L(34bytes)
890 cmp $36, %ecx
891 je L(36bytes)
892 jmp L(38bytes)
893
894 .p2align 4
/* Entry point of the dispatch. Values below 8 other than 2 and 4 (this
   includes 0 and 6) go to L(6bytes), which reads the last three words before
   the end pointers.
   NOTE(review): a 0 here appears to come only from the >= 48-byte paths, where
   those words were already compared - confirm. */
895L(less48bytes):
896 cmp $8, %ecx
897 jae L(more8bytes)
898 cmp $2, %ecx
899 je L(2bytes)
900 cmp $4, %ecx
901 je L(4bytes)
902 jmp L(6bytes)
903
904 .p2align 4
/* %ecx >= 40: anything that is not 40, 42 or 44 is treated as 46. */
905L(more40bytes):
906 cmp $40, %ecx
907 je L(40bytes)
908 cmp $42, %ecx
909 je L(42bytes)
910 cmp $44, %ecx
911 je L(44bytes)
912 jmp L(46bytes)
913
/*
 * Fall-through word ladder. %eax/%edx are one-past-the-end pointers of
 * s0/s1 and %ebx is pushed. Entry L(<n>bytes) compares the 16-bit word that
 * lies n bytes before the end and then falls into the next lower entry, so
 * entering at L(<n>bytes) checks the final n bytes in ascending address order.
 * Each step leaves the zero-extended difference (s0 word - s1 word) in %ecx
 * and leaves through L(memcmp16_exit) as soon as it is non-zero.
 */
914 .p2align 4
915L(46bytes):
916 movzwl -46(%eax), %ecx
917 movzwl -46(%edx), %ebx
918 subl %ebx, %ecx
919 jne L(memcmp16_exit)
920L(44bytes):
921 movzwl -44(%eax), %ecx
922 movzwl -44(%edx), %ebx
923 subl %ebx, %ecx
924 jne L(memcmp16_exit)
925L(42bytes):
926 movzwl -42(%eax), %ecx
927 movzwl -42(%edx), %ebx
928 subl %ebx, %ecx
929 jne L(memcmp16_exit)
930L(40bytes):
931 movzwl -40(%eax), %ecx
932 movzwl -40(%edx), %ebx
933 subl %ebx, %ecx
934 jne L(memcmp16_exit)
935L(38bytes):
936 movzwl -38(%eax), %ecx
937 movzwl -38(%edx), %ebx
938 subl %ebx, %ecx
939 jne L(memcmp16_exit)
940L(36bytes):
941 movzwl -36(%eax), %ecx
942 movzwl -36(%edx), %ebx
943 subl %ebx, %ecx
944 jne L(memcmp16_exit)
945L(34bytes):
946 movzwl -34(%eax), %ecx
947 movzwl -34(%edx), %ebx
948 subl %ebx, %ecx
949 jne L(memcmp16_exit)
950L(32bytes):
951 movzwl -32(%eax), %ecx
952 movzwl -32(%edx), %ebx
953 subl %ebx, %ecx
954 jne L(memcmp16_exit)
955L(30bytes):
956 movzwl -30(%eax), %ecx
957 movzwl -30(%edx), %ebx
958 subl %ebx, %ecx
959 jne L(memcmp16_exit)
960L(28bytes):
961 movzwl -28(%eax), %ecx
962 movzwl -28(%edx), %ebx
963 subl %ebx, %ecx
964 jne L(memcmp16_exit)
965L(26bytes):
966 movzwl -26(%eax), %ecx
967 movzwl -26(%edx), %ebx
968 subl %ebx, %ecx
969 jne L(memcmp16_exit)
970L(24bytes):
971 movzwl -24(%eax), %ecx
972 movzwl -24(%edx), %ebx
973 subl %ebx, %ecx
974 jne L(memcmp16_exit)
975L(22bytes):
976 movzwl -22(%eax), %ecx
977 movzwl -22(%edx), %ebx
978 subl %ebx, %ecx
979 jne L(memcmp16_exit)
980L(20bytes):
981 movzwl -20(%eax), %ecx
982 movzwl -20(%edx), %ebx
983 subl %ebx, %ecx
984 jne L(memcmp16_exit)
985L(18bytes):
986 movzwl -18(%eax), %ecx
987 movzwl -18(%edx), %ebx
988 subl %ebx, %ecx
989 jne L(memcmp16_exit)
990L(16bytes):
991 movzwl -16(%eax), %ecx
992 movzwl -16(%edx), %ebx
993 subl %ebx, %ecx
994 jne L(memcmp16_exit)
995L(14bytes):
996 movzwl -14(%eax), %ecx
997 movzwl -14(%edx), %ebx
998 subl %ebx, %ecx
999 jne L(memcmp16_exit)
1000L(12bytes):
1001 movzwl -12(%eax), %ecx
1002 movzwl -12(%edx), %ebx
1003 subl %ebx, %ecx
1004 jne L(memcmp16_exit)
1005L(10bytes):
1006 movzwl -10(%eax), %ecx
1007 movzwl -10(%edx), %ebx
1008 subl %ebx, %ecx
1009 jne L(memcmp16_exit)
1010L(8bytes):
1011 movzwl -8(%eax), %ecx
1012 movzwl -8(%edx), %ebx
1013 subl %ebx, %ecx
1014 jne L(memcmp16_exit)
1015L(6bytes):
1016 movzwl -6(%eax), %ecx
1017 movzwl -6(%edx), %ebx
1018 subl %ebx, %ecx
1019 jne L(memcmp16_exit)
1020L(4bytes):
1021 movzwl -4(%eax), %ecx
1022 movzwl -4(%edx), %ebx
1023 subl %ebx, %ecx
1024 jne L(memcmp16_exit)
/* Last word: the difference is computed straight into %eax, so the value
   returned is 0 when the words are equal and their difference otherwise. */
1025L(2bytes):
1026 movzwl -2(%eax), %eax
1027 movzwl -2(%edx), %ebx
1028 subl %ebx, %eax
1029 POP (%ebx)
1030 ret
/* Unwind annotation only (no instruction): the code below is entered by
   jumps from the word ladder, where %ebx is still on the stack. */
1031 CFI_PUSH (%ebx)
1032
1033 .p2align 4
/* Mismatch exit of the word ladder: %ecx holds the non-zero word difference;
   restore %ebx and return the difference in %eax. */
1034L(memcmp16_exit):
1035 POP (%ebx)
1036 mov %ecx, %eax
1037 ret
1038END_FUNCTION MEMCMP