/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/*
 * Preprocessor prelude.  Syntax: AT&T / GNU as, 32-bit x86, run through cpp.
 * Every definition guarded by #ifndef may be supplied by an including build
 * environment instead; the unguarded ones are private to this file.
 */

/* L(label): spell a label with the ".L" prefix.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers over the assembler's call-frame-information directives.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): declare NAME as a global function symbol, align it to
   16 bytes (.p2align 4), emit its label and open its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* CFI bookkeeping for one 4-byte push: the CFA offset grows by 4 and REG is
   recorded as saved at offset 0 from the new stack pointer.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI bookkeeping for one 4-byte pop: the inverse of CFI_PUSH.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push / pop a register together with the matching CFI annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* ENTRANCE saves %esi then %edi.  RETURN restores them in reverse order and
   returns; the two CFI_PUSH annotations after the `ret' put the unwind state
   back to "both registers saved" for whatever code follows RETURN in the
   file.  Note that RETURN already ends with a `;'.  */
#define ENTRANCE	PUSH(%esi); PUSH(%edi)
#define RETURN		POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);

/* Stack offsets of the arguments on entry, before ENTRANCE has pushed
   anything: 0(%esp) is the return address, so the first argument is at 4.  */
#define PARMS		4
#define STR1		PARMS
#define STR2		STR1+4
86
87 .text
88ENTRY (wcscmp)
89/*
90 * This implementation uses SSE to compare up to 16 bytes at a time.
91*/
92 mov STR1(%esp), %edx
93 mov STR2(%esp), %eax
94
95 mov (%eax), %ecx
96 cmp %ecx, (%edx)
97 jne L(neq)
98 test %ecx, %ecx
99 jz L(eq)
100
101 mov 4(%eax), %ecx
102 cmp %ecx, 4(%edx)
103 jne L(neq)
104 test %ecx, %ecx
105 jz L(eq)
106
107 mov 8(%eax), %ecx
108 cmp %ecx, 8(%edx)
109 jne L(neq)
110 test %ecx, %ecx
111 jz L(eq)
112
113 mov 12(%eax), %ecx
114 cmp %ecx, 12(%edx)
115 jne L(neq)
116 test %ecx, %ecx
117 jz L(eq)
118
119 ENTRANCE
120 add $16, %eax
121 add $16, %edx
122
123 mov %eax, %esi
124 mov %edx, %edi
125 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
126 mov %al, %ch
127 mov %dl, %cl
128 and $63, %eax /* esi alignment in cache line */
129 and $63, %edx /* edi alignment in cache line */
130 and $15, %cl
131 jz L(continue_00)
132 cmp $16, %edx
133 jb L(continue_0)
134 cmp $32, %edx
135 jb L(continue_16)
136 cmp $48, %edx
137 jb L(continue_32)
138
139L(continue_48):
140 and $15, %ch
141 jz L(continue_48_00)
142 cmp $16, %eax
143 jb L(continue_0_48)
144 cmp $32, %eax
145 jb L(continue_16_48)
146 cmp $48, %eax
147 jb L(continue_32_48)
148
149 .p2align 4
150L(continue_48_48):
151 mov (%esi), %ecx
152 cmp %ecx, (%edi)
153 jne L(nequal)
154 test %ecx, %ecx
155 jz L(equal)
156
157 mov 4(%esi), %ecx
158 cmp %ecx, 4(%edi)
159 jne L(nequal)
160 test %ecx, %ecx
161 jz L(equal)
162
163 mov 8(%esi), %ecx
164 cmp %ecx, 8(%edi)
165 jne L(nequal)
166 test %ecx, %ecx
167 jz L(equal)
168
169 mov 12(%esi), %ecx
170 cmp %ecx, 12(%edi)
171 jne L(nequal)
172 test %ecx, %ecx
173 jz L(equal)
174
175 movdqu 16(%edi), %xmm1
176 movdqu 16(%esi), %xmm2
177 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
178 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
179 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
180 pmovmskb %xmm1, %edx
181 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
182 jnz L(less4_double_words_16)
183
184 movdqu 32(%edi), %xmm1
185 movdqu 32(%esi), %xmm2
186 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
187 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
188 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
189 pmovmskb %xmm1, %edx
190 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
191 jnz L(less4_double_words_32)
192
193 movdqu 48(%edi), %xmm1
194 movdqu 48(%esi), %xmm2
195 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
196 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
197 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
198 pmovmskb %xmm1, %edx
199 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
200 jnz L(less4_double_words_48)
201
202 add $64, %esi
203 add $64, %edi
204 jmp L(continue_48_48)
205
206L(continue_0):
207 and $15, %ch
208 jz L(continue_0_00)
209 cmp $16, %eax
210 jb L(continue_0_0)
211 cmp $32, %eax
212 jb L(continue_0_16)
213 cmp $48, %eax
214 jb L(continue_0_32)
215
216 .p2align 4
217L(continue_0_48):
218 mov (%esi), %ecx
219 cmp %ecx, (%edi)
220 jne L(nequal)
221 test %ecx, %ecx
222 jz L(equal)
223
224 mov 4(%esi), %ecx
225 cmp %ecx, 4(%edi)
226 jne L(nequal)
227 test %ecx, %ecx
228 jz L(equal)
229
230 mov 8(%esi), %ecx
231 cmp %ecx, 8(%edi)
232 jne L(nequal)
233 test %ecx, %ecx
234 jz L(equal)
235
236 mov 12(%esi), %ecx
237 cmp %ecx, 12(%edi)
238 jne L(nequal)
239 test %ecx, %ecx
240 jz L(equal)
241
242 movdqu 16(%edi), %xmm1
243 movdqu 16(%esi), %xmm2
244 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
245 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
246 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
247 pmovmskb %xmm1, %edx
248 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
249 jnz L(less4_double_words_16)
250
251 movdqu 32(%edi), %xmm1
252 movdqu 32(%esi), %xmm2
253 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
254 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
255 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
256 pmovmskb %xmm1, %edx
257 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
258 jnz L(less4_double_words_32)
259
260 mov 48(%esi), %ecx
261 cmp %ecx, 48(%edi)
262 jne L(nequal)
263 test %ecx, %ecx
264 jz L(equal)
265
266 mov 52(%esi), %ecx
267 cmp %ecx, 52(%edi)
268 jne L(nequal)
269 test %ecx, %ecx
270 jz L(equal)
271
272 mov 56(%esi), %ecx
273 cmp %ecx, 56(%edi)
274 jne L(nequal)
275 test %ecx, %ecx
276 jz L(equal)
277
278 mov 60(%esi), %ecx
279 cmp %ecx, 60(%edi)
280 jne L(nequal)
281 test %ecx, %ecx
282 jz L(equal)
283
284 add $64, %esi
285 add $64, %edi
286 jmp L(continue_0_48)
287
288 .p2align 4
289L(continue_00):
290 and $15, %ch
291 jz L(continue_00_00)
292 cmp $16, %eax
293 jb L(continue_00_0)
294 cmp $32, %eax
295 jb L(continue_00_16)
296 cmp $48, %eax
297 jb L(continue_00_32)
298
299 .p2align 4
300L(continue_00_48):
301 pcmpeqd (%edi), %xmm0
302 mov (%edi), %eax
303 pmovmskb %xmm0, %ecx
304 test %ecx, %ecx
305 jnz L(less4_double_words1)
306
307 cmp (%esi), %eax
308 jne L(nequal)
309
310 mov 4(%edi), %eax
311 cmp 4(%esi), %eax
312 jne L(nequal)
313
314 mov 8(%edi), %eax
315 cmp 8(%esi), %eax
316 jne L(nequal)
317
318 mov 12(%edi), %eax
319 cmp 12(%esi), %eax
320 jne L(nequal)
321
322 movdqu 16(%esi), %xmm2
323 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
324 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
325 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
326 pmovmskb %xmm2, %edx
327 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
328 jnz L(less4_double_words_16)
329
330 movdqu 32(%esi), %xmm2
331 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
332 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
333 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
334 pmovmskb %xmm2, %edx
335 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
336 jnz L(less4_double_words_32)
337
338 movdqu 48(%esi), %xmm2
339 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
340 pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */
341 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
342 pmovmskb %xmm2, %edx
343 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
344 jnz L(less4_double_words_48)
345
346 add $64, %esi
347 add $64, %edi
348 jmp L(continue_00_48)
349
350 .p2align 4
351L(continue_32):
352 and $15, %ch
353 jz L(continue_32_00)
354 cmp $16, %eax
355 jb L(continue_0_32)
356 cmp $32, %eax
357 jb L(continue_16_32)
358 cmp $48, %eax
359 jb L(continue_32_32)
360
361 .p2align 4
362L(continue_32_48):
363 mov (%esi), %ecx
364 cmp %ecx, (%edi)
365 jne L(nequal)
366 test %ecx, %ecx
367 jz L(equal)
368
369 mov 4(%esi), %ecx
370 cmp %ecx, 4(%edi)
371 jne L(nequal)
372 test %ecx, %ecx
373 jz L(equal)
374
375 mov 8(%esi), %ecx
376 cmp %ecx, 8(%edi)
377 jne L(nequal)
378 test %ecx, %ecx
379 jz L(equal)
380
381 mov 12(%esi), %ecx
382 cmp %ecx, 12(%edi)
383 jne L(nequal)
384 test %ecx, %ecx
385 jz L(equal)
386
387 mov 16(%esi), %ecx
388 cmp %ecx, 16(%edi)
389 jne L(nequal)
390 test %ecx, %ecx
391 jz L(equal)
392
393 mov 20(%esi), %ecx
394 cmp %ecx, 20(%edi)
395 jne L(nequal)
396 test %ecx, %ecx
397 jz L(equal)
398
399 mov 24(%esi), %ecx
400 cmp %ecx, 24(%edi)
401 jne L(nequal)
402 test %ecx, %ecx
403 jz L(equal)
404
405 mov 28(%esi), %ecx
406 cmp %ecx, 28(%edi)
407 jne L(nequal)
408 test %ecx, %ecx
409 jz L(equal)
410
411 movdqu 32(%edi), %xmm1
412 movdqu 32(%esi), %xmm2
413 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
414 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
415 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
416 pmovmskb %xmm1, %edx
417 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
418 jnz L(less4_double_words_32)
419
420 movdqu 48(%edi), %xmm1
421 movdqu 48(%esi), %xmm2
422 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
423 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
424 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
425 pmovmskb %xmm1, %edx
426 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
427 jnz L(less4_double_words_48)
428
429 add $64, %esi
430 add $64, %edi
431 jmp L(continue_32_48)
432
433 .p2align 4
434L(continue_16):
435 and $15, %ch
436 jz L(continue_16_00)
437 cmp $16, %eax
438 jb L(continue_0_16)
439 cmp $32, %eax
440 jb L(continue_16_16)
441 cmp $48, %eax
442 jb L(continue_16_32)
443
444 .p2align 4
445L(continue_16_48):
446 mov (%esi), %ecx
447 cmp %ecx, (%edi)
448 jne L(nequal)
449 test %ecx, %ecx
450 jz L(equal)
451
452 mov 4(%esi), %ecx
453 cmp %ecx, 4(%edi)
454 jne L(nequal)
455 test %ecx, %ecx
456 jz L(equal)
457
458 mov 8(%esi), %ecx
459 cmp %ecx, 8(%edi)
460 jne L(nequal)
461 test %ecx, %ecx
462 jz L(equal)
463
464 mov 12(%esi), %ecx
465 cmp %ecx, 12(%edi)
466 jne L(nequal)
467 test %ecx, %ecx
468 jz L(equal)
469
470 movdqu 16(%edi), %xmm1
471 movdqu 16(%esi), %xmm2
472 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
473 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
474 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
475 pmovmskb %xmm1, %edx
476 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
477 jnz L(less4_double_words_16)
478
479 mov 32(%esi), %ecx
480 cmp %ecx, 32(%edi)
481 jne L(nequal)
482 test %ecx, %ecx
483 jz L(equal)
484
485 mov 36(%esi), %ecx
486 cmp %ecx, 36(%edi)
487 jne L(nequal)
488 test %ecx, %ecx
489 jz L(equal)
490
491 mov 40(%esi), %ecx
492 cmp %ecx, 40(%edi)
493 jne L(nequal)
494 test %ecx, %ecx
495 jz L(equal)
496
497 mov 44(%esi), %ecx
498 cmp %ecx, 44(%edi)
499 jne L(nequal)
500 test %ecx, %ecx
501 jz L(equal)
502
503 movdqu 48(%edi), %xmm1
504 movdqu 48(%esi), %xmm2
505 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
506 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
507 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
508 pmovmskb %xmm1, %edx
509 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
510 jnz L(less4_double_words_48)
511
512 add $64, %esi
513 add $64, %edi
514 jmp L(continue_16_48)
515
516 .p2align 4
517L(continue_00_00):
518 movdqa (%edi), %xmm1
519 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
520 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
521 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
522 pmovmskb %xmm1, %edx
523 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
524 jnz L(less4_double_words)
525
526 movdqa 16(%edi), %xmm3
527 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
528 pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */
529 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
530 pmovmskb %xmm3, %edx
531 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
532 jnz L(less4_double_words_16)
533
534 movdqa 32(%edi), %xmm5
535 pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
536 pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */
537 psubb %xmm0, %xmm5 /* packed sub of comparison results*/
538 pmovmskb %xmm5, %edx
539 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
540 jnz L(less4_double_words_32)
541
542 movdqa 48(%edi), %xmm1
543 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
544 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
545 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
546 pmovmskb %xmm1, %edx
547 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
548 jnz L(less4_double_words_48)
549
550 add $64, %esi
551 add $64, %edi
552 jmp L(continue_00_00)
553
554 .p2align 4
555L(continue_00_32):
556 movdqu (%esi), %xmm2
557 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
558 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
559 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
560 pmovmskb %xmm2, %edx
561 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
562 jnz L(less4_double_words)
563
564 add $16, %esi
565 add $16, %edi
566 jmp L(continue_00_48)
567
568 .p2align 4
569L(continue_00_16):
570 movdqu (%esi), %xmm2
571 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
572 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
573 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
574 pmovmskb %xmm2, %edx
575 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
576 jnz L(less4_double_words)
577
578 movdqu 16(%esi), %xmm2
579 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
580 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
581 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
582 pmovmskb %xmm2, %edx
583 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
584 jnz L(less4_double_words_16)
585
586 add $32, %esi
587 add $32, %edi
588 jmp L(continue_00_48)
589
590 .p2align 4
591L(continue_00_0):
592 movdqu (%esi), %xmm2
593 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
594 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
595 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
596 pmovmskb %xmm2, %edx
597 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
598 jnz L(less4_double_words)
599
600 movdqu 16(%esi), %xmm2
601 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
602 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
603 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
604 pmovmskb %xmm2, %edx
605 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
606 jnz L(less4_double_words_16)
607
608 movdqu 32(%esi), %xmm2
609 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
610 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
611 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
612 pmovmskb %xmm2, %edx
613 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
614 jnz L(less4_double_words_32)
615
616 add $48, %esi
617 add $48, %edi
618 jmp L(continue_00_48)
619
620 .p2align 4
621L(continue_48_00):
622 pcmpeqd (%esi), %xmm0
623 mov (%edi), %eax
624 pmovmskb %xmm0, %ecx
625 test %ecx, %ecx
626 jnz L(less4_double_words1)
627
628 cmp (%esi), %eax
629 jne L(nequal)
630
631 mov 4(%edi), %eax
632 cmp 4(%esi), %eax
633 jne L(nequal)
634
635 mov 8(%edi), %eax
636 cmp 8(%esi), %eax
637 jne L(nequal)
638
639 mov 12(%edi), %eax
640 cmp 12(%esi), %eax
641 jne L(nequal)
642
643 movdqu 16(%edi), %xmm1
644 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
645 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
646 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
647 pmovmskb %xmm1, %edx
648 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
649 jnz L(less4_double_words_16)
650
651 movdqu 32(%edi), %xmm1
652 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
653 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
654 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
655 pmovmskb %xmm1, %edx
656 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
657 jnz L(less4_double_words_32)
658
659 movdqu 48(%edi), %xmm1
660 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
661 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
662 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
663 pmovmskb %xmm1, %edx
664 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
665 jnz L(less4_double_words_48)
666
667 add $64, %esi
668 add $64, %edi
669 jmp L(continue_48_00)
670
671 .p2align 4
672L(continue_32_00):
673 movdqu (%edi), %xmm1
674 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
675 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
676 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
677 pmovmskb %xmm1, %edx
678 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
679 jnz L(less4_double_words)
680
681 add $16, %esi
682 add $16, %edi
683 jmp L(continue_48_00)
684
685 .p2align 4
686L(continue_16_00):
687 movdqu (%edi), %xmm1
688 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
689 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
690 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
691 pmovmskb %xmm1, %edx
692 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
693 jnz L(less4_double_words)
694
695 movdqu 16(%edi), %xmm1
696 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
697 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
698 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
699 pmovmskb %xmm1, %edx
700 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
701 jnz L(less4_double_words_16)
702
703 add $32, %esi
704 add $32, %edi
705 jmp L(continue_48_00)
706
707 .p2align 4
708L(continue_0_00):
709 movdqu (%edi), %xmm1
710 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
711 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
712 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
713 pmovmskb %xmm1, %edx
714 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
715 jnz L(less4_double_words)
716
717 movdqu 16(%edi), %xmm1
718 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
719 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
720 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
721 pmovmskb %xmm1, %edx
722 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
723 jnz L(less4_double_words_16)
724
725 movdqu 32(%edi), %xmm1
726 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
727 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
728 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
729 pmovmskb %xmm1, %edx
730 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
731 jnz L(less4_double_words_32)
732
733 add $48, %esi
734 add $48, %edi
735 jmp L(continue_48_00)
736
737 .p2align 4
738L(continue_32_32):
739 movdqu (%edi), %xmm1
740 movdqu (%esi), %xmm2
741 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
742 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
743 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
744 pmovmskb %xmm1, %edx
745 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
746 jnz L(less4_double_words)
747
748 add $16, %esi
749 add $16, %edi
750 jmp L(continue_48_48)
751
752 .p2align 4
753L(continue_16_16):
754 movdqu (%edi), %xmm1
755 movdqu (%esi), %xmm2
756 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
757 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
758 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
759 pmovmskb %xmm1, %edx
760 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
761 jnz L(less4_double_words)
762
763 movdqu 16(%edi), %xmm3
764 movdqu 16(%esi), %xmm4
765 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
766 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
767 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
768 pmovmskb %xmm3, %edx
769 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
770 jnz L(less4_double_words_16)
771
772 add $32, %esi
773 add $32, %edi
774 jmp L(continue_48_48)
775
776 .p2align 4
777L(continue_0_0):
778 movdqu (%edi), %xmm1
779 movdqu (%esi), %xmm2
780 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
781 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
782 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
783 pmovmskb %xmm1, %edx
784 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
785 jnz L(less4_double_words)
786
787 movdqu 16(%edi), %xmm3
788 movdqu 16(%esi), %xmm4
789 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
790 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
791 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
792 pmovmskb %xmm3, %edx
793 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
794 jnz L(less4_double_words_16)
795
796 movdqu 32(%edi), %xmm1
797 movdqu 32(%esi), %xmm2
798 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
799 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
800 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
801 pmovmskb %xmm1, %edx
802 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
803 jnz L(less4_double_words_32)
804
805 add $48, %esi
806 add $48, %edi
807 jmp L(continue_48_48)
808
809 .p2align 4
810L(continue_0_16):
811 movdqu (%edi), %xmm1
812 movdqu (%esi), %xmm2
813 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
814 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
815 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
816 pmovmskb %xmm1, %edx
817 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
818 jnz L(less4_double_words)
819
820 movdqu 16(%edi), %xmm1
821 movdqu 16(%esi), %xmm2
822 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
823 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
824 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
825 pmovmskb %xmm1, %edx
826 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
827 jnz L(less4_double_words_16)
828
829 add $32, %esi
830 add $32, %edi
831 jmp L(continue_32_48)
832
833 .p2align 4
834L(continue_0_32):
835 movdqu (%edi), %xmm1
836 movdqu (%esi), %xmm2
837 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
838 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
839 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
840 pmovmskb %xmm1, %edx
841 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
842 jnz L(less4_double_words)
843
844 add $16, %esi
845 add $16, %edi
846 jmp L(continue_16_48)
847
848 .p2align 4
849L(continue_16_32):
850 movdqu (%edi), %xmm1
851 movdqu (%esi), %xmm2
852 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
853 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
854 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
855 pmovmskb %xmm1, %edx
856 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
857 jnz L(less4_double_words)
858
859 add $16, %esi
860 add $16, %edi
861 jmp L(continue_32_48)
862
863 .p2align 4
864L(less4_double_words1):
865 cmp (%esi), %eax
866 jne L(nequal)
867 test %eax, %eax
868 jz L(equal)
869
870 mov 4(%esi), %ecx
871 cmp %ecx, 4(%edi)
872 jne L(nequal)
873 test %ecx, %ecx
874 jz L(equal)
875
876 mov 8(%esi), %ecx
877 cmp %ecx, 8(%edi)
878 jne L(nequal)
879 test %ecx, %ecx
880 jz L(equal)
881
882 mov 12(%esi), %ecx
883 cmp %ecx, 12(%edi)
884 jne L(nequal)
885 xor %eax, %eax
886 RETURN
887
888 .p2align 4
889L(less4_double_words):
890 xor %eax, %eax
891 test %dl, %dl
892 jz L(next_two_double_words)
893 and $15, %dl
894 jz L(second_double_word)
895 mov (%esi), %ecx
896 cmp %ecx, (%edi)
897 jne L(nequal)
898 RETURN
899
900 .p2align 4
901L(second_double_word):
902 mov 4(%esi), %ecx
903 cmp %ecx, 4(%edi)
904 jne L(nequal)
905 RETURN
906
907 .p2align 4
908L(next_two_double_words):
909 and $15, %dh
910 jz L(fourth_double_word)
911 mov 8(%esi), %ecx
912 cmp %ecx, 8(%edi)
913 jne L(nequal)
914 RETURN
915
916 .p2align 4
917L(fourth_double_word):
918 mov 12(%esi), %ecx
919 cmp %ecx, 12(%edi)
920 jne L(nequal)
921 RETURN
922
923 .p2align 4
924L(less4_double_words_16):
925 xor %eax, %eax
926 test %dl, %dl
927 jz L(next_two_double_words_16)
928 and $15, %dl
929 jz L(second_double_word_16)
930 mov 16(%esi), %ecx
931 cmp %ecx, 16(%edi)
932 jne L(nequal)
933 RETURN
934
935 .p2align 4
936L(second_double_word_16):
937 mov 20(%esi), %ecx
938 cmp %ecx, 20(%edi)
939 jne L(nequal)
940 RETURN
941
942 .p2align 4
943L(next_two_double_words_16):
944 and $15, %dh
945 jz L(fourth_double_word_16)
946 mov 24(%esi), %ecx
947 cmp %ecx, 24(%edi)
948 jne L(nequal)
949 RETURN
950
951 .p2align 4
952L(fourth_double_word_16):
953 mov 28(%esi), %ecx
954 cmp %ecx, 28(%edi)
955 jne L(nequal)
956 RETURN
957
958 .p2align 4
959L(less4_double_words_32):
960 xor %eax, %eax
961 test %dl, %dl
962 jz L(next_two_double_words_32)
963 and $15, %dl
964 jz L(second_double_word_32)
965 mov 32(%esi), %ecx
966 cmp %ecx, 32(%edi)
967 jne L(nequal)
968 RETURN
969
970 .p2align 4
971L(second_double_word_32):
972 mov 36(%esi), %ecx
973 cmp %ecx, 36(%edi)
974 jne L(nequal)
975 RETURN
976
977 .p2align 4
978L(next_two_double_words_32):
979 and $15, %dh
980 jz L(fourth_double_word_32)
981 mov 40(%esi), %ecx
982 cmp %ecx, 40(%edi)
983 jne L(nequal)
984 RETURN
985
986 .p2align 4
987L(fourth_double_word_32):
988 mov 44(%esi), %ecx
989 cmp %ecx, 44(%edi)
990 jne L(nequal)
991 RETURN
992
993 .p2align 4
994L(less4_double_words_48):
995 xor %eax, %eax
996 test %dl, %dl
997 jz L(next_two_double_words_48)
998 and $15, %dl
999 jz L(second_double_word_48)
1000 mov 48(%esi), %ecx
1001 cmp %ecx, 48(%edi)
1002 jne L(nequal)
1003 RETURN
1004
1005 .p2align 4
1006L(second_double_word_48):
1007 mov 52(%esi), %ecx
1008 cmp %ecx, 52(%edi)
1009 jne L(nequal)
1010 RETURN
1011
1012 .p2align 4
1013L(next_two_double_words_48):
1014 and $15, %dh
1015 jz L(fourth_double_word_48)
1016 mov 56(%esi), %ecx
1017 cmp %ecx, 56(%edi)
1018 jne L(nequal)
1019 RETURN
1020
1021 .p2align 4
1022L(fourth_double_word_48):
1023 mov 60(%esi), %ecx
1024 cmp %ecx, 60(%edi)
1025 jne L(nequal)
1026 RETURN
1027
1028 .p2align 4
1029L(nequal):
1030 mov $1, %eax
1031 jg L(return)
1032 neg %eax
1033 RETURN
1034
1035 .p2align 4
1036L(return):
1037 RETURN
1038
1039 .p2align 4
1040L(equal):
1041 xorl %eax, %eax
1042 RETURN
1043
1044 CFI_POP (%edi)
1045 CFI_POP (%esi)
1046
1047 .p2align 4
1048L(neq):
1049 mov $1, %eax
1050 jg L(neq_bigger)
1051 neg %eax
1052
1053L(neq_bigger):
1054 ret
1055
1056 .p2align 4
1057L(eq):
1058 xorl %eax, %eax
1059 ret
1060
1061END (wcscmp)
1062