blob: 30e3173f6ac2790a51a28f871f84522b82689e1e [file] [log] [blame]
Bruce Beare8ff1a272010-03-04 11:03:37 -08001/*
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002Copyright (c) 2010, 2011 Intel Corporation
Bruce Beare8ff1a272010-03-04 11:03:37 -08003All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
/* L(label): build a label name by token-pasting ".L" in front of `label`.
   Defined only if the including file has not already provided L.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -080031#ifndef L
32# define L(label) .L##label
33#endif
34
/* Thin wrappers around the assembler's .cfi_* directives.  Each wrapper is
   defined only when no definition already exists, and expands to the
   directive of the same name with its arguments passed through unchanged.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -080035#ifndef cfi_startproc
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040036# define cfi_startproc .cfi_startproc
Bruce Beare8ff1a272010-03-04 11:03:37 -080037#endif
38
39#ifndef cfi_endproc
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040040# define cfi_endproc .cfi_endproc
Bruce Beare8ff1a272010-03-04 11:03:37 -080041#endif
42
/* cfi_rel_offset(reg, off) -> .cfi_rel_offset reg, off  */
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
45#endif
46
/* cfi_restore(reg) -> .cfi_restore reg  */
47#ifndef cfi_restore
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040048# define cfi_restore(reg) .cfi_restore reg
Bruce Beare8ff1a272010-03-04 11:03:37 -080049#endif
50
/* cfi_adjust_cfa_offset(off) -> .cfi_adjust_cfa_offset off  */
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
53#endif
54
/* cfi_remember_state / cfi_restore_state are used in pairs throughout the
   function body below, in front of each label that follows a jmp.  */
Bruce Beare124a5422010-10-11 12:24:41 -070055#ifndef cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040056# define cfi_remember_state .cfi_remember_state
Bruce Beare124a5422010-10-11 12:24:41 -070057#endif
58
59#ifndef cfi_restore_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040060# define cfi_restore_state .cfi_restore_state
Bruce Beare124a5422010-10-11 12:24:41 -070061#endif
62
/* ENTRY(name): open a function.  Marks `name` as a function-type symbol,
   makes it global, aligns the location counter with .p2align 4, emits the
   `name:` label and starts a CFI procedure.  Defined only if ENTRY is not
   already provided.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -080063#ifndef ENTRY
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040064# define ENTRY(name) \
65 .type name, @function; \
66 .globl name; \
67 .p2align 4; \
68name: \
Bruce Beare8ff1a272010-03-04 11:03:37 -080069 cfi_startproc
70#endif
71
/* END(name): close a function opened with ENTRY.  Ends the CFI procedure and
   sets the symbol size to the distance from `name` to the current location
   (.-name).  Defined only if END is not already provided.  */
72#ifndef END
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040073# define END(name) \
74 cfi_endproc; \
Bruce Beare8ff1a272010-03-04 11:03:37 -080075 .size name, .-name
76#endif
77
/* Symbol name given to the routine emitted by ENTRY (MEMCMP) below.  Defaults
   to memcmp; a definition of MEMCMP made before this point overrides it.  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040078#ifndef MEMCMP
79# define MEMCMP memcmp
80#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -080081
/* CFI_PUSH(REG): unwind annotations that accompany a 4-byte push of REG:
   grow the CFA offset by 4 and record REG as saved at offset 0.  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040082#define CFI_PUSH(REG) \
83 cfi_adjust_cfa_offset (4); \
84 cfi_rel_offset (REG, 0)
85
/* CFI_POP(REG): the inverse annotations for a 4-byte pop of REG: shrink the
   CFA offset by 4 and mark REG as restored.  */
86#define CFI_POP(REG) \
87 cfi_adjust_cfa_offset (-4); \
88 cfi_restore (REG)
Bruce Beare8ff1a272010-03-04 11:03:37 -080089
/* PUSH / POP: the pushl / popl instruction followed by the matching CFI
   annotations, so stack changes and unwind info stay in step.  */
90#define PUSH(REG) pushl REG; CFI_PUSH (REG)
91#define POP(REG) popl REG; CFI_POP (REG)
92
/* Displacements from %esp used to load the three arguments at function
   entry, before any register has been pushed: BLK1 = 4, BLK2 = 8, LEN = 12.
   BLK1 is loaded into %eax, BLK2 into %edx and LEN into %ecx.  */
93#define PARMS 4
94#define BLK1 PARMS
95#define BLK2 BLK1+4
96#define LEN BLK2+4
/* RETURN_END: pop %edi, %esi and %ebx (the reverse of the order in which
   L(48bytesormore) pushes them) and return.  */
97#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
/* RETURN: RETURN_END followed by cfi_restore_state; cfi_remember_state.
   NOTE(review): presumably this re-establishes the saved CFI state for the
   code assembled after the ret -- confirm against the use sites, which are
   outside this excerpt.  */
Bruce Beare124a5422010-10-11 12:24:41 -070098#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
Bruce Beare8ff1a272010-03-04 11:03:37 -080099
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400100/* Warning!
101 wmemcmp has to use SIGNED comparison for elements.
102 memcmp has to use UNSIGNED comparison for elements.
103*/
104
105 .text
Bruce Beare8ff1a272010-03-04 11:03:37 -0800106ENTRY (MEMCMP)
107 movl LEN(%esp), %ecx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400108
109#ifdef USE_AS_WMEMCMP
110 shl $2, %ecx
111 jz L(zero)
112#endif
113
Bruce Beare8ff1a272010-03-04 11:03:37 -0800114 movl BLK1(%esp), %eax
115 cmp $48, %ecx
116 movl BLK2(%esp), %edx
117 jae L(48bytesormore)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400118
119#ifndef USE_AS_WMEMCMP
Bruce Beare8ff1a272010-03-04 11:03:37 -0800120 cmp $1, %ecx
121 jbe L(less1bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400122#endif
123
124 PUSH (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800125 add %ecx, %edx
126 add %ecx, %eax
127 jmp L(less48bytes)
128
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400129 CFI_POP (%ebx)
130
131#ifndef USE_AS_WMEMCMP
132 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800133L(less1bytes):
134 jb L(zero)
135 movb (%eax), %cl
136 cmp (%edx), %cl
137 je L(zero)
138 mov $1, %eax
139 ja L(1bytesend)
140 neg %eax
141L(1bytesend):
142 ret
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400143#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800144
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400145 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800146L(zero):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400147 xor %eax, %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -0800148 ret
149
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400150 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800151L(48bytesormore):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400152 PUSH (%ebx)
153 PUSH (%esi)
154 PUSH (%edi)
Bruce Beare124a5422010-10-11 12:24:41 -0700155 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400156 movdqu (%eax), %xmm3
157 movdqu (%edx), %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800158 movl %eax, %edi
159 movl %edx, %esi
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400160 pcmpeqb %xmm0, %xmm3
161 pmovmskb %xmm3, %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800162 lea 16(%edi), %edi
163
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400164 sub $0xffff, %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800165 lea 16(%esi), %esi
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400166 jnz L(less16bytes)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800167 mov %edi, %edx
168 and $0xf, %edx
169 xor %edx, %edi
170 sub %edx, %esi
171 add %edx, %ecx
172 mov %esi, %edx
173 and $0xf, %edx
174 jz L(shr_0)
175 xor %edx, %esi
176
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400177#ifndef USE_AS_WMEMCMP
Bruce Beare8ff1a272010-03-04 11:03:37 -0800178 cmp $8, %edx
179 jae L(next_unaligned_table)
180 cmp $0, %edx
181 je L(shr_0)
182 cmp $1, %edx
183 je L(shr_1)
184 cmp $2, %edx
185 je L(shr_2)
186 cmp $3, %edx
187 je L(shr_3)
188 cmp $4, %edx
189 je L(shr_4)
190 cmp $5, %edx
191 je L(shr_5)
192 cmp $6, %edx
193 je L(shr_6)
194 jmp L(shr_7)
195
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400196 .p2align 2
Bruce Beare8ff1a272010-03-04 11:03:37 -0800197L(next_unaligned_table):
198 cmp $8, %edx
199 je L(shr_8)
200 cmp $9, %edx
201 je L(shr_9)
202 cmp $10, %edx
203 je L(shr_10)
204 cmp $11, %edx
205 je L(shr_11)
206 cmp $12, %edx
207 je L(shr_12)
208 cmp $13, %edx
209 je L(shr_13)
210 cmp $14, %edx
211 je L(shr_14)
212 jmp L(shr_15)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400213#else
214 cmp $0, %edx
215 je L(shr_0)
216 cmp $4, %edx
217 je L(shr_4)
218 cmp $8, %edx
219 je L(shr_8)
220 jmp L(shr_12)
221#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800222
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400223 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800224L(shr_0):
225 cmp $80, %ecx
226 jae L(shr_0_gobble)
227 lea -48(%ecx), %ecx
228 xor %eax, %eax
229 movaps (%esi), %xmm1
230 pcmpeqb (%edi), %xmm1
231 movaps 16(%esi), %xmm2
232 pcmpeqb 16(%edi), %xmm2
233 pand %xmm1, %xmm2
234 pmovmskb %xmm2, %edx
235 add $32, %edi
236 add $32, %esi
237 sub $0xffff, %edx
238 jnz L(exit)
239
240 lea (%ecx, %edi,1), %eax
241 lea (%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400242 POP (%edi)
243 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800244 jmp L(less48bytes)
245
Bruce Beare124a5422010-10-11 12:24:41 -0700246 cfi_restore_state
247 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400248 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800249L(shr_0_gobble):
250 lea -48(%ecx), %ecx
251 movdqa (%esi), %xmm0
252 xor %eax, %eax
253 pcmpeqb (%edi), %xmm0
254 sub $32, %ecx
255 movdqa 16(%esi), %xmm2
256 pcmpeqb 16(%edi), %xmm2
257L(shr_0_gobble_loop):
258 pand %xmm0, %xmm2
259 sub $32, %ecx
260 pmovmskb %xmm2, %edx
261 movdqa %xmm0, %xmm1
262 movdqa 32(%esi), %xmm0
263 movdqa 48(%esi), %xmm2
264 sbb $0xffff, %edx
265 pcmpeqb 32(%edi), %xmm0
266 pcmpeqb 48(%edi), %xmm2
267 lea 32(%edi), %edi
268 lea 32(%esi), %esi
269 jz L(shr_0_gobble_loop)
270
271 pand %xmm0, %xmm2
272 cmp $0, %ecx
273 jge L(shr_0_gobble_loop_next)
274 inc %edx
275 add $32, %ecx
276L(shr_0_gobble_loop_next):
277 test %edx, %edx
278 jnz L(exit)
279
280 pmovmskb %xmm2, %edx
281 movdqa %xmm0, %xmm1
282 lea 32(%edi), %edi
283 lea 32(%esi), %esi
284 sub $0xffff, %edx
285 jnz L(exit)
286 lea (%ecx, %edi,1), %eax
287 lea (%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400288 POP (%edi)
289 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800290 jmp L(less48bytes)
291
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400292#ifndef USE_AS_WMEMCMP
Bruce Beare124a5422010-10-11 12:24:41 -0700293 cfi_restore_state
294 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400295 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800296L(shr_1):
297 cmp $80, %ecx
298 lea -48(%ecx), %ecx
299 mov %edx, %eax
300 jae L(shr_1_gobble)
301
302 movdqa 16(%esi), %xmm1
303 movdqa %xmm1, %xmm2
304 palignr $1,(%esi), %xmm1
305 pcmpeqb (%edi), %xmm1
306
307 movdqa 32(%esi), %xmm3
308 palignr $1,%xmm2, %xmm3
309 pcmpeqb 16(%edi), %xmm3
310
311 pand %xmm1, %xmm3
312 pmovmskb %xmm3, %edx
313 lea 32(%edi), %edi
314 lea 32(%esi), %esi
315 sub $0xffff, %edx
316 jnz L(exit)
317 lea (%ecx, %edi,1), %eax
318 lea 1(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400319 POP (%edi)
320 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800321 jmp L(less48bytes)
322
Bruce Beare124a5422010-10-11 12:24:41 -0700323 cfi_restore_state
324 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400325 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800326L(shr_1_gobble):
327 sub $32, %ecx
328 movdqa 16(%esi), %xmm0
329 palignr $1,(%esi), %xmm0
330 pcmpeqb (%edi), %xmm0
331
332 movdqa 32(%esi), %xmm3
333 palignr $1,16(%esi), %xmm3
334 pcmpeqb 16(%edi), %xmm3
335
336L(shr_1_gobble_loop):
337 pand %xmm0, %xmm3
338 sub $32, %ecx
339 pmovmskb %xmm3, %edx
340 movdqa %xmm0, %xmm1
341
342 movdqa 64(%esi), %xmm3
343 palignr $1,48(%esi), %xmm3
344 sbb $0xffff, %edx
345 movdqa 48(%esi), %xmm0
346 palignr $1,32(%esi), %xmm0
347 pcmpeqb 32(%edi), %xmm0
348 lea 32(%esi), %esi
349 pcmpeqb 48(%edi), %xmm3
350
351 lea 32(%edi), %edi
352 jz L(shr_1_gobble_loop)
353 pand %xmm0, %xmm3
354
355 cmp $0, %ecx
356 jge L(shr_1_gobble_next)
357 inc %edx
358 add $32, %ecx
359L(shr_1_gobble_next):
360 test %edx, %edx
361 jnz L(exit)
362
363 pmovmskb %xmm3, %edx
364 movdqa %xmm0, %xmm1
365 lea 32(%edi), %edi
366 lea 32(%esi), %esi
367 sub $0xffff, %edx
368 jnz L(exit)
369
370 lea (%ecx, %edi,1), %eax
371 lea 1(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400372 POP (%edi)
373 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800374 jmp L(less48bytes)
375
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400376
Bruce Beare124a5422010-10-11 12:24:41 -0700377 cfi_restore_state
378 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400379 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800380L(shr_2):
381 cmp $80, %ecx
382 lea -48(%ecx), %ecx
383 mov %edx, %eax
384 jae L(shr_2_gobble)
385
386 movdqa 16(%esi), %xmm1
387 movdqa %xmm1, %xmm2
388 palignr $2,(%esi), %xmm1
389 pcmpeqb (%edi), %xmm1
390
391 movdqa 32(%esi), %xmm3
392 palignr $2,%xmm2, %xmm3
393 pcmpeqb 16(%edi), %xmm3
394
395 pand %xmm1, %xmm3
396 pmovmskb %xmm3, %edx
397 lea 32(%edi), %edi
398 lea 32(%esi), %esi
399 sub $0xffff, %edx
400 jnz L(exit)
401 lea (%ecx, %edi,1), %eax
402 lea 2(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400403 POP (%edi)
404 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800405 jmp L(less48bytes)
406
Bruce Beare124a5422010-10-11 12:24:41 -0700407 cfi_restore_state
408 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400409 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800410L(shr_2_gobble):
411 sub $32, %ecx
412 movdqa 16(%esi), %xmm0
413 palignr $2,(%esi), %xmm0
414 pcmpeqb (%edi), %xmm0
415
416 movdqa 32(%esi), %xmm3
417 palignr $2,16(%esi), %xmm3
418 pcmpeqb 16(%edi), %xmm3
419
420L(shr_2_gobble_loop):
421 pand %xmm0, %xmm3
422 sub $32, %ecx
423 pmovmskb %xmm3, %edx
424 movdqa %xmm0, %xmm1
425
426 movdqa 64(%esi), %xmm3
427 palignr $2,48(%esi), %xmm3
428 sbb $0xffff, %edx
429 movdqa 48(%esi), %xmm0
430 palignr $2,32(%esi), %xmm0
431 pcmpeqb 32(%edi), %xmm0
432 lea 32(%esi), %esi
433 pcmpeqb 48(%edi), %xmm3
434
435 lea 32(%edi), %edi
436 jz L(shr_2_gobble_loop)
437 pand %xmm0, %xmm3
438
439 cmp $0, %ecx
440 jge L(shr_2_gobble_next)
441 inc %edx
442 add $32, %ecx
443L(shr_2_gobble_next):
444 test %edx, %edx
445 jnz L(exit)
446
447 pmovmskb %xmm3, %edx
448 movdqa %xmm0, %xmm1
449 lea 32(%edi), %edi
450 lea 32(%esi), %esi
451 sub $0xffff, %edx
452 jnz L(exit)
453
454 lea (%ecx, %edi,1), %eax
455 lea 2(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400456 POP (%edi)
457 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800458 jmp L(less48bytes)
459
Bruce Beare124a5422010-10-11 12:24:41 -0700460 cfi_restore_state
461 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400462 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800463L(shr_3):
464 cmp $80, %ecx
465 lea -48(%ecx), %ecx
466 mov %edx, %eax
467 jae L(shr_3_gobble)
468
469 movdqa 16(%esi), %xmm1
470 movdqa %xmm1, %xmm2
471 palignr $3,(%esi), %xmm1
472 pcmpeqb (%edi), %xmm1
473
474 movdqa 32(%esi), %xmm3
475 palignr $3,%xmm2, %xmm3
476 pcmpeqb 16(%edi), %xmm3
477
478 pand %xmm1, %xmm3
479 pmovmskb %xmm3, %edx
480 lea 32(%edi), %edi
481 lea 32(%esi), %esi
482 sub $0xffff, %edx
483 jnz L(exit)
484 lea (%ecx, %edi,1), %eax
485 lea 3(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400486 POP (%edi)
487 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800488 jmp L(less48bytes)
489
Bruce Beare124a5422010-10-11 12:24:41 -0700490 cfi_restore_state
491 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400492 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800493L(shr_3_gobble):
494 sub $32, %ecx
495 movdqa 16(%esi), %xmm0
496 palignr $3,(%esi), %xmm0
497 pcmpeqb (%edi), %xmm0
498
499 movdqa 32(%esi), %xmm3
500 palignr $3,16(%esi), %xmm3
501 pcmpeqb 16(%edi), %xmm3
502
503L(shr_3_gobble_loop):
504 pand %xmm0, %xmm3
505 sub $32, %ecx
506 pmovmskb %xmm3, %edx
507 movdqa %xmm0, %xmm1
508
509 movdqa 64(%esi), %xmm3
510 palignr $3,48(%esi), %xmm3
511 sbb $0xffff, %edx
512 movdqa 48(%esi), %xmm0
513 palignr $3,32(%esi), %xmm0
514 pcmpeqb 32(%edi), %xmm0
515 lea 32(%esi), %esi
516 pcmpeqb 48(%edi), %xmm3
517
518 lea 32(%edi), %edi
519 jz L(shr_3_gobble_loop)
520 pand %xmm0, %xmm3
521
522 cmp $0, %ecx
523 jge L(shr_3_gobble_next)
524 inc %edx
525 add $32, %ecx
526L(shr_3_gobble_next):
527 test %edx, %edx
528 jnz L(exit)
529
530 pmovmskb %xmm3, %edx
531 movdqa %xmm0, %xmm1
532 lea 32(%edi), %edi
533 lea 32(%esi), %esi
534 sub $0xffff, %edx
535 jnz L(exit)
536
537 lea (%ecx, %edi,1), %eax
538 lea 3(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400539 POP (%edi)
540 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800541 jmp L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400542#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800543
Bruce Beare124a5422010-10-11 12:24:41 -0700544 cfi_restore_state
545 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400546 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800547L(shr_4):
548 cmp $80, %ecx
549 lea -48(%ecx), %ecx
550 mov %edx, %eax
551 jae L(shr_4_gobble)
552
553 movdqa 16(%esi), %xmm1
554 movdqa %xmm1, %xmm2
555 palignr $4,(%esi), %xmm1
556 pcmpeqb (%edi), %xmm1
557
558 movdqa 32(%esi), %xmm3
559 palignr $4,%xmm2, %xmm3
560 pcmpeqb 16(%edi), %xmm3
561
562 pand %xmm1, %xmm3
563 pmovmskb %xmm3, %edx
564 lea 32(%edi), %edi
565 lea 32(%esi), %esi
566 sub $0xffff, %edx
567 jnz L(exit)
568 lea (%ecx, %edi,1), %eax
569 lea 4(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400570 POP (%edi)
571 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800572 jmp L(less48bytes)
573
Bruce Beare124a5422010-10-11 12:24:41 -0700574 cfi_restore_state
575 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400576 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800577L(shr_4_gobble):
578 sub $32, %ecx
579 movdqa 16(%esi), %xmm0
580 palignr $4,(%esi), %xmm0
581 pcmpeqb (%edi), %xmm0
582
583 movdqa 32(%esi), %xmm3
584 palignr $4,16(%esi), %xmm3
585 pcmpeqb 16(%edi), %xmm3
586
587L(shr_4_gobble_loop):
588 pand %xmm0, %xmm3
589 sub $32, %ecx
590 pmovmskb %xmm3, %edx
591 movdqa %xmm0, %xmm1
592
593 movdqa 64(%esi), %xmm3
594 palignr $4,48(%esi), %xmm3
595 sbb $0xffff, %edx
596 movdqa 48(%esi), %xmm0
597 palignr $4,32(%esi), %xmm0
598 pcmpeqb 32(%edi), %xmm0
599 lea 32(%esi), %esi
600 pcmpeqb 48(%edi), %xmm3
601
602 lea 32(%edi), %edi
603 jz L(shr_4_gobble_loop)
604 pand %xmm0, %xmm3
605
606 cmp $0, %ecx
607 jge L(shr_4_gobble_next)
608 inc %edx
609 add $32, %ecx
610L(shr_4_gobble_next):
611 test %edx, %edx
612 jnz L(exit)
613
614 pmovmskb %xmm3, %edx
615 movdqa %xmm0, %xmm1
616 lea 32(%edi), %edi
617 lea 32(%esi), %esi
618 sub $0xffff, %edx
619 jnz L(exit)
620
621 lea (%ecx, %edi,1), %eax
622 lea 4(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400623 POP (%edi)
624 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800625 jmp L(less48bytes)
626
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400627#ifndef USE_AS_WMEMCMP
Bruce Beare124a5422010-10-11 12:24:41 -0700628 cfi_restore_state
629 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400630 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800631L(shr_5):
632 cmp $80, %ecx
633 lea -48(%ecx), %ecx
634 mov %edx, %eax
635 jae L(shr_5_gobble)
636
637 movdqa 16(%esi), %xmm1
638 movdqa %xmm1, %xmm2
639 palignr $5,(%esi), %xmm1
640 pcmpeqb (%edi), %xmm1
641
642 movdqa 32(%esi), %xmm3
643 palignr $5,%xmm2, %xmm3
644 pcmpeqb 16(%edi), %xmm3
645
646 pand %xmm1, %xmm3
647 pmovmskb %xmm3, %edx
648 lea 32(%edi), %edi
649 lea 32(%esi), %esi
650 sub $0xffff, %edx
651 jnz L(exit)
652 lea (%ecx, %edi,1), %eax
653 lea 5(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400654 POP (%edi)
655 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800656 jmp L(less48bytes)
657
Bruce Beare124a5422010-10-11 12:24:41 -0700658 cfi_restore_state
659 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400660 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800661L(shr_5_gobble):
662 sub $32, %ecx
663 movdqa 16(%esi), %xmm0
664 palignr $5,(%esi), %xmm0
665 pcmpeqb (%edi), %xmm0
666
667 movdqa 32(%esi), %xmm3
668 palignr $5,16(%esi), %xmm3
669 pcmpeqb 16(%edi), %xmm3
670
671L(shr_5_gobble_loop):
672 pand %xmm0, %xmm3
673 sub $32, %ecx
674 pmovmskb %xmm3, %edx
675 movdqa %xmm0, %xmm1
676
677 movdqa 64(%esi), %xmm3
678 palignr $5,48(%esi), %xmm3
679 sbb $0xffff, %edx
680 movdqa 48(%esi), %xmm0
681 palignr $5,32(%esi), %xmm0
682 pcmpeqb 32(%edi), %xmm0
683 lea 32(%esi), %esi
684 pcmpeqb 48(%edi), %xmm3
685
686 lea 32(%edi), %edi
687 jz L(shr_5_gobble_loop)
688 pand %xmm0, %xmm3
689
690 cmp $0, %ecx
691 jge L(shr_5_gobble_next)
692 inc %edx
693 add $32, %ecx
694L(shr_5_gobble_next):
695 test %edx, %edx
696 jnz L(exit)
697
698 pmovmskb %xmm3, %edx
699 movdqa %xmm0, %xmm1
700 lea 32(%edi), %edi
701 lea 32(%esi), %esi
702 sub $0xffff, %edx
703 jnz L(exit)
704
705 lea (%ecx, %edi,1), %eax
706 lea 5(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400707 POP (%edi)
708 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800709 jmp L(less48bytes)
710
Bruce Beare124a5422010-10-11 12:24:41 -0700711 cfi_restore_state
712 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400713 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800714L(shr_6):
715 cmp $80, %ecx
716 lea -48(%ecx), %ecx
717 mov %edx, %eax
718 jae L(shr_6_gobble)
719
720 movdqa 16(%esi), %xmm1
721 movdqa %xmm1, %xmm2
722 palignr $6,(%esi), %xmm1
723 pcmpeqb (%edi), %xmm1
724
725 movdqa 32(%esi), %xmm3
726 palignr $6,%xmm2, %xmm3
727 pcmpeqb 16(%edi), %xmm3
728
729 pand %xmm1, %xmm3
730 pmovmskb %xmm3, %edx
731 lea 32(%edi), %edi
732 lea 32(%esi), %esi
733 sub $0xffff, %edx
734 jnz L(exit)
735 lea (%ecx, %edi,1), %eax
736 lea 6(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400737 POP (%edi)
738 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800739 jmp L(less48bytes)
740
Bruce Beare124a5422010-10-11 12:24:41 -0700741 cfi_restore_state
742 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400743 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800744L(shr_6_gobble):
745 sub $32, %ecx
746 movdqa 16(%esi), %xmm0
747 palignr $6,(%esi), %xmm0
748 pcmpeqb (%edi), %xmm0
749
750 movdqa 32(%esi), %xmm3
751 palignr $6,16(%esi), %xmm3
752 pcmpeqb 16(%edi), %xmm3
753
754L(shr_6_gobble_loop):
755 pand %xmm0, %xmm3
756 sub $32, %ecx
757 pmovmskb %xmm3, %edx
758 movdqa %xmm0, %xmm1
759
760 movdqa 64(%esi), %xmm3
761 palignr $6,48(%esi), %xmm3
762 sbb $0xffff, %edx
763 movdqa 48(%esi), %xmm0
764 palignr $6,32(%esi), %xmm0
765 pcmpeqb 32(%edi), %xmm0
766 lea 32(%esi), %esi
767 pcmpeqb 48(%edi), %xmm3
768
769 lea 32(%edi), %edi
770 jz L(shr_6_gobble_loop)
771 pand %xmm0, %xmm3
772
773 cmp $0, %ecx
774 jge L(shr_6_gobble_next)
775 inc %edx
776 add $32, %ecx
777L(shr_6_gobble_next):
778 test %edx, %edx
779 jnz L(exit)
780
781 pmovmskb %xmm3, %edx
782 movdqa %xmm0, %xmm1
783 lea 32(%edi), %edi
784 lea 32(%esi), %esi
785 sub $0xffff, %edx
786 jnz L(exit)
787
788 lea (%ecx, %edi,1), %eax
789 lea 6(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400790 POP (%edi)
791 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800792 jmp L(less48bytes)
793
Bruce Beare124a5422010-10-11 12:24:41 -0700794 cfi_restore_state
795 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400796 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800797L(shr_7):
798 cmp $80, %ecx
799 lea -48(%ecx), %ecx
800 mov %edx, %eax
801 jae L(shr_7_gobble)
802
803 movdqa 16(%esi), %xmm1
804 movdqa %xmm1, %xmm2
805 palignr $7,(%esi), %xmm1
806 pcmpeqb (%edi), %xmm1
807
808 movdqa 32(%esi), %xmm3
809 palignr $7,%xmm2, %xmm3
810 pcmpeqb 16(%edi), %xmm3
811
812 pand %xmm1, %xmm3
813 pmovmskb %xmm3, %edx
814 lea 32(%edi), %edi
815 lea 32(%esi), %esi
816 sub $0xffff, %edx
817 jnz L(exit)
818 lea (%ecx, %edi,1), %eax
819 lea 7(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400820 POP (%edi)
821 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800822 jmp L(less48bytes)
823
Bruce Beare124a5422010-10-11 12:24:41 -0700824 cfi_restore_state
825 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400826 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800827L(shr_7_gobble):
828 sub $32, %ecx
829 movdqa 16(%esi), %xmm0
830 palignr $7,(%esi), %xmm0
831 pcmpeqb (%edi), %xmm0
832
833 movdqa 32(%esi), %xmm3
834 palignr $7,16(%esi), %xmm3
835 pcmpeqb 16(%edi), %xmm3
836
837L(shr_7_gobble_loop):
838 pand %xmm0, %xmm3
839 sub $32, %ecx
840 pmovmskb %xmm3, %edx
841 movdqa %xmm0, %xmm1
842
843 movdqa 64(%esi), %xmm3
844 palignr $7,48(%esi), %xmm3
845 sbb $0xffff, %edx
846 movdqa 48(%esi), %xmm0
847 palignr $7,32(%esi), %xmm0
848 pcmpeqb 32(%edi), %xmm0
849 lea 32(%esi), %esi
850 pcmpeqb 48(%edi), %xmm3
851
852 lea 32(%edi), %edi
853 jz L(shr_7_gobble_loop)
854 pand %xmm0, %xmm3
855
856 cmp $0, %ecx
857 jge L(shr_7_gobble_next)
858 inc %edx
859 add $32, %ecx
860L(shr_7_gobble_next):
861 test %edx, %edx
862 jnz L(exit)
863
864 pmovmskb %xmm3, %edx
865 movdqa %xmm0, %xmm1
866 lea 32(%edi), %edi
867 lea 32(%esi), %esi
868 sub $0xffff, %edx
869 jnz L(exit)
870
871 lea (%ecx, %edi,1), %eax
872 lea 7(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400873 POP (%edi)
874 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800875 jmp L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400876#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800877
Bruce Beare124a5422010-10-11 12:24:41 -0700878 cfi_restore_state
879 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400880 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800881L(shr_8):
882 cmp $80, %ecx
883 lea -48(%ecx), %ecx
884 mov %edx, %eax
885 jae L(shr_8_gobble)
886
887 movdqa 16(%esi), %xmm1
888 movdqa %xmm1, %xmm2
889 palignr $8,(%esi), %xmm1
890 pcmpeqb (%edi), %xmm1
891
892 movdqa 32(%esi), %xmm3
893 palignr $8,%xmm2, %xmm3
894 pcmpeqb 16(%edi), %xmm3
895
896 pand %xmm1, %xmm3
897 pmovmskb %xmm3, %edx
898 lea 32(%edi), %edi
899 lea 32(%esi), %esi
900 sub $0xffff, %edx
901 jnz L(exit)
902 lea (%ecx, %edi,1), %eax
903 lea 8(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400904 POP (%edi)
905 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800906 jmp L(less48bytes)
907
Bruce Beare124a5422010-10-11 12:24:41 -0700908 cfi_restore_state
909 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400910 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800911L(shr_8_gobble):
912 sub $32, %ecx
913 movdqa 16(%esi), %xmm0
914 palignr $8,(%esi), %xmm0
915 pcmpeqb (%edi), %xmm0
916
917 movdqa 32(%esi), %xmm3
918 palignr $8,16(%esi), %xmm3
919 pcmpeqb 16(%edi), %xmm3
920
921L(shr_8_gobble_loop):
922 pand %xmm0, %xmm3
923 sub $32, %ecx
924 pmovmskb %xmm3, %edx
925 movdqa %xmm0, %xmm1
926
927 movdqa 64(%esi), %xmm3
928 palignr $8,48(%esi), %xmm3
929 sbb $0xffff, %edx
930 movdqa 48(%esi), %xmm0
931 palignr $8,32(%esi), %xmm0
932 pcmpeqb 32(%edi), %xmm0
933 lea 32(%esi), %esi
934 pcmpeqb 48(%edi), %xmm3
935
936 lea 32(%edi), %edi
937 jz L(shr_8_gobble_loop)
938 pand %xmm0, %xmm3
939
940 cmp $0, %ecx
941 jge L(shr_8_gobble_next)
942 inc %edx
943 add $32, %ecx
944L(shr_8_gobble_next):
945 test %edx, %edx
946 jnz L(exit)
947
948 pmovmskb %xmm3, %edx
949 movdqa %xmm0, %xmm1
950 lea 32(%edi), %edi
951 lea 32(%esi), %esi
952 sub $0xffff, %edx
953 jnz L(exit)
954
955 lea (%ecx, %edi,1), %eax
956 lea 8(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400957 POP (%edi)
958 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800959 jmp L(less48bytes)
960
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400961#ifndef USE_AS_WMEMCMP
Bruce Beare124a5422010-10-11 12:24:41 -0700962 cfi_restore_state
963 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400964 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800965L(shr_9):
966 cmp $80, %ecx
967 lea -48(%ecx), %ecx
968 mov %edx, %eax
969 jae L(shr_9_gobble)
970
971 movdqa 16(%esi), %xmm1
972 movdqa %xmm1, %xmm2
973 palignr $9,(%esi), %xmm1
974 pcmpeqb (%edi), %xmm1
975
976 movdqa 32(%esi), %xmm3
977 palignr $9,%xmm2, %xmm3
978 pcmpeqb 16(%edi), %xmm3
979
980 pand %xmm1, %xmm3
981 pmovmskb %xmm3, %edx
982 lea 32(%edi), %edi
983 lea 32(%esi), %esi
984 sub $0xffff, %edx
985 jnz L(exit)
986 lea (%ecx, %edi,1), %eax
987 lea 9(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400988 POP (%edi)
989 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800990 jmp L(less48bytes)
991
Bruce Beare124a5422010-10-11 12:24:41 -0700992 cfi_restore_state
993 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400994 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800995L(shr_9_gobble):
996 sub $32, %ecx
997 movdqa 16(%esi), %xmm0
998 palignr $9,(%esi), %xmm0
999 pcmpeqb (%edi), %xmm0
1000
1001 movdqa 32(%esi), %xmm3
1002 palignr $9,16(%esi), %xmm3
1003 pcmpeqb 16(%edi), %xmm3
1004
1005L(shr_9_gobble_loop):
1006 pand %xmm0, %xmm3
1007 sub $32, %ecx
1008 pmovmskb %xmm3, %edx
1009 movdqa %xmm0, %xmm1
1010
1011 movdqa 64(%esi), %xmm3
1012 palignr $9,48(%esi), %xmm3
1013 sbb $0xffff, %edx
1014 movdqa 48(%esi), %xmm0
1015 palignr $9,32(%esi), %xmm0
1016 pcmpeqb 32(%edi), %xmm0
1017 lea 32(%esi), %esi
1018 pcmpeqb 48(%edi), %xmm3
1019
1020 lea 32(%edi), %edi
1021 jz L(shr_9_gobble_loop)
1022 pand %xmm0, %xmm3
1023
1024 cmp $0, %ecx
1025 jge L(shr_9_gobble_next)
1026 inc %edx
1027 add $32, %ecx
1028L(shr_9_gobble_next):
1029 test %edx, %edx
1030 jnz L(exit)
1031
1032 pmovmskb %xmm3, %edx
1033 movdqa %xmm0, %xmm1
1034 lea 32(%edi), %edi
1035 lea 32(%esi), %esi
1036 sub $0xffff, %edx
1037 jnz L(exit)
1038
1039 lea (%ecx, %edi,1), %eax
1040 lea 9(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001041 POP (%edi)
1042 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001043 jmp L(less48bytes)
1044
Bruce Beare124a5422010-10-11 12:24:41 -07001045 cfi_restore_state
1046 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001047 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001048L(shr_10):
1049 cmp $80, %ecx
1050 lea -48(%ecx), %ecx
1051 mov %edx, %eax
1052 jae L(shr_10_gobble)
1053
1054 movdqa 16(%esi), %xmm1
1055 movdqa %xmm1, %xmm2
1056 palignr $10, (%esi), %xmm1
1057 pcmpeqb (%edi), %xmm1
1058
1059 movdqa 32(%esi), %xmm3
1060 palignr $10,%xmm2, %xmm3
1061 pcmpeqb 16(%edi), %xmm3
1062
1063 pand %xmm1, %xmm3
1064 pmovmskb %xmm3, %edx
1065 lea 32(%edi), %edi
1066 lea 32(%esi), %esi
1067 sub $0xffff, %edx
1068 jnz L(exit)
1069 lea (%ecx, %edi,1), %eax
1070 lea 10(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001071 POP (%edi)
1072 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001073 jmp L(less48bytes)
1074
Bruce Beare124a5422010-10-11 12:24:41 -07001075 cfi_restore_state
1076 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001077 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001078L(shr_10_gobble):
1079 sub $32, %ecx
1080 movdqa 16(%esi), %xmm0
1081 palignr $10, (%esi), %xmm0
1082 pcmpeqb (%edi), %xmm0
1083
1084 movdqa 32(%esi), %xmm3
1085 palignr $10, 16(%esi), %xmm3
1086 pcmpeqb 16(%edi), %xmm3
1087
1088L(shr_10_gobble_loop):
1089 pand %xmm0, %xmm3
1090 sub $32, %ecx
1091 pmovmskb %xmm3, %edx
1092 movdqa %xmm0, %xmm1
1093
1094 movdqa 64(%esi), %xmm3
1095 palignr $10,48(%esi), %xmm3
1096 sbb $0xffff, %edx
1097 movdqa 48(%esi), %xmm0
1098 palignr $10,32(%esi), %xmm0
1099 pcmpeqb 32(%edi), %xmm0
1100 lea 32(%esi), %esi
1101 pcmpeqb 48(%edi), %xmm3
1102
1103 lea 32(%edi), %edi
1104 jz L(shr_10_gobble_loop)
1105 pand %xmm0, %xmm3
1106
1107 cmp $0, %ecx
1108 jge L(shr_10_gobble_next)
1109 inc %edx
1110 add $32, %ecx
1111L(shr_10_gobble_next):
1112 test %edx, %edx
1113 jnz L(exit)
1114
1115 pmovmskb %xmm3, %edx
1116 movdqa %xmm0, %xmm1
1117 lea 32(%edi), %edi
1118 lea 32(%esi), %esi
1119 sub $0xffff, %edx
1120 jnz L(exit)
1121
1122 lea (%ecx, %edi,1), %eax
1123 lea 10(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001124 POP (%edi)
1125 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001126 jmp L(less48bytes)
1127
/* L(shr_11): shift-by-11 compare path.  palignr $11 extracts the 16 bytes
   starting at %esi+11 from two 16-byte aligned loads; they are compared with
   the chunk at %edi.  Counts of 80 or more (unsigned) go to the
   L(shr_11_gobble) loop; otherwise one 32-byte compare is done here and the
   remainder is handed to L(less48bytes).
   %edx on entry is saved in %eax, which L(exit) later adds to %esi
   (presumably the shift amount -- confirm against the dispatch code, which is
   outside this chunk).  */
Bruce Beare124a5422010-10-11 12:24:41 -07001128 cfi_restore_state
1129 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001130 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001131L(shr_11):
	/* lea does not modify EFLAGS, so the jae still tests cmp $80.  */
1132 cmp $80, %ecx
1133 lea -48(%ecx), %ecx
1134 mov %edx, %eax
1135 jae L(shr_11_gobble)
1136
	/* %xmm1 = equality mask for bytes 0..15, %xmm2 keeps the raw 16(%esi)
	   load so it can be reused as the low half of the next palignr.  */
1137 movdqa 16(%esi), %xmm1
1138 movdqa %xmm1, %xmm2
1139 palignr $11, (%esi), %xmm1
1140 pcmpeqb (%edi), %xmm1
1141
	/* %xmm3 = equality mask for bytes 16..31.  */
1142 movdqa 32(%esi), %xmm3
1143 palignr $11, %xmm2, %xmm3
1144 pcmpeqb 16(%edi), %xmm3
1145
	/* Combined 16-bit mask is 0xffff iff all 32 bytes matched.  */
1146 pand %xmm1, %xmm3
1147 pmovmskb %xmm3, %edx
1148 lea 32(%edi), %edi
1149 lea 32(%esi), %esi
1150 sub $0xffff, %edx
1151 jnz L(exit)
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 11.  */
1152 lea (%ecx, %edi,1), %eax
1153 lea 11(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001154 POP (%edi)
1155 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001156 jmp L(less48bytes)
1157
/* L(shr_11_gobble): bulk loop of the shift-by-11 path (entered from L(shr_11)
   when the count was >= 80), 32 bytes per iteration.  Chunks starting at
   %esi+11 are rebuilt with palignr $11 from aligned loads and compared with
   chunks at %edi.  The masks for the NEXT 32 bytes are computed while the
   current mask is tested.
     %xmm0/%xmm3 equality masks of the pending block (first/second 16 bytes)
     %xmm1       copy of %xmm0 for the block whose result is in %edx
     %edx        pmovmskb mask minus 0xffff: zero iff all 32 bytes matched
   EFLAGS flow sub -> sbb -> jz across the SSE/lea instructions; do not
   reorder.  */
Bruce Beare124a5422010-10-11 12:24:41 -07001158 cfi_restore_state
1159 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001160 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001161L(shr_11_gobble):
1162 sub $32, %ecx
1163 movdqa 16(%esi), %xmm0
1164 palignr $11, (%esi), %xmm0
1165 pcmpeqb (%edi), %xmm0
1166
1167 movdqa 32(%esi), %xmm3
1168 palignr $11, 16(%esi), %xmm3
1169 pcmpeqb 16(%edi), %xmm3
1170
1171L(shr_11_gobble_loop):
1172 pand %xmm0, %xmm3
	/* CF is set if this subtraction borrows; the sbb below consumes it.  */
1173 sub $32, %ecx
1174 pmovmskb %xmm3, %edx
1175 movdqa %xmm0, %xmm1
1176
1177 movdqa 64(%esi), %xmm3
1178 palignr $11,48(%esi), %xmm3
	/* %edx = mask - 0xffff - CF; the jz below tests this result because
	   nothing in between writes EFLAGS.  */
1179 sbb $0xffff, %edx
1180 movdqa 48(%esi), %xmm0
1181 palignr $11,32(%esi), %xmm0
1182 pcmpeqb 32(%edi), %xmm0
1183 lea 32(%esi), %esi
1184 pcmpeqb 48(%edi), %xmm3
1185
1186 lea 32(%edi), %edi
1187 jz L(shr_11_gobble_loop)
	/* Loop left: finish the combined mask of the pre-loaded block.  */
1188 pand %xmm0, %xmm3
1189
	/* Negative count: undo the borrow folded into %edx and restore %ecx.  */
1190 cmp $0, %ecx
1191 jge L(shr_11_gobble_next)
1192 inc %edx
1193 add $32, %ecx
1194L(shr_11_gobble_next):
	/* Nonzero %edx: mismatch in the block just tested.  */
1195 test %edx, %edx
1196 jnz L(exit)
1197
	/* No mismatch yet: test the pre-loaded block too.  */
1198 pmovmskb %xmm3, %edx
1199 movdqa %xmm0, %xmm1
1200 lea 32(%edi), %edi
1201 lea 32(%esi), %esi
1202 sub $0xffff, %edx
1203 jnz L(exit)
1204
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 11.  */
1205 lea (%ecx, %edi,1), %eax
1206 lea 11(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001207 POP (%edi)
1208 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001209 jmp L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001210#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001211
/* L(shr_12): shift-by-12 compare path (assembled for both the memcmp and the
   USE_AS_WMEMCMP builds, unlike its shift-by-11 and shift-by-13 neighbours).
   palignr $12 extracts the 16 bytes starting at %esi+12 from two aligned
   loads; they are compared with the chunk at %edi.  Counts of 80 or more
   (unsigned) go to L(shr_12_gobble); otherwise one 32-byte compare is done
   here and the remainder is handed to L(less48bytes).
   %edx on entry is saved in %eax, which L(exit) later adds to %esi
   (presumably the shift amount -- confirm against the dispatch code).  */
Bruce Beare124a5422010-10-11 12:24:41 -07001212 cfi_restore_state
1213 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001214 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001215L(shr_12):
	/* lea does not modify EFLAGS, so the jae still tests cmp $80.  */
1216 cmp $80, %ecx
1217 lea -48(%ecx), %ecx
1218 mov %edx, %eax
1219 jae L(shr_12_gobble)
1220
	/* %xmm1 = equality mask for bytes 0..15; %xmm2 keeps the raw load for
	   reuse as the low half of the next palignr.  */
1221 movdqa 16(%esi), %xmm1
1222 movdqa %xmm1, %xmm2
1223 palignr $12, (%esi), %xmm1
1224 pcmpeqb (%edi), %xmm1
1225
	/* %xmm3 = equality mask for bytes 16..31.  */
1226 movdqa 32(%esi), %xmm3
1227 palignr $12, %xmm2, %xmm3
1228 pcmpeqb 16(%edi), %xmm3
1229
	/* Combined 16-bit mask is 0xffff iff all 32 bytes matched.  */
1230 pand %xmm1, %xmm3
1231 pmovmskb %xmm3, %edx
1232 lea 32(%edi), %edi
1233 lea 32(%esi), %esi
1234 sub $0xffff, %edx
1235 jnz L(exit)
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 12.  */
1236 lea (%ecx, %edi,1), %eax
1237 lea 12(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001238 POP (%edi)
1239 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001240 jmp L(less48bytes)
1241
/* L(shr_12_gobble): bulk loop of the shift-by-12 path (entered from L(shr_12)
   when the count was >= 80), 32 bytes per iteration.  Chunks starting at
   %esi+12 are rebuilt with palignr $12 from aligned loads and compared with
   chunks at %edi; masks for the NEXT 32 bytes are computed while the current
   mask is tested.
     %xmm0/%xmm3 equality masks of the pending block (first/second 16 bytes)
     %xmm1       copy of %xmm0 for the block whose result is in %edx
     %edx        pmovmskb mask minus 0xffff: zero iff all 32 bytes matched
   EFLAGS flow sub -> sbb -> jz across the SSE/lea instructions; do not
   reorder.  */
Bruce Beare124a5422010-10-11 12:24:41 -07001242 cfi_restore_state
1243 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001244 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001245L(shr_12_gobble):
1246 sub $32, %ecx
1247 movdqa 16(%esi), %xmm0
1248 palignr $12, (%esi), %xmm0
1249 pcmpeqb (%edi), %xmm0
1250
1251 movdqa 32(%esi), %xmm3
1252 palignr $12, 16(%esi), %xmm3
1253 pcmpeqb 16(%edi), %xmm3
1254
1255L(shr_12_gobble_loop):
1256 pand %xmm0, %xmm3
	/* CF is set if this subtraction borrows; the sbb below consumes it.  */
1257 sub $32, %ecx
1258 pmovmskb %xmm3, %edx
1259 movdqa %xmm0, %xmm1
1260
1261 movdqa 64(%esi), %xmm3
1262 palignr $12,48(%esi), %xmm3
	/* %edx = mask - 0xffff - CF; the jz below tests this result because
	   nothing in between writes EFLAGS.  */
1263 sbb $0xffff, %edx
1264 movdqa 48(%esi), %xmm0
1265 palignr $12,32(%esi), %xmm0
1266 pcmpeqb 32(%edi), %xmm0
1267 lea 32(%esi), %esi
1268 pcmpeqb 48(%edi), %xmm3
1269
1270 lea 32(%edi), %edi
1271 jz L(shr_12_gobble_loop)
	/* Loop left: finish the combined mask of the pre-loaded block.  */
1272 pand %xmm0, %xmm3
1273
	/* Negative count: undo the borrow folded into %edx and restore %ecx.  */
1274 cmp $0, %ecx
1275 jge L(shr_12_gobble_next)
1276 inc %edx
1277 add $32, %ecx
1278L(shr_12_gobble_next):
	/* Nonzero %edx: mismatch in the block just tested.  */
1279 test %edx, %edx
1280 jnz L(exit)
1281
	/* No mismatch yet: test the pre-loaded block too.  */
1282 pmovmskb %xmm3, %edx
1283 movdqa %xmm0, %xmm1
1284 lea 32(%edi), %edi
1285 lea 32(%esi), %esi
1286 sub $0xffff, %edx
1287 jnz L(exit)
1288
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 12.  */
1289 lea (%ecx, %edi,1), %eax
1290 lea 12(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001291 POP (%edi)
1292 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001293 jmp L(less48bytes)
1294
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001295#ifndef USE_AS_WMEMCMP
/* L(shr_13): shift-by-13 compare path (only assembled when USE_AS_WMEMCMP is
   not defined).  palignr $13 extracts the 16 bytes starting at %esi+13 from
   two aligned loads; they are compared with the chunk at %edi.  Counts of 80
   or more (unsigned) go to L(shr_13_gobble); otherwise one 32-byte compare is
   done here and the remainder is handed to L(less48bytes).
   %edx on entry is saved in %eax, which L(exit) later adds to %esi
   (presumably the shift amount -- confirm against the dispatch code).  */
Bruce Beare124a5422010-10-11 12:24:41 -07001296 cfi_restore_state
1297 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001298 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001299L(shr_13):
	/* lea does not modify EFLAGS, so the jae still tests cmp $80.  */
1300 cmp $80, %ecx
1301 lea -48(%ecx), %ecx
1302 mov %edx, %eax
1303 jae L(shr_13_gobble)
1304
	/* %xmm1 = equality mask for bytes 0..15; %xmm2 keeps the raw load for
	   reuse as the low half of the next palignr.  */
1305 movdqa 16(%esi), %xmm1
1306 movdqa %xmm1, %xmm2
1307 palignr $13, (%esi), %xmm1
1308 pcmpeqb (%edi), %xmm1
1309
	/* %xmm3 = equality mask for bytes 16..31.  */
1310 movdqa 32(%esi), %xmm3
1311 palignr $13, %xmm2, %xmm3
1312 pcmpeqb 16(%edi), %xmm3
1313
	/* Combined 16-bit mask is 0xffff iff all 32 bytes matched.  */
1314 pand %xmm1, %xmm3
1315 pmovmskb %xmm3, %edx
1316 lea 32(%edi), %edi
1317 lea 32(%esi), %esi
1318 sub $0xffff, %edx
1319 jnz L(exit)
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 13.  */
1320 lea (%ecx, %edi,1), %eax
1321 lea 13(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001322 POP (%edi)
1323 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001324 jmp L(less48bytes)
1325
/* L(shr_13_gobble): bulk loop of the shift-by-13 path (entered from L(shr_13)
   when the count was >= 80), 32 bytes per iteration.  Chunks starting at
   %esi+13 are rebuilt with palignr $13 from aligned loads and compared with
   chunks at %edi; masks for the NEXT 32 bytes are computed while the current
   mask is tested.
     %xmm0/%xmm3 equality masks of the pending block (first/second 16 bytes)
     %xmm1       copy of %xmm0 for the block whose result is in %edx
     %edx        pmovmskb mask minus 0xffff: zero iff all 32 bytes matched
   EFLAGS flow sub -> sbb -> jz across the SSE/lea instructions; do not
   reorder.  */
Bruce Beare124a5422010-10-11 12:24:41 -07001326 cfi_restore_state
1327 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001328 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001329L(shr_13_gobble):
1330 sub $32, %ecx
1331 movdqa 16(%esi), %xmm0
1332 palignr $13, (%esi), %xmm0
1333 pcmpeqb (%edi), %xmm0
1334
1335 movdqa 32(%esi), %xmm3
1336 palignr $13, 16(%esi), %xmm3
1337 pcmpeqb 16(%edi), %xmm3
1338
1339L(shr_13_gobble_loop):
1340 pand %xmm0, %xmm3
	/* CF is set if this subtraction borrows; the sbb below consumes it.  */
1341 sub $32, %ecx
1342 pmovmskb %xmm3, %edx
1343 movdqa %xmm0, %xmm1
1344
1345 movdqa 64(%esi), %xmm3
1346 palignr $13,48(%esi), %xmm3
	/* %edx = mask - 0xffff - CF; the jz below tests this result because
	   nothing in between writes EFLAGS.  */
1347 sbb $0xffff, %edx
1348 movdqa 48(%esi), %xmm0
1349 palignr $13,32(%esi), %xmm0
1350 pcmpeqb 32(%edi), %xmm0
1351 lea 32(%esi), %esi
1352 pcmpeqb 48(%edi), %xmm3
1353
1354 lea 32(%edi), %edi
1355 jz L(shr_13_gobble_loop)
	/* Loop left: finish the combined mask of the pre-loaded block.  */
1356 pand %xmm0, %xmm3
1357
	/* Negative count: undo the borrow folded into %edx and restore %ecx.  */
1358 cmp $0, %ecx
1359 jge L(shr_13_gobble_next)
1360 inc %edx
1361 add $32, %ecx
1362L(shr_13_gobble_next):
	/* Nonzero %edx: mismatch in the block just tested.  */
1363 test %edx, %edx
1364 jnz L(exit)
1365
	/* No mismatch yet: test the pre-loaded block too.  */
1366 pmovmskb %xmm3, %edx
1367 movdqa %xmm0, %xmm1
1368 lea 32(%edi), %edi
1369 lea 32(%esi), %esi
1370 sub $0xffff, %edx
1371 jnz L(exit)
1372
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 13.  */
1373 lea (%ecx, %edi,1), %eax
1374 lea 13(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001375 POP (%edi)
1376 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001377 jmp L(less48bytes)
1378
/* L(shr_14): shift-by-14 compare path (only assembled when USE_AS_WMEMCMP is
   not defined).  palignr $14 extracts the 16 bytes starting at %esi+14 from
   two aligned loads; they are compared with the chunk at %edi.  Counts of 80
   or more (unsigned) go to L(shr_14_gobble); otherwise one 32-byte compare is
   done here and the remainder is handed to L(less48bytes).
   %edx on entry is saved in %eax, which L(exit) later adds to %esi
   (presumably the shift amount -- confirm against the dispatch code).  */
Bruce Beare124a5422010-10-11 12:24:41 -07001379 cfi_restore_state
1380 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001381 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001382L(shr_14):
	/* lea does not modify EFLAGS, so the jae still tests cmp $80.  */
1383 cmp $80, %ecx
1384 lea -48(%ecx), %ecx
1385 mov %edx, %eax
1386 jae L(shr_14_gobble)
1387
	/* %xmm1 = equality mask for bytes 0..15; %xmm2 keeps the raw load for
	   reuse as the low half of the next palignr.  */
1388 movdqa 16(%esi), %xmm1
1389 movdqa %xmm1, %xmm2
1390 palignr $14, (%esi), %xmm1
1391 pcmpeqb (%edi), %xmm1
1392
	/* %xmm3 = equality mask for bytes 16..31.  */
1393 movdqa 32(%esi), %xmm3
1394 palignr $14, %xmm2, %xmm3
1395 pcmpeqb 16(%edi), %xmm3
1396
	/* Combined 16-bit mask is 0xffff iff all 32 bytes matched.  */
1397 pand %xmm1, %xmm3
1398 pmovmskb %xmm3, %edx
1399 lea 32(%edi), %edi
1400 lea 32(%esi), %esi
1401 sub $0xffff, %edx
1402 jnz L(exit)
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 14.  */
1403 lea (%ecx, %edi,1), %eax
1404 lea 14(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001405 POP (%edi)
1406 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001407 jmp L(less48bytes)
1408
/* L(shr_14_gobble): bulk loop of the shift-by-14 path (entered from L(shr_14)
   when the count was >= 80), 32 bytes per iteration.  Chunks starting at
   %esi+14 are rebuilt with palignr $14 from aligned loads and compared with
   chunks at %edi; masks for the NEXT 32 bytes are computed while the current
   mask is tested.
     %xmm0/%xmm3 equality masks of the pending block (first/second 16 bytes)
     %xmm1       copy of %xmm0 for the block whose result is in %edx
     %edx        pmovmskb mask minus 0xffff: zero iff all 32 bytes matched
   EFLAGS flow sub -> sbb -> jz across the SSE/lea instructions; do not
   reorder.  */
Bruce Beare124a5422010-10-11 12:24:41 -07001409 cfi_restore_state
1410 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001411 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001412L(shr_14_gobble):
1413 sub $32, %ecx
1414 movdqa 16(%esi), %xmm0
1415 palignr $14, (%esi), %xmm0
1416 pcmpeqb (%edi), %xmm0
1417
1418 movdqa 32(%esi), %xmm3
1419 palignr $14, 16(%esi), %xmm3
1420 pcmpeqb 16(%edi), %xmm3
1421
1422L(shr_14_gobble_loop):
1423 pand %xmm0, %xmm3
	/* CF is set if this subtraction borrows; the sbb below consumes it.  */
1424 sub $32, %ecx
1425 pmovmskb %xmm3, %edx
1426 movdqa %xmm0, %xmm1
1427
1428 movdqa 64(%esi), %xmm3
1429 palignr $14,48(%esi), %xmm3
	/* %edx = mask - 0xffff - CF; the jz below tests this result because
	   nothing in between writes EFLAGS.  */
1430 sbb $0xffff, %edx
1431 movdqa 48(%esi), %xmm0
1432 palignr $14,32(%esi), %xmm0
1433 pcmpeqb 32(%edi), %xmm0
1434 lea 32(%esi), %esi
1435 pcmpeqb 48(%edi), %xmm3
1436
1437 lea 32(%edi), %edi
1438 jz L(shr_14_gobble_loop)
	/* Loop left: finish the combined mask of the pre-loaded block.  */
1439 pand %xmm0, %xmm3
1440
	/* Negative count: undo the borrow folded into %edx and restore %ecx.  */
1441 cmp $0, %ecx
1442 jge L(shr_14_gobble_next)
1443 inc %edx
1444 add $32, %ecx
1445L(shr_14_gobble_next):
	/* Nonzero %edx: mismatch in the block just tested.  */
1446 test %edx, %edx
1447 jnz L(exit)
1448
	/* No mismatch yet: test the pre-loaded block too.  */
1449 pmovmskb %xmm3, %edx
1450 movdqa %xmm0, %xmm1
1451 lea 32(%edi), %edi
1452 lea 32(%esi), %esi
1453 sub $0xffff, %edx
1454 jnz L(exit)
1455
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 14.  */
1456 lea (%ecx, %edi,1), %eax
1457 lea 14(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001458 POP (%edi)
1459 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001460 jmp L(less48bytes)
1461
/* L(shr_15): shift-by-15 compare path (only assembled when USE_AS_WMEMCMP is
   not defined).  palignr $15 extracts the 16 bytes starting at %esi+15 from
   two aligned loads; they are compared with the chunk at %edi.  Counts of 80
   or more (unsigned) go to L(shr_15_gobble); otherwise one 32-byte compare is
   done here and the remainder is handed to L(less48bytes).
   %edx on entry is saved in %eax, which L(exit) later adds to %esi
   (presumably the shift amount -- confirm against the dispatch code).  */
Bruce Beare124a5422010-10-11 12:24:41 -07001462 cfi_restore_state
1463 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001464 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001465L(shr_15):
	/* lea does not modify EFLAGS, so the jae still tests cmp $80.  */
1466 cmp $80, %ecx
1467 lea -48(%ecx), %ecx
1468 mov %edx, %eax
1469 jae L(shr_15_gobble)
1470
	/* %xmm1 = equality mask for bytes 0..15; %xmm2 keeps the raw load for
	   reuse as the low half of the next palignr.  */
1471 movdqa 16(%esi), %xmm1
1472 movdqa %xmm1, %xmm2
1473 palignr $15, (%esi), %xmm1
1474 pcmpeqb (%edi), %xmm1
1475
	/* %xmm3 = equality mask for bytes 16..31.  */
1476 movdqa 32(%esi), %xmm3
1477 palignr $15, %xmm2, %xmm3
1478 pcmpeqb 16(%edi), %xmm3
1479
	/* Combined 16-bit mask is 0xffff iff all 32 bytes matched.  */
1480 pand %xmm1, %xmm3
1481 pmovmskb %xmm3, %edx
1482 lea 32(%edi), %edi
1483 lea 32(%esi), %esi
1484 sub $0xffff, %edx
1485 jnz L(exit)
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 15.  */
1486 lea (%ecx, %edi,1), %eax
1487 lea 15(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001488 POP (%edi)
1489 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001490 jmp L(less48bytes)
1491
/* L(shr_15_gobble): bulk loop of the shift-by-15 path (entered from L(shr_15)
   when the count was >= 80), 32 bytes per iteration.  Chunks starting at
   %esi+15 are rebuilt with palignr $15 from aligned loads and compared with
   chunks at %edi; masks for the NEXT 32 bytes are computed while the current
   mask is tested.
     %xmm0/%xmm3 equality masks of the pending block (first/second 16 bytes)
     %xmm1       copy of %xmm0 for the block whose result is in %edx
     %edx        pmovmskb mask minus 0xffff: zero iff all 32 bytes matched
   EFLAGS flow sub -> sbb -> jz across the SSE/lea instructions; do not
   reorder.  */
Bruce Beare124a5422010-10-11 12:24:41 -07001492 cfi_restore_state
1493 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001494 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001495L(shr_15_gobble):
1496 sub $32, %ecx
1497 movdqa 16(%esi), %xmm0
1498 palignr $15, (%esi), %xmm0
1499 pcmpeqb (%edi), %xmm0
1500
1501 movdqa 32(%esi), %xmm3
1502 palignr $15, 16(%esi), %xmm3
1503 pcmpeqb 16(%edi), %xmm3
1504
1505L(shr_15_gobble_loop):
1506 pand %xmm0, %xmm3
	/* CF is set if this subtraction borrows; the sbb below consumes it.  */
1507 sub $32, %ecx
1508 pmovmskb %xmm3, %edx
1509 movdqa %xmm0, %xmm1
1510
1511 movdqa 64(%esi), %xmm3
1512 palignr $15,48(%esi), %xmm3
	/* %edx = mask - 0xffff - CF; the jz below tests this result because
	   nothing in between writes EFLAGS.  */
1513 sbb $0xffff, %edx
1514 movdqa 48(%esi), %xmm0
1515 palignr $15,32(%esi), %xmm0
1516 pcmpeqb 32(%edi), %xmm0
1517 lea 32(%esi), %esi
1518 pcmpeqb 48(%edi), %xmm3
1519
1520 lea 32(%edi), %edi
1521 jz L(shr_15_gobble_loop)
	/* Loop left: finish the combined mask of the pre-loaded block.  */
1522 pand %xmm0, %xmm3
1523
	/* Negative count: undo the borrow folded into %edx and restore %ecx.  */
1524 cmp $0, %ecx
1525 jge L(shr_15_gobble_next)
1526 inc %edx
1527 add $32, %ecx
1528L(shr_15_gobble_next):
	/* Nonzero %edx: mismatch in the block just tested.  */
1529 test %edx, %edx
1530 jnz L(exit)
1531
	/* No mismatch yet: test the pre-loaded block too.  */
1532 pmovmskb %xmm3, %edx
1533 movdqa %xmm0, %xmm1
1534 lea 32(%edi), %edi
1535 lea 32(%esi), %esi
1536 sub $0xffff, %edx
1537 jnz L(exit)
1538
	/* Equal so far: %eax = %edi + %ecx, %edx = %esi + %ecx + 15.  */
1539 lea (%ecx, %edi,1), %eax
1540 lea 15(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001541 POP (%edi)
1542 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001543 jmp L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001544#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001545
/* L(exit): common mismatch exit of the 32-byte compare paths.
   On entry, as set up by the L(shr_N) paths in this file:
     %edx        (combined 32-byte equality mask) - 0xffff, nonzero
     %xmm1       equality mask of the FIRST 16 bytes of that 32-byte block
     %edi/%esi   already advanced 32 bytes past the start of the block
     %eax        value the shr path saved from %edx; it is added to %esi below
                 (presumably the palignr shift, turning the aligned %esi back
                 into the real data address -- confirm with the dispatch code)
   If the first 16 bytes contain the mismatch, both pointers are moved back by
   16 and %edx is replaced by the first-half result, so that in either case
   the mismatching 16-byte chunk ends at %edi/%esi when control falls into
   L(less16bytes).  %ebx is used as scratch here.  */
Bruce Beare124a5422010-10-11 12:24:41 -07001546 cfi_restore_state
1547 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001548 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001549L(exit):
1550 pmovmskb %xmm1, %ebx
	/* Zero result: the first 16 bytes all matched, so the mismatch is in the
	   second 16 bytes and %edx is kept as is.  */
1551 sub $0xffff, %ebx
1552 jz L(first16bytes)
1553 lea -16(%esi), %esi
1554 lea -16(%edi), %edi
1555 mov %ebx, %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001556
Bruce Beare8ff1a272010-03-04 11:03:37 -08001557L(first16bytes):
1558 add %eax, %esi
/* L(less16bytes), byte (memcmp) variant: locate the first differing byte in a
   16-byte chunk and return the difference of the two bytes.
   As set up by L(exit): %edx = (pmovmskb equality mask) - 0xffff and
   %edi/%esi point just past the chunk, so its bytes are at offsets -16..-1.
   mask - 0xffff is the two's-complement negation of the mismatch bitmap, so
   its lowest set bit marks the first mismatching byte; the test chains below
   scan bits in ascending order to find it.  %dl covers bytes 0..7 and %dh
   bytes 8..15 of the chunk.
   Each L(ByteNN) loads the two bytes zero-extended and returns
   (byte at %edi side) - (byte at %esi side) in %eax via RETURN (macro
   defined outside this chunk).  */
1559L(less16bytes):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001560
1561#ifndef USE_AS_WMEMCMP
	/* %dl == 0: no mismatch in the first 8 bytes, look at %dh.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -08001562 test %dl, %dl
1563 jz L(next_24_bytes)
1564
1565 test $0x01, %dl
1566 jnz L(Byte16)
1567
1568 test $0x02, %dl
1569 jnz L(Byte17)
1570
1571 test $0x04, %dl
1572 jnz L(Byte18)
1573
1574 test $0x08, %dl
1575 jnz L(Byte19)
1576
1577 test $0x10, %dl
1578 jnz L(Byte20)
1579
1580 test $0x20, %dl
1581 jnz L(Byte21)
1582
1583 test $0x40, %dl
1584 jnz L(Byte22)
	/* Bits 0..6 clear but %dl nonzero: bit 7, i.e. offset -9.  */
1585L(Byte23):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001586 movzbl -9(%edi), %eax
1587 movzbl -9(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001588 sub %edx, %eax
1589 RETURN
1590
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001591 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001592L(Byte16):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001593 movzbl -16(%edi), %eax
1594 movzbl -16(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001595 sub %edx, %eax
1596 RETURN
1597
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001598 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001599L(Byte17):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001600 movzbl -15(%edi), %eax
1601 movzbl -15(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001602 sub %edx, %eax
1603 RETURN
1604
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001605 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001606L(Byte18):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001607 movzbl -14(%edi), %eax
1608 movzbl -14(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001609 sub %edx, %eax
1610 RETURN
1611
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001612 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001613L(Byte19):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001614 movzbl -13(%edi), %eax
1615 movzbl -13(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001616 sub %edx, %eax
1617 RETURN
1618
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001619 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001620L(Byte20):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001621 movzbl -12(%edi), %eax
1622 movzbl -12(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001623 sub %edx, %eax
1624 RETURN
1625
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001626 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001627L(Byte21):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001628 movzbl -11(%edi), %eax
1629 movzbl -11(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001630 sub %edx, %eax
1631 RETURN
1632
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001633 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001634L(Byte22):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001635 movzbl -10(%edi), %eax
1636 movzbl -10(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001637 sub %edx, %eax
1638 RETURN
1639
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001640 .p2align 4
	/* Mismatch is in the upper 8 bytes: advance both pointers by 8 so the
	   L(Byte16)..L(Byte22) handlers (offsets -16..-10) can be reused for
	   what were offsets -8..-2; the last byte is handled by L(Byte31).  */
Bruce Beare8ff1a272010-03-04 11:03:37 -08001641L(next_24_bytes):
1642 lea 8(%edi), %edi
1643 lea 8(%esi), %esi
1644 test $0x01, %dh
1645 jnz L(Byte16)
1646
1647 test $0x02, %dh
1648 jnz L(Byte17)
1649
1650 test $0x04, %dh
1651 jnz L(Byte18)
1652
1653 test $0x08, %dh
1654 jnz L(Byte19)
1655
1656 test $0x10, %dh
1657 jnz L(Byte20)
1658
1659 test $0x20, %dh
1660 jnz L(Byte21)
1661
1662 test $0x40, %dh
1663 jnz L(Byte22)
1664
	/* Falls through (across the alignment padding) into L(Byte31).  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001665 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001666L(Byte31):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001667 movzbl -9(%edi), %eax
1668 movzbl -9(%esi), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -08001669 sub %edx, %eax
1670 RETURN_END
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001671#else
1672
1673/* special for wmemcmp */
/* wmemcmp variant of the L(less16bytes) body: the mismatching 16-byte chunk
   (ending at %edi/%esi) is treated as four 32-bit elements at offsets
   -16, -12, -8 and -4.  %edx is (equality mask - 0xffff), whose lowest set
   bit marks the first mismatching byte: %dl covers elements 0-1, %dh covers
   elements 2-3, and the low nibble of each selects the first of the pair.
   The selected elements are compared as SIGNED 32-bit values (jg); the
   result in %eax is 1 when the %edi-side element is greater, else -1.
   mov $1 sits between cmp and jg because mov does not modify EFLAGS.  */
1674 test %dl, %dl
1675 jz L(next_two_double_words)
1676 and $15, %dl
1677 jz L(second_double_word)
1678 mov -16(%edi), %ecx
1679 cmp -16(%esi), %ecx
1680 mov $1, %eax
1681 jg L(nequal_bigger)
1682 neg %eax
1683 RETURN
1684
1685
1686 .p2align 4
1687L(second_double_word):
1688 mov -12(%edi), %ecx
1689 cmp -12(%esi), %ecx
1690 mov $1, %eax
1691 jg L(nequal_bigger)
1692 neg %eax
1693 RETURN
1694
1695 .p2align 4
1696L(next_two_double_words):
1697 and $15, %dh
1698 jz L(fourth_double_word)
1699 mov -8(%edi), %ecx
1700 cmp -8(%esi), %ecx
1701 mov $1, %eax
1702 jg L(nequal_bigger)
1703 neg %eax
1704 RETURN
1705
1706 .p2align 4
1707L(fourth_double_word):
1708 mov -4(%edi), %ecx
1709 cmp -4(%esi), %ecx
1710 mov $1, %eax
1711 jg L(nequal_bigger)
1712 neg %eax
1713 RETURN
1714
1715 .p2align 4
	/* Reached with %eax == 1 already set; just return.  */
1716L(nequal_bigger):
1717 RETURN_END
1718#endif
1719
/* L(more8bytes): tail dispatch for a remaining count in %ecx of at least 8
   (reached from L(less48bytes)).  Counts >= 16 are passed on to
   L(more16bytes); 8..15 jump to the matching L(<n>bytes) compare chain.
   In the USE_AS_WMEMCMP build only 8 is tested explicitly and every other
   value goes to L(12bytes) (presumably because counts are multiples of 4
   there -- confirm).
   CFI_PUSH (%ebx) is a macro defined outside this chunk; presumably it only
   emits unwind information recording that %ebx is saved on the stack.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -08001720 CFI_PUSH (%ebx)
1721
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001722 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001723L(more8bytes):
1724 cmp $16, %ecx
1725 jae L(more16bytes)
1726 cmp $8, %ecx
1727 je L(8bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001728#ifndef USE_AS_WMEMCMP
Bruce Beare8ff1a272010-03-04 11:03:37 -08001729 cmp $9, %ecx
1730 je L(9bytes)
1731 cmp $10, %ecx
1732 je L(10bytes)
1733 cmp $11, %ecx
1734 je L(11bytes)
1735 cmp $12, %ecx
1736 je L(12bytes)
1737 cmp $13, %ecx
1738 je L(13bytes)
1739 cmp $14, %ecx
1740 je L(14bytes)
	/* Only 15 is left in the 8..15 range.  */
1741 jmp L(15bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001742#else
1743 jmp L(12bytes)
1744#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001745
/* L(more16bytes): tail dispatch for a remaining count in %ecx of at least 16.
   Counts >= 24 are passed on to L(more24bytes); 16..23 jump to the matching
   L(<n>bytes) compare chain.  In the USE_AS_WMEMCMP build only 16 is tested
   and every other value goes to L(20bytes).  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001746 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001747L(more16bytes):
1748 cmp $24, %ecx
1749 jae L(more24bytes)
1750 cmp $16, %ecx
1751 je L(16bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001752#ifndef USE_AS_WMEMCMP
Bruce Beare8ff1a272010-03-04 11:03:37 -08001753 cmp $17, %ecx
1754 je L(17bytes)
1755 cmp $18, %ecx
1756 je L(18bytes)
1757 cmp $19, %ecx
1758 je L(19bytes)
1759 cmp $20, %ecx
1760 je L(20bytes)
1761 cmp $21, %ecx
1762 je L(21bytes)
1763 cmp $22, %ecx
1764 je L(22bytes)
	/* Only 23 is left in the 16..23 range.  */
1765 jmp L(23bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001766#else
1767 jmp L(20bytes)
1768#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001769
/* L(more24bytes): tail dispatch for a remaining count in %ecx of at least 24.
   Counts >= 32 are passed on to L(more32bytes); 24..31 jump to the matching
   L(<n>bytes) compare chain.  In the USE_AS_WMEMCMP build only 24 is tested
   and every other value goes to L(28bytes).  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001770 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001771L(more24bytes):
1772 cmp $32, %ecx
1773 jae L(more32bytes)
1774 cmp $24, %ecx
1775 je L(24bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001776#ifndef USE_AS_WMEMCMP
Bruce Beare8ff1a272010-03-04 11:03:37 -08001777 cmp $25, %ecx
1778 je L(25bytes)
1779 cmp $26, %ecx
1780 je L(26bytes)
1781 cmp $27, %ecx
1782 je L(27bytes)
1783 cmp $28, %ecx
1784 je L(28bytes)
1785 cmp $29, %ecx
1786 je L(29bytes)
1787 cmp $30, %ecx
1788 je L(30bytes)
	/* Only 31 is left in the 24..31 range.  */
1789 jmp L(31bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001790#else
1791 jmp L(28bytes)
1792#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001793
/* L(more32bytes): tail dispatch for a remaining count in %ecx of at least 32.
   Counts >= 40 are passed on to L(more40bytes); 32..39 jump to the matching
   L(<n>bytes) compare chain.  In the USE_AS_WMEMCMP build only 32 is tested
   and every other value goes to L(36bytes).  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001794 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001795L(more32bytes):
1796 cmp $40, %ecx
1797 jae L(more40bytes)
1798 cmp $32, %ecx
1799 je L(32bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001800#ifndef USE_AS_WMEMCMP
Bruce Beare8ff1a272010-03-04 11:03:37 -08001801 cmp $33, %ecx
1802 je L(33bytes)
1803 cmp $34, %ecx
1804 je L(34bytes)
1805 cmp $35, %ecx
1806 je L(35bytes)
1807 cmp $36, %ecx
1808 je L(36bytes)
1809 cmp $37, %ecx
1810 je L(37bytes)
1811 cmp $38, %ecx
1812 je L(38bytes)
	/* Only 39 is left in the 32..39 range.  */
1813 jmp L(39bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001814#else
1815 jmp L(36bytes)
1816#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001817
/* L(less48bytes): entry of the tail compare.  The L(shr_N) paths jump here
   with %ecx = remaining count and %eax/%edx = the two buffers' END pointers
   (the L(<n>bytes) chains read at negative offsets from them).
   Counts >= 8 go to L(more8bytes); smaller counts are dispatched here.
   NOTE(review): in the byte build the values 0 and 1 are not tested and
   would reach L(7bytes); in the USE_AS_WMEMCMP build every count below 8
   goes to L(4bytes).  Presumably callers never arrive with those counts --
   confirm against the code before this chunk.  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001818 .p2align 4
1819L(less48bytes):
1820 cmp $8, %ecx
1821 jae L(more8bytes)
1822#ifndef USE_AS_WMEMCMP
1823 cmp $2, %ecx
1824 je L(2bytes)
1825 cmp $3, %ecx
1826 je L(3bytes)
1827 cmp $4, %ecx
1828 je L(4bytes)
1829 cmp $5, %ecx
1830 je L(5bytes)
1831 cmp $6, %ecx
1832 je L(6bytes)
1833 jmp L(7bytes)
1834#else
1835 jmp L(4bytes)
1836#endif
1837
/* L(more40bytes): tail dispatch for a remaining count in %ecx of at least 40
   (reached from L(more32bytes)).  40 jumps to L(40bytes).  In the byte build
   41..46 jump to the matching L(<n>bytes) chain and anything else goes to
   L(47bytes).  In the USE_AS_WMEMCMP build the compares below are compiled
   out and control falls through into the L(44bytes) label that follows.  */
1838 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001839L(more40bytes):
1840 cmp $40, %ecx
1841 je L(40bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001842#ifndef USE_AS_WMEMCMP
Bruce Beare8ff1a272010-03-04 11:03:37 -08001843 cmp $41, %ecx
1844 je L(41bytes)
1845 cmp $42, %ecx
1846 je L(42bytes)
1847 cmp $43, %ecx
1848 je L(43bytes)
1849 cmp $44, %ecx
1850 je L(44bytes)
1851 cmp $45, %ecx
1852 je L(45bytes)
1853 cmp $46, %ecx
1854 je L(46bytes)
1855 jmp L(47bytes)
1856
/* L(44bytes) .. L(4bytes), byte (memcmp) build: fall-through chain comparing
   the last 44, 40, ... 4 bytes one 32-bit word at a time.  %eax and %edx are
   END pointers of the two buffers, so L(<n>bytes) compares the word at offset
   -n and falls into L(<n-4>bytes).  On the first unequal word control goes to
   L(find_diff) with the two words in %ecx (from %eax) and %ebx (from %edx).
   %ebx is used as scratch; POP (%ebx) before ret presumably restores the
   caller's value, and the CFI_PUSH after ret presumably re-establishes the
   unwind state for the code that follows (both macros are defined outside
   this chunk).  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001857 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001858L(44bytes):
1859 mov -44(%eax), %ecx
1860 mov -44(%edx), %ebx
1861 cmp %ebx, %ecx
1862 jne L(find_diff)
1863L(40bytes):
1864 mov -40(%eax), %ecx
1865 mov -40(%edx), %ebx
1866 cmp %ebx, %ecx
1867 jne L(find_diff)
1868L(36bytes):
1869 mov -36(%eax), %ecx
1870 mov -36(%edx), %ebx
1871 cmp %ebx, %ecx
1872 jne L(find_diff)
1873L(32bytes):
1874 mov -32(%eax), %ecx
1875 mov -32(%edx), %ebx
1876 cmp %ebx, %ecx
1877 jne L(find_diff)
1878L(28bytes):
1879 mov -28(%eax), %ecx
1880 mov -28(%edx), %ebx
1881 cmp %ebx, %ecx
1882 jne L(find_diff)
1883L(24bytes):
1884 mov -24(%eax), %ecx
1885 mov -24(%edx), %ebx
1886 cmp %ebx, %ecx
1887 jne L(find_diff)
1888L(20bytes):
1889 mov -20(%eax), %ecx
1890 mov -20(%edx), %ebx
1891 cmp %ebx, %ecx
1892 jne L(find_diff)
1893L(16bytes):
1894 mov -16(%eax), %ecx
1895 mov -16(%edx), %ebx
1896 cmp %ebx, %ecx
1897 jne L(find_diff)
1898L(12bytes):
1899 mov -12(%eax), %ecx
1900 mov -12(%edx), %ebx
1901 cmp %ebx, %ecx
1902 jne L(find_diff)
1903L(8bytes):
1904 mov -8(%eax), %ecx
1905 mov -8(%edx), %ebx
1906 cmp %ebx, %ecx
1907 jne L(find_diff)
1908L(4bytes):
1909 mov -4(%eax), %ecx
1910 mov -4(%edx), %ebx
1911 cmp %ebx, %ecx
	/* Preload the "equal" result; mov (unlike xor) leaves the flags of the
	   cmp above intact for the jne.  */
1912 mov $0, %eax
1913 jne L(find_diff)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001914 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001915 ret
1916 CFI_PUSH (%ebx)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001917#else
/* L(44bytes) .. L(4bytes), USE_AS_WMEMCMP build: fall-through chain comparing
   the last 44, 40, ... 4 bytes one 32-bit element at a time.  %eax and %edx
   are END pointers of the two buffers.  Each step loads the %eax-side element
   into %ecx and compares it directly with memory on the %edx side; on the
   first inequality control goes to L(find_diff) with the flags of that cmp
   still live.  */
1918 .p2align 4
1919L(44bytes):
1920 mov -44(%eax), %ecx
1921 cmp -44(%edx), %ecx
1922 jne L(find_diff)
1923L(40bytes):
1924 mov -40(%eax), %ecx
1925 cmp -40(%edx), %ecx
1926 jne L(find_diff)
1927L(36bytes):
1928 mov -36(%eax), %ecx
1929 cmp -36(%edx), %ecx
1930 jne L(find_diff)
1931L(32bytes):
1932 mov -32(%eax), %ecx
1933 cmp -32(%edx), %ecx
1934 jne L(find_diff)
1935L(28bytes):
1936 mov -28(%eax), %ecx
1937 cmp -28(%edx), %ecx
1938 jne L(find_diff)
1939L(24bytes):
1940 mov -24(%eax), %ecx
1941 cmp -24(%edx), %ecx
1942 jne L(find_diff)
1943L(20bytes):
1944 mov -20(%eax), %ecx
1945 cmp -20(%edx), %ecx
1946 jne L(find_diff)
1947L(16bytes):
1948 mov -16(%eax), %ecx
1949 cmp -16(%edx), %ecx
1950 jne L(find_diff)
1951L(12bytes):
1952 mov -12(%eax), %ecx
1953 cmp -12(%edx), %ecx
1954 jne L(find_diff)
1955L(8bytes):
1956 mov -8(%eax), %ecx
1957 cmp -8(%edx), %ecx
1958 jne L(find_diff)
1959L(4bytes):
1960 mov -4(%eax), %ecx
	/* Zero the result BEFORE the cmp: xor clobbers the flags, and %eax is no
	   longer needed as a pointer once the last element is loaded.  */
1961 xor %eax, %eax
1962 cmp -4(%edx), %ecx
1963 jne L(find_diff)
1964 POP (%ebx)
1965 ret
1966 CFI_PUSH (%ebx)
1967#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001968
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001969#ifndef USE_AS_WMEMCMP
1970
/* L(45bytes) .. L(5bytes): fall-through chain for remaining counts of the
   form 4k+1.  %eax and %edx are END pointers of the two buffers.  32-bit
   words at offsets -45, -41, ... -5 are compared in turn (unequal words go
   to L(find_diff) with the words in %ecx/%ebx), then the single last byte at
   offset -1 is compared, going to L(end) with the flags of that byte compare
   if it differs.  Returns 0 in %eax when everything matched.  */
1971 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001972L(45bytes):
1973 mov -45(%eax), %ecx
1974 mov -45(%edx), %ebx
1975 cmp %ebx, %ecx
1976 jne L(find_diff)
1977L(41bytes):
1978 mov -41(%eax), %ecx
1979 mov -41(%edx), %ebx
1980 cmp %ebx, %ecx
1981 jne L(find_diff)
1982L(37bytes):
1983 mov -37(%eax), %ecx
1984 mov -37(%edx), %ebx
1985 cmp %ebx, %ecx
1986 jne L(find_diff)
1987L(33bytes):
1988 mov -33(%eax), %ecx
1989 mov -33(%edx), %ebx
1990 cmp %ebx, %ecx
1991 jne L(find_diff)
1992L(29bytes):
1993 mov -29(%eax), %ecx
1994 mov -29(%edx), %ebx
1995 cmp %ebx, %ecx
1996 jne L(find_diff)
1997L(25bytes):
1998 mov -25(%eax), %ecx
1999 mov -25(%edx), %ebx
2000 cmp %ebx, %ecx
2001 jne L(find_diff)
2002L(21bytes):
2003 mov -21(%eax), %ecx
2004 mov -21(%edx), %ebx
2005 cmp %ebx, %ecx
2006 jne L(find_diff)
2007L(17bytes):
2008 mov -17(%eax), %ecx
2009 mov -17(%edx), %ebx
2010 cmp %ebx, %ecx
2011 jne L(find_diff)
2012L(13bytes):
2013 mov -13(%eax), %ecx
2014 mov -13(%edx), %ebx
2015 cmp %ebx, %ecx
2016 jne L(find_diff)
2017L(9bytes):
2018 mov -9(%eax), %ecx
2019 mov -9(%edx), %ebx
2020 cmp %ebx, %ecx
2021 jne L(find_diff)
2022L(5bytes):
2023 mov -5(%eax), %ecx
2024 mov -5(%edx), %ebx
2025 cmp %ebx, %ecx
2026 jne L(find_diff)
	/* Last byte.  mov $0 keeps the flags of the byte compare for the jne.  */
2027 movzbl -1(%eax), %ecx
2028 cmp -1(%edx), %cl
2029 mov $0, %eax
2030 jne L(end)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002031 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002032 ret
2033 CFI_PUSH (%ebx)
2034
/* L(46bytes) .. L(2bytes): fall-through chain for remaining counts of the
   form 4k+2.  %eax and %edx are END pointers of the two buffers.  32-bit
   words at offsets -46, -42, ... -6 are compared in turn (unequal words go
   to L(find_diff) with the words in %ecx/%ebx), then the last two bytes are
   loaded as one 16-bit value and compared byte by byte in memory order (low
   byte first), going to L(end) with the flags of the differing byte compare.
   Returns 0 in %eax when everything matched.  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002035 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002036L(46bytes):
2037 mov -46(%eax), %ecx
2038 mov -46(%edx), %ebx
2039 cmp %ebx, %ecx
2040 jne L(find_diff)
2041L(42bytes):
2042 mov -42(%eax), %ecx
2043 mov -42(%edx), %ebx
2044 cmp %ebx, %ecx
2045 jne L(find_diff)
2046L(38bytes):
2047 mov -38(%eax), %ecx
2048 mov -38(%edx), %ebx
2049 cmp %ebx, %ecx
2050 jne L(find_diff)
2051L(34bytes):
2052 mov -34(%eax), %ecx
2053 mov -34(%edx), %ebx
2054 cmp %ebx, %ecx
2055 jne L(find_diff)
2056L(30bytes):
2057 mov -30(%eax), %ecx
2058 mov -30(%edx), %ebx
2059 cmp %ebx, %ecx
2060 jne L(find_diff)
2061L(26bytes):
2062 mov -26(%eax), %ecx
2063 mov -26(%edx), %ebx
2064 cmp %ebx, %ecx
2065 jne L(find_diff)
2066L(22bytes):
2067 mov -22(%eax), %ecx
2068 mov -22(%edx), %ebx
2069 cmp %ebx, %ecx
2070 jne L(find_diff)
2071L(18bytes):
2072 mov -18(%eax), %ecx
2073 mov -18(%edx), %ebx
2074 cmp %ebx, %ecx
2075 jne L(find_diff)
2076L(14bytes):
2077 mov -14(%eax), %ecx
2078 mov -14(%edx), %ebx
2079 cmp %ebx, %ecx
2080 jne L(find_diff)
2081L(10bytes):
2082 mov -10(%eax), %ecx
2083 mov -10(%edx), %ebx
2084 cmp %ebx, %ecx
2085 jne L(find_diff)
2086L(6bytes):
2087 mov -6(%eax), %ecx
2088 mov -6(%edx), %ebx
2089 cmp %ebx, %ecx
2090 jne L(find_diff)
2091L(2bytes):
2092 movzwl -2(%eax), %ecx
2093 movzwl -2(%edx), %ebx
	/* Byte at offset -2 first, then the byte at offset -1 (%ch vs %bh).  */
2094 cmp %bl, %cl
2095 jne L(end)
2096 cmp %bh, %ch
	/* mov keeps the flags of the compare above for the jne.  */
2097 mov $0, %eax
2098 jne L(end)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002099 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002100 ret
2101 CFI_PUSH (%ebx)
2102
/* L(47bytes) .. L(3bytes): fall-through chain for remaining counts of the
   form 4k+3.  %eax and %edx are END pointers of the two buffers.  32-bit
   words at offsets -47, -43, ... -7 are compared in turn (unequal words go
   to L(find_diff) with the words in %ecx/%ebx), then the last three bytes
   are compared: a 16-bit load covering offsets -3 and -2, followed by the
   byte at offset -1.  A differing byte goes to L(end) with the flags of the
   deciding compare.  Returns 0 in %eax when everything matched.  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002103 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002104L(47bytes):
2105 movl -47(%eax), %ecx
2106 movl -47(%edx), %ebx
2107 cmp %ebx, %ecx
2108 jne L(find_diff)
2109L(43bytes):
2110 movl -43(%eax), %ecx
2111 movl -43(%edx), %ebx
2112 cmp %ebx, %ecx
2113 jne L(find_diff)
2114L(39bytes):
2115 movl -39(%eax), %ecx
2116 movl -39(%edx), %ebx
2117 cmp %ebx, %ecx
2118 jne L(find_diff)
2119L(35bytes):
2120 movl -35(%eax), %ecx
2121 movl -35(%edx), %ebx
2122 cmp %ebx, %ecx
2123 jne L(find_diff)
2124L(31bytes):
2125 movl -31(%eax), %ecx
2126 movl -31(%edx), %ebx
2127 cmp %ebx, %ecx
2128 jne L(find_diff)
2129L(27bytes):
2130 movl -27(%eax), %ecx
2131 movl -27(%edx), %ebx
2132 cmp %ebx, %ecx
2133 jne L(find_diff)
2134L(23bytes):
2135 movl -23(%eax), %ecx
2136 movl -23(%edx), %ebx
2137 cmp %ebx, %ecx
2138 jne L(find_diff)
2139L(19bytes):
2140 movl -19(%eax), %ecx
2141 movl -19(%edx), %ebx
2142 cmp %ebx, %ecx
2143 jne L(find_diff)
2144L(15bytes):
2145 movl -15(%eax), %ecx
2146 movl -15(%edx), %ebx
2147 cmp %ebx, %ecx
2148 jne L(find_diff)
2149L(11bytes):
2150 movl -11(%eax), %ecx
2151 movl -11(%edx), %ebx
2152 cmp %ebx, %ecx
2153 jne L(find_diff)
2154L(7bytes):
2155 movl -7(%eax), %ecx
2156 movl -7(%edx), %ebx
2157 cmp %ebx, %ecx
2158 jne L(find_diff)
2159L(3bytes):
2160 movzwl -3(%eax), %ecx
2161 movzwl -3(%edx), %ebx
	/* Low byte (offset -3) first; if it is equal, the 16-bit compare is
	   decided by the high byte (offset -2).  */
2162 cmpb %bl, %cl
2163 jne L(end)
2164 cmp %bx, %cx
2165 jne L(end)
	/* Last byte; %eax is no longer needed as a pointer after this load.  */
2166 movzbl -1(%eax), %eax
2167 cmpb -1(%edx), %al
	/* mov keeps the flags of the compare above for the jne.  */
2168 mov $0, %eax
2169 jne L(end)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002170 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002171 ret
2172 CFI_PUSH (%ebx)
2173
/* L(find_diff) / L(end), byte (memcmp) build.
   L(find_diff): %ecx and %ebx hold two unequal 32-bit words loaded from the
   %eax-side and %edx-side buffers.  The first differing byte in memory order
   is found by comparing from the least significant byte upward: low byte,
   then the low 16 bits (decided by the second byte once the low bytes are
   equal), then the same again on the upper halves after shifting right by 16.
   The last compare falls through into L(end) with its flags live.
   L(end): turns the flags of the deciding UNSIGNED compare into the result:
   %eax = 1 if the %ecx-side byte is above the %ebx-side byte, otherwise -1.
   POP (%ebx) and mov $1 sit between the compare and the ja, so they must not
   modify EFLAGS (mov does not; POP is a macro defined outside this chunk,
   presumably a plain pop plus unwind annotations).  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002174 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002175L(find_diff):
2176 cmpb %bl, %cl
2177 jne L(end)
2178 cmp %bx, %cx
2179 jne L(end)
2180 shr $16,%ecx
2181 shr $16,%ebx
2182 cmp %bl, %cl
2183 jne L(end)
2184 cmp %bx, %cx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002185
2186 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002187L(end):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002188 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002189 mov $1, %eax
2190 ja L(bigger)
2191 neg %eax
2192L(bigger):
2193 ret
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002194#else
Bruce Beare8ff1a272010-03-04 11:03:37 -08002195
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002196/* for wmemcmp */
/* L(find_diff), USE_AS_WMEMCMP build: reached straight after a
   "cmp mem, %ecx" of two unequal 32-bit elements, with the flags of that
   compare still live.  The elements are ordered as SIGNED values (jg):
   %eax = 1 if the %ecx element is greater, otherwise -1.
   POP (%ebx) and mov $1 come before the jg, so they must not modify EFLAGS
   (mov does not; POP is a macro defined outside this chunk, presumably a
   plain pop plus unwind annotations).  */
2197 .p2align 4
2198L(find_diff):
2199 POP (%ebx)
2200 mov $1, %eax
2201 jg L(find_diff_bigger)
2202 neg %eax
2203 ret
2204
2205 .p2align 4
2206L(find_diff_bigger):
2207 ret
2208
2209#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08002210END (MEMCMP)