blob: 03870841966cea893121db424b22779194ae9a83 [file] [log] [blame]
Bruce Beare8ff1a272010-03-04 11:03:37 -08001/*
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04002Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation
Bruce Beare8ff1a272010-03-04 11:03:37 -08003All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
/* L(label): spell a label with the ".L" prefix (token-pasted onto the given
   name), the GNU as convention for labels that are local to this file.
   Only defined here if the including file has not already provided L.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -080031#ifndef L
32# define L(label) .L##label
33#endif
34
/* Thin wrappers around the assembler's .cfi_* call-frame-information
   directives.  Each wrapper expands to the directive of the same name and
   forwards its arguments unchanged.  Every definition is guarded by #ifndef,
   so a definition supplied before this point takes precedence.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -080035#ifndef cfi_startproc
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040036# define cfi_startproc .cfi_startproc
Bruce Beare8ff1a272010-03-04 11:03:37 -080037#endif
38
39#ifndef cfi_endproc
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040040# define cfi_endproc .cfi_endproc
Bruce Beare8ff1a272010-03-04 11:03:37 -080041#endif
42
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
45#endif
46
47#ifndef cfi_restore
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040048# define cfi_restore(reg) .cfi_restore reg
Bruce Beare8ff1a272010-03-04 11:03:37 -080049#endif
50
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
53#endif
54
Bruce Beare124a5422010-10-11 12:24:41 -070055#ifndef cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040056# define cfi_remember_state .cfi_remember_state
Bruce Beare124a5422010-10-11 12:24:41 -070057#endif
58
59#ifndef cfi_restore_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040060# define cfi_restore_state .cfi_restore_state
Bruce Beare124a5422010-10-11 12:24:41 -070061#endif
62
/* ENTRY(name): open a function definition.  Marks `name` as a function-type
   symbol, exports it with .globl, aligns the entry point with .p2align 4
   (a 2^4 = 16-byte boundary), emits the `name:` label and starts the CFI
   region with cfi_startproc.  Skipped if ENTRY is already defined.  */
Bruce Beare8ff1a272010-03-04 11:03:37 -080063#ifndef ENTRY
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040064# define ENTRY(name) \
65 .type name, @function; \
66 .globl name; \
67 .p2align 4; \
68name: \
Bruce Beare8ff1a272010-03-04 11:03:37 -080069 cfi_startproc
70#endif
71
/* END(name): close a function definition.  Ends the CFI region with
   cfi_endproc and sets the symbol size of `name` to the distance from its
   label to the current location (.-name).  Skipped if END is already
   defined.  */
72#ifndef END
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040073# define END(name) \
74 cfi_endproc; \
Bruce Beare8ff1a272010-03-04 11:03:37 -080075 .size name, .-name
76#endif
77
/* MEMCMP: symbol name under which the routine below is emitted (it is the
   argument of ENTRY).  Defaults to memcmp; a file that defines MEMCMP before
   this point gets the same code under another name.  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040078#ifndef MEMCMP
79# define MEMCMP memcmp
80#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -080081
/* Stack push/pop helpers that keep the unwind information in step with the
   stack pointer.

   CFI_PUSH(REG): CFI bookkeeping for a 4-byte push of REG; the CFA offset
                  grows by 4 and REG is recorded as saved at relative
                  offset 0.
   CFI_POP(REG):  CFI bookkeeping for the matching pop; the CFA offset
                  shrinks by 4 and REG is marked as restored.
   PUSH(REG):     pushl REG followed by CFI_PUSH (REG).
   POP(REG):      popl REG followed by CFI_POP (REG).

   CFI_PUSH/CFI_POP emit no instructions themselves, only directives.  */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040082#define CFI_PUSH(REG) \
83 cfi_adjust_cfa_offset (4); \
84 cfi_rel_offset (REG, 0)
85
86#define CFI_POP(REG) \
87 cfi_adjust_cfa_offset (-4); \
88 cfi_restore (REG)
Bruce Beare8ff1a272010-03-04 11:03:37 -080089
90#define PUSH(REG) pushl REG; CFI_PUSH (REG)
91#define POP(REG) popl REG; CFI_POP (REG)
92
/* Argument offsets and epilogue helpers.

   PARMS, BLK1, BLK2 and LEN are displacements used as OFFSET(%esp) at
   function entry, before any register has been pushed:
     BLK1 = 4   first pointer argument  (loaded into %eax)
     BLK2 = 8   second pointer argument (loaded into %edx)
     LEN  = 12  length argument         (loaded into %ecx)
   PARMS = 4 presumably skips the return address at 0(%esp) -- standard for
   the i386 stack-argument convention, but confirm against the callers.

   RETURN_END: pop %edi, %esi and %ebx (the reverse of the push order used
               in the routine) and return.
   RETURN:     RETURN_END, then restore the remembered CFI state and
               remember it again, so the code that follows the `ret` in the
               file is described by that saved frame state.  */
93#define PARMS 4
94#define BLK1 PARMS
95#define BLK2 BLK1+4
96#define LEN BLK2+4
97#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
Bruce Beare124a5422010-10-11 12:24:41 -070098#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
Bruce Beare8ff1a272010-03-04 11:03:37 -080099
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400100/* Warning!
101 wmemcmp has to use SIGNED comparison for elements.
102 memcmp has to use UNSIGNED comparison for elements.
103*/
104
105 .text
Bruce Beare8ff1a272010-03-04 11:03:37 -0800106ENTRY (MEMCMP)
107 movl LEN(%esp), %ecx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400108
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400109#ifdef USE_WCHAR
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400110 shl $2, %ecx
111 jz L(zero)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400112#elif defined USE_UTF16
113 shl $1, %ecx
114 jz L(zero)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400115#endif
116
Bruce Beare8ff1a272010-03-04 11:03:37 -0800117 movl BLK1(%esp), %eax
118 cmp $48, %ecx
119 movl BLK2(%esp), %edx
120 jae L(48bytesormore)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400121
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400122#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800123 cmp $1, %ecx
124 jbe L(less1bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400125#endif
126
127 PUSH (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800128 add %ecx, %edx
129 add %ecx, %eax
130 jmp L(less48bytes)
131
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400132 CFI_POP (%ebx)
133
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400134#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400135 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800136L(less1bytes):
137 jb L(zero)
138 movb (%eax), %cl
139 cmp (%edx), %cl
140 je L(zero)
141 mov $1, %eax
142 ja L(1bytesend)
143 neg %eax
144L(1bytesend):
145 ret
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400146#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800147
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400148 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800149L(zero):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400150 xor %eax, %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -0800151 ret
152
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400153 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800154L(48bytesormore):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400155 PUSH (%ebx)
156 PUSH (%esi)
157 PUSH (%edi)
Bruce Beare124a5422010-10-11 12:24:41 -0700158 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400159 movdqu (%eax), %xmm3
160 movdqu (%edx), %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800161 movl %eax, %edi
162 movl %edx, %esi
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400163 pcmpeqb %xmm0, %xmm3
164 pmovmskb %xmm3, %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800165 lea 16(%edi), %edi
166
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400167 sub $0xffff, %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800168 lea 16(%esi), %esi
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400169 jnz L(less16bytes)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800170 mov %edi, %edx
171 and $0xf, %edx
172 xor %edx, %edi
173 sub %edx, %esi
174 add %edx, %ecx
175 mov %esi, %edx
176 and $0xf, %edx
177 jz L(shr_0)
178 xor %edx, %esi
179
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400180#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800181 cmp $8, %edx
182 jae L(next_unaligned_table)
183 cmp $0, %edx
184 je L(shr_0)
185 cmp $1, %edx
186 je L(shr_1)
187 cmp $2, %edx
188 je L(shr_2)
189 cmp $3, %edx
190 je L(shr_3)
191 cmp $4, %edx
192 je L(shr_4)
193 cmp $5, %edx
194 je L(shr_5)
195 cmp $6, %edx
196 je L(shr_6)
197 jmp L(shr_7)
198
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400199 .p2align 2
Bruce Beare8ff1a272010-03-04 11:03:37 -0800200L(next_unaligned_table):
201 cmp $8, %edx
202 je L(shr_8)
203 cmp $9, %edx
204 je L(shr_9)
205 cmp $10, %edx
206 je L(shr_10)
207 cmp $11, %edx
208 je L(shr_11)
209 cmp $12, %edx
210 je L(shr_12)
211 cmp $13, %edx
212 je L(shr_13)
213 cmp $14, %edx
214 je L(shr_14)
215 jmp L(shr_15)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400216#elif defined(USE_WCHAR)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400217 cmp $0, %edx
218 je L(shr_0)
219 cmp $4, %edx
220 je L(shr_4)
221 cmp $8, %edx
222 je L(shr_8)
223 jmp L(shr_12)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400224#elif defined(USE_UTF16)
225 cmp $0, %edx
226 je L(shr_0)
227 cmp $2, %edx
228 je L(shr_2)
229 cmp $4, %edx
230 je L(shr_4)
231 cmp $6, %edx
232 je L(shr_6)
233 cmp $8, %edx
234 je L(shr_8)
235 cmp $10, %edx
236 je L(shr_10)
237 cmp $12, %edx
238 je L(shr_12)
239 jmp L(shr_14)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400240#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800241
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400242 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800243L(shr_0):
244 cmp $80, %ecx
245 jae L(shr_0_gobble)
246 lea -48(%ecx), %ecx
247 xor %eax, %eax
248 movaps (%esi), %xmm1
249 pcmpeqb (%edi), %xmm1
250 movaps 16(%esi), %xmm2
251 pcmpeqb 16(%edi), %xmm2
252 pand %xmm1, %xmm2
253 pmovmskb %xmm2, %edx
254 add $32, %edi
255 add $32, %esi
256 sub $0xffff, %edx
257 jnz L(exit)
258
259 lea (%ecx, %edi,1), %eax
260 lea (%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400261 POP (%edi)
262 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800263 jmp L(less48bytes)
264
Bruce Beare124a5422010-10-11 12:24:41 -0700265 cfi_restore_state
266 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400267 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800268L(shr_0_gobble):
269 lea -48(%ecx), %ecx
270 movdqa (%esi), %xmm0
271 xor %eax, %eax
272 pcmpeqb (%edi), %xmm0
273 sub $32, %ecx
274 movdqa 16(%esi), %xmm2
275 pcmpeqb 16(%edi), %xmm2
276L(shr_0_gobble_loop):
277 pand %xmm0, %xmm2
278 sub $32, %ecx
279 pmovmskb %xmm2, %edx
280 movdqa %xmm0, %xmm1
281 movdqa 32(%esi), %xmm0
282 movdqa 48(%esi), %xmm2
283 sbb $0xffff, %edx
284 pcmpeqb 32(%edi), %xmm0
285 pcmpeqb 48(%edi), %xmm2
286 lea 32(%edi), %edi
287 lea 32(%esi), %esi
288 jz L(shr_0_gobble_loop)
289
290 pand %xmm0, %xmm2
291 cmp $0, %ecx
292 jge L(shr_0_gobble_loop_next)
293 inc %edx
294 add $32, %ecx
295L(shr_0_gobble_loop_next):
296 test %edx, %edx
297 jnz L(exit)
298
299 pmovmskb %xmm2, %edx
300 movdqa %xmm0, %xmm1
301 lea 32(%edi), %edi
302 lea 32(%esi), %esi
303 sub $0xffff, %edx
304 jnz L(exit)
305 lea (%ecx, %edi,1), %eax
306 lea (%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400307 POP (%edi)
308 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800309 jmp L(less48bytes)
310
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400311#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Bruce Beare124a5422010-10-11 12:24:41 -0700312 cfi_restore_state
313 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400314 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800315L(shr_1):
316 cmp $80, %ecx
317 lea -48(%ecx), %ecx
318 mov %edx, %eax
319 jae L(shr_1_gobble)
320
321 movdqa 16(%esi), %xmm1
322 movdqa %xmm1, %xmm2
323 palignr $1,(%esi), %xmm1
324 pcmpeqb (%edi), %xmm1
325
326 movdqa 32(%esi), %xmm3
327 palignr $1,%xmm2, %xmm3
328 pcmpeqb 16(%edi), %xmm3
329
330 pand %xmm1, %xmm3
331 pmovmskb %xmm3, %edx
332 lea 32(%edi), %edi
333 lea 32(%esi), %esi
334 sub $0xffff, %edx
335 jnz L(exit)
336 lea (%ecx, %edi,1), %eax
337 lea 1(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400338 POP (%edi)
339 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800340 jmp L(less48bytes)
341
Bruce Beare124a5422010-10-11 12:24:41 -0700342 cfi_restore_state
343 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400344 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800345L(shr_1_gobble):
346 sub $32, %ecx
347 movdqa 16(%esi), %xmm0
348 palignr $1,(%esi), %xmm0
349 pcmpeqb (%edi), %xmm0
350
351 movdqa 32(%esi), %xmm3
352 palignr $1,16(%esi), %xmm3
353 pcmpeqb 16(%edi), %xmm3
354
355L(shr_1_gobble_loop):
356 pand %xmm0, %xmm3
357 sub $32, %ecx
358 pmovmskb %xmm3, %edx
359 movdqa %xmm0, %xmm1
360
361 movdqa 64(%esi), %xmm3
362 palignr $1,48(%esi), %xmm3
363 sbb $0xffff, %edx
364 movdqa 48(%esi), %xmm0
365 palignr $1,32(%esi), %xmm0
366 pcmpeqb 32(%edi), %xmm0
367 lea 32(%esi), %esi
368 pcmpeqb 48(%edi), %xmm3
369
370 lea 32(%edi), %edi
371 jz L(shr_1_gobble_loop)
372 pand %xmm0, %xmm3
373
374 cmp $0, %ecx
375 jge L(shr_1_gobble_next)
376 inc %edx
377 add $32, %ecx
378L(shr_1_gobble_next):
379 test %edx, %edx
380 jnz L(exit)
381
382 pmovmskb %xmm3, %edx
383 movdqa %xmm0, %xmm1
384 lea 32(%edi), %edi
385 lea 32(%esi), %esi
386 sub $0xffff, %edx
387 jnz L(exit)
388
389 lea (%ecx, %edi,1), %eax
390 lea 1(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400391 POP (%edi)
392 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800393 jmp L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400394#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800395
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400396
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400397#if !defined(USE_WCHAR)
Bruce Beare124a5422010-10-11 12:24:41 -0700398 cfi_restore_state
399 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400400 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800401L(shr_2):
402 cmp $80, %ecx
403 lea -48(%ecx), %ecx
404 mov %edx, %eax
405 jae L(shr_2_gobble)
406
407 movdqa 16(%esi), %xmm1
408 movdqa %xmm1, %xmm2
409 palignr $2,(%esi), %xmm1
410 pcmpeqb (%edi), %xmm1
411
412 movdqa 32(%esi), %xmm3
413 palignr $2,%xmm2, %xmm3
414 pcmpeqb 16(%edi), %xmm3
415
416 pand %xmm1, %xmm3
417 pmovmskb %xmm3, %edx
418 lea 32(%edi), %edi
419 lea 32(%esi), %esi
420 sub $0xffff, %edx
421 jnz L(exit)
422 lea (%ecx, %edi,1), %eax
423 lea 2(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400424 POP (%edi)
425 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800426 jmp L(less48bytes)
427
Bruce Beare124a5422010-10-11 12:24:41 -0700428 cfi_restore_state
429 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400430 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800431L(shr_2_gobble):
432 sub $32, %ecx
433 movdqa 16(%esi), %xmm0
434 palignr $2,(%esi), %xmm0
435 pcmpeqb (%edi), %xmm0
436
437 movdqa 32(%esi), %xmm3
438 palignr $2,16(%esi), %xmm3
439 pcmpeqb 16(%edi), %xmm3
440
441L(shr_2_gobble_loop):
442 pand %xmm0, %xmm3
443 sub $32, %ecx
444 pmovmskb %xmm3, %edx
445 movdqa %xmm0, %xmm1
446
447 movdqa 64(%esi), %xmm3
448 palignr $2,48(%esi), %xmm3
449 sbb $0xffff, %edx
450 movdqa 48(%esi), %xmm0
451 palignr $2,32(%esi), %xmm0
452 pcmpeqb 32(%edi), %xmm0
453 lea 32(%esi), %esi
454 pcmpeqb 48(%edi), %xmm3
455
456 lea 32(%edi), %edi
457 jz L(shr_2_gobble_loop)
458 pand %xmm0, %xmm3
459
460 cmp $0, %ecx
461 jge L(shr_2_gobble_next)
462 inc %edx
463 add $32, %ecx
464L(shr_2_gobble_next):
465 test %edx, %edx
466 jnz L(exit)
467
468 pmovmskb %xmm3, %edx
469 movdqa %xmm0, %xmm1
470 lea 32(%edi), %edi
471 lea 32(%esi), %esi
472 sub $0xffff, %edx
473 jnz L(exit)
474
475 lea (%ecx, %edi,1), %eax
476 lea 2(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400477 POP (%edi)
478 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800479 jmp L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400480#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800481
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400482#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Bruce Beare124a5422010-10-11 12:24:41 -0700483 cfi_restore_state
484 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400485 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800486L(shr_3):
487 cmp $80, %ecx
488 lea -48(%ecx), %ecx
489 mov %edx, %eax
490 jae L(shr_3_gobble)
491
492 movdqa 16(%esi), %xmm1
493 movdqa %xmm1, %xmm2
494 palignr $3,(%esi), %xmm1
495 pcmpeqb (%edi), %xmm1
496
497 movdqa 32(%esi), %xmm3
498 palignr $3,%xmm2, %xmm3
499 pcmpeqb 16(%edi), %xmm3
500
501 pand %xmm1, %xmm3
502 pmovmskb %xmm3, %edx
503 lea 32(%edi), %edi
504 lea 32(%esi), %esi
505 sub $0xffff, %edx
506 jnz L(exit)
507 lea (%ecx, %edi,1), %eax
508 lea 3(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400509 POP (%edi)
510 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800511 jmp L(less48bytes)
512
Bruce Beare124a5422010-10-11 12:24:41 -0700513 cfi_restore_state
514 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400515 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800516L(shr_3_gobble):
517 sub $32, %ecx
518 movdqa 16(%esi), %xmm0
519 palignr $3,(%esi), %xmm0
520 pcmpeqb (%edi), %xmm0
521
522 movdqa 32(%esi), %xmm3
523 palignr $3,16(%esi), %xmm3
524 pcmpeqb 16(%edi), %xmm3
525
526L(shr_3_gobble_loop):
527 pand %xmm0, %xmm3
528 sub $32, %ecx
529 pmovmskb %xmm3, %edx
530 movdqa %xmm0, %xmm1
531
532 movdqa 64(%esi), %xmm3
533 palignr $3,48(%esi), %xmm3
534 sbb $0xffff, %edx
535 movdqa 48(%esi), %xmm0
536 palignr $3,32(%esi), %xmm0
537 pcmpeqb 32(%edi), %xmm0
538 lea 32(%esi), %esi
539 pcmpeqb 48(%edi), %xmm3
540
541 lea 32(%edi), %edi
542 jz L(shr_3_gobble_loop)
543 pand %xmm0, %xmm3
544
545 cmp $0, %ecx
546 jge L(shr_3_gobble_next)
547 inc %edx
548 add $32, %ecx
549L(shr_3_gobble_next):
550 test %edx, %edx
551 jnz L(exit)
552
553 pmovmskb %xmm3, %edx
554 movdqa %xmm0, %xmm1
555 lea 32(%edi), %edi
556 lea 32(%esi), %esi
557 sub $0xffff, %edx
558 jnz L(exit)
559
560 lea (%ecx, %edi,1), %eax
561 lea 3(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400562 POP (%edi)
563 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800564 jmp L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400565#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800566
Bruce Beare124a5422010-10-11 12:24:41 -0700567 cfi_restore_state
568 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400569 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800570L(shr_4):
571 cmp $80, %ecx
572 lea -48(%ecx), %ecx
573 mov %edx, %eax
574 jae L(shr_4_gobble)
575
576 movdqa 16(%esi), %xmm1
577 movdqa %xmm1, %xmm2
578 palignr $4,(%esi), %xmm1
579 pcmpeqb (%edi), %xmm1
580
581 movdqa 32(%esi), %xmm3
582 palignr $4,%xmm2, %xmm3
583 pcmpeqb 16(%edi), %xmm3
584
585 pand %xmm1, %xmm3
586 pmovmskb %xmm3, %edx
587 lea 32(%edi), %edi
588 lea 32(%esi), %esi
589 sub $0xffff, %edx
590 jnz L(exit)
591 lea (%ecx, %edi,1), %eax
592 lea 4(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400593 POP (%edi)
594 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800595 jmp L(less48bytes)
596
Bruce Beare124a5422010-10-11 12:24:41 -0700597 cfi_restore_state
598 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400599 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800600L(shr_4_gobble):
601 sub $32, %ecx
602 movdqa 16(%esi), %xmm0
603 palignr $4,(%esi), %xmm0
604 pcmpeqb (%edi), %xmm0
605
606 movdqa 32(%esi), %xmm3
607 palignr $4,16(%esi), %xmm3
608 pcmpeqb 16(%edi), %xmm3
609
610L(shr_4_gobble_loop):
611 pand %xmm0, %xmm3
612 sub $32, %ecx
613 pmovmskb %xmm3, %edx
614 movdqa %xmm0, %xmm1
615
616 movdqa 64(%esi), %xmm3
617 palignr $4,48(%esi), %xmm3
618 sbb $0xffff, %edx
619 movdqa 48(%esi), %xmm0
620 palignr $4,32(%esi), %xmm0
621 pcmpeqb 32(%edi), %xmm0
622 lea 32(%esi), %esi
623 pcmpeqb 48(%edi), %xmm3
624
625 lea 32(%edi), %edi
626 jz L(shr_4_gobble_loop)
627 pand %xmm0, %xmm3
628
629 cmp $0, %ecx
630 jge L(shr_4_gobble_next)
631 inc %edx
632 add $32, %ecx
633L(shr_4_gobble_next):
634 test %edx, %edx
635 jnz L(exit)
636
637 pmovmskb %xmm3, %edx
638 movdqa %xmm0, %xmm1
639 lea 32(%edi), %edi
640 lea 32(%esi), %esi
641 sub $0xffff, %edx
642 jnz L(exit)
643
644 lea (%ecx, %edi,1), %eax
645 lea 4(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400646 POP (%edi)
647 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800648 jmp L(less48bytes)
649
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400650#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Bruce Beare124a5422010-10-11 12:24:41 -0700651 cfi_restore_state
652 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400653 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800654L(shr_5):
655 cmp $80, %ecx
656 lea -48(%ecx), %ecx
657 mov %edx, %eax
658 jae L(shr_5_gobble)
659
660 movdqa 16(%esi), %xmm1
661 movdqa %xmm1, %xmm2
662 palignr $5,(%esi), %xmm1
663 pcmpeqb (%edi), %xmm1
664
665 movdqa 32(%esi), %xmm3
666 palignr $5,%xmm2, %xmm3
667 pcmpeqb 16(%edi), %xmm3
668
669 pand %xmm1, %xmm3
670 pmovmskb %xmm3, %edx
671 lea 32(%edi), %edi
672 lea 32(%esi), %esi
673 sub $0xffff, %edx
674 jnz L(exit)
675 lea (%ecx, %edi,1), %eax
676 lea 5(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400677 POP (%edi)
678 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800679 jmp L(less48bytes)
680
Bruce Beare124a5422010-10-11 12:24:41 -0700681 cfi_restore_state
682 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400683 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800684L(shr_5_gobble):
685 sub $32, %ecx
686 movdqa 16(%esi), %xmm0
687 palignr $5,(%esi), %xmm0
688 pcmpeqb (%edi), %xmm0
689
690 movdqa 32(%esi), %xmm3
691 palignr $5,16(%esi), %xmm3
692 pcmpeqb 16(%edi), %xmm3
693
694L(shr_5_gobble_loop):
695 pand %xmm0, %xmm3
696 sub $32, %ecx
697 pmovmskb %xmm3, %edx
698 movdqa %xmm0, %xmm1
699
700 movdqa 64(%esi), %xmm3
701 palignr $5,48(%esi), %xmm3
702 sbb $0xffff, %edx
703 movdqa 48(%esi), %xmm0
704 palignr $5,32(%esi), %xmm0
705 pcmpeqb 32(%edi), %xmm0
706 lea 32(%esi), %esi
707 pcmpeqb 48(%edi), %xmm3
708
709 lea 32(%edi), %edi
710 jz L(shr_5_gobble_loop)
711 pand %xmm0, %xmm3
712
713 cmp $0, %ecx
714 jge L(shr_5_gobble_next)
715 inc %edx
716 add $32, %ecx
717L(shr_5_gobble_next):
718 test %edx, %edx
719 jnz L(exit)
720
721 pmovmskb %xmm3, %edx
722 movdqa %xmm0, %xmm1
723 lea 32(%edi), %edi
724 lea 32(%esi), %esi
725 sub $0xffff, %edx
726 jnz L(exit)
727
728 lea (%ecx, %edi,1), %eax
729 lea 5(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400730 POP (%edi)
731 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800732 jmp L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400733#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800734
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400735#if !defined(USE_WCHAR)
Bruce Beare124a5422010-10-11 12:24:41 -0700736 cfi_restore_state
737 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400738 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800739L(shr_6):
740 cmp $80, %ecx
741 lea -48(%ecx), %ecx
742 mov %edx, %eax
743 jae L(shr_6_gobble)
744
745 movdqa 16(%esi), %xmm1
746 movdqa %xmm1, %xmm2
747 palignr $6,(%esi), %xmm1
748 pcmpeqb (%edi), %xmm1
749
750 movdqa 32(%esi), %xmm3
751 palignr $6,%xmm2, %xmm3
752 pcmpeqb 16(%edi), %xmm3
753
754 pand %xmm1, %xmm3
755 pmovmskb %xmm3, %edx
756 lea 32(%edi), %edi
757 lea 32(%esi), %esi
758 sub $0xffff, %edx
759 jnz L(exit)
760 lea (%ecx, %edi,1), %eax
761 lea 6(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400762 POP (%edi)
763 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800764 jmp L(less48bytes)
765
Bruce Beare124a5422010-10-11 12:24:41 -0700766 cfi_restore_state
767 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400768 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800769L(shr_6_gobble):
770 sub $32, %ecx
771 movdqa 16(%esi), %xmm0
772 palignr $6,(%esi), %xmm0
773 pcmpeqb (%edi), %xmm0
774
775 movdqa 32(%esi), %xmm3
776 palignr $6,16(%esi), %xmm3
777 pcmpeqb 16(%edi), %xmm3
778
779L(shr_6_gobble_loop):
780 pand %xmm0, %xmm3
781 sub $32, %ecx
782 pmovmskb %xmm3, %edx
783 movdqa %xmm0, %xmm1
784
785 movdqa 64(%esi), %xmm3
786 palignr $6,48(%esi), %xmm3
787 sbb $0xffff, %edx
788 movdqa 48(%esi), %xmm0
789 palignr $6,32(%esi), %xmm0
790 pcmpeqb 32(%edi), %xmm0
791 lea 32(%esi), %esi
792 pcmpeqb 48(%edi), %xmm3
793
794 lea 32(%edi), %edi
795 jz L(shr_6_gobble_loop)
796 pand %xmm0, %xmm3
797
798 cmp $0, %ecx
799 jge L(shr_6_gobble_next)
800 inc %edx
801 add $32, %ecx
802L(shr_6_gobble_next):
803 test %edx, %edx
804 jnz L(exit)
805
806 pmovmskb %xmm3, %edx
807 movdqa %xmm0, %xmm1
808 lea 32(%edi), %edi
809 lea 32(%esi), %esi
810 sub $0xffff, %edx
811 jnz L(exit)
812
813 lea (%ecx, %edi,1), %eax
814 lea 6(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400815 POP (%edi)
816 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800817 jmp L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400818#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800819
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400820#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Bruce Beare124a5422010-10-11 12:24:41 -0700821 cfi_restore_state
822 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400823 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800824L(shr_7):
825 cmp $80, %ecx
826 lea -48(%ecx), %ecx
827 mov %edx, %eax
828 jae L(shr_7_gobble)
829
830 movdqa 16(%esi), %xmm1
831 movdqa %xmm1, %xmm2
832 palignr $7,(%esi), %xmm1
833 pcmpeqb (%edi), %xmm1
834
835 movdqa 32(%esi), %xmm3
836 palignr $7,%xmm2, %xmm3
837 pcmpeqb 16(%edi), %xmm3
838
839 pand %xmm1, %xmm3
840 pmovmskb %xmm3, %edx
841 lea 32(%edi), %edi
842 lea 32(%esi), %esi
843 sub $0xffff, %edx
844 jnz L(exit)
845 lea (%ecx, %edi,1), %eax
846 lea 7(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400847 POP (%edi)
848 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800849 jmp L(less48bytes)
850
Bruce Beare124a5422010-10-11 12:24:41 -0700851 cfi_restore_state
852 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400853 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800854L(shr_7_gobble):
855 sub $32, %ecx
856 movdqa 16(%esi), %xmm0
857 palignr $7,(%esi), %xmm0
858 pcmpeqb (%edi), %xmm0
859
860 movdqa 32(%esi), %xmm3
861 palignr $7,16(%esi), %xmm3
862 pcmpeqb 16(%edi), %xmm3
863
864L(shr_7_gobble_loop):
865 pand %xmm0, %xmm3
866 sub $32, %ecx
867 pmovmskb %xmm3, %edx
868 movdqa %xmm0, %xmm1
869
870 movdqa 64(%esi), %xmm3
871 palignr $7,48(%esi), %xmm3
872 sbb $0xffff, %edx
873 movdqa 48(%esi), %xmm0
874 palignr $7,32(%esi), %xmm0
875 pcmpeqb 32(%edi), %xmm0
876 lea 32(%esi), %esi
877 pcmpeqb 48(%edi), %xmm3
878
879 lea 32(%edi), %edi
880 jz L(shr_7_gobble_loop)
881 pand %xmm0, %xmm3
882
883 cmp $0, %ecx
884 jge L(shr_7_gobble_next)
885 inc %edx
886 add $32, %ecx
887L(shr_7_gobble_next):
888 test %edx, %edx
889 jnz L(exit)
890
891 pmovmskb %xmm3, %edx
892 movdqa %xmm0, %xmm1
893 lea 32(%edi), %edi
894 lea 32(%esi), %esi
895 sub $0xffff, %edx
896 jnz L(exit)
897
898 lea (%ecx, %edi,1), %eax
899 lea 7(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400900 POP (%edi)
901 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800902 jmp L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400903#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -0800904
Bruce Beare124a5422010-10-11 12:24:41 -0700905 cfi_restore_state
906 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400907 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800908L(shr_8):
909 cmp $80, %ecx
910 lea -48(%ecx), %ecx
911 mov %edx, %eax
912 jae L(shr_8_gobble)
913
914 movdqa 16(%esi), %xmm1
915 movdqa %xmm1, %xmm2
916 palignr $8,(%esi), %xmm1
917 pcmpeqb (%edi), %xmm1
918
919 movdqa 32(%esi), %xmm3
920 palignr $8,%xmm2, %xmm3
921 pcmpeqb 16(%edi), %xmm3
922
923 pand %xmm1, %xmm3
924 pmovmskb %xmm3, %edx
925 lea 32(%edi), %edi
926 lea 32(%esi), %esi
927 sub $0xffff, %edx
928 jnz L(exit)
929 lea (%ecx, %edi,1), %eax
930 lea 8(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400931 POP (%edi)
932 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800933 jmp L(less48bytes)
934
Bruce Beare124a5422010-10-11 12:24:41 -0700935 cfi_restore_state
936 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400937 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800938L(shr_8_gobble):
939 sub $32, %ecx
940 movdqa 16(%esi), %xmm0
941 palignr $8,(%esi), %xmm0
942 pcmpeqb (%edi), %xmm0
943
944 movdqa 32(%esi), %xmm3
945 palignr $8,16(%esi), %xmm3
946 pcmpeqb 16(%edi), %xmm3
947
948L(shr_8_gobble_loop):
949 pand %xmm0, %xmm3
950 sub $32, %ecx
951 pmovmskb %xmm3, %edx
952 movdqa %xmm0, %xmm1
953
954 movdqa 64(%esi), %xmm3
955 palignr $8,48(%esi), %xmm3
956 sbb $0xffff, %edx
957 movdqa 48(%esi), %xmm0
958 palignr $8,32(%esi), %xmm0
959 pcmpeqb 32(%edi), %xmm0
960 lea 32(%esi), %esi
961 pcmpeqb 48(%edi), %xmm3
962
963 lea 32(%edi), %edi
964 jz L(shr_8_gobble_loop)
965 pand %xmm0, %xmm3
966
967 cmp $0, %ecx
968 jge L(shr_8_gobble_next)
969 inc %edx
970 add $32, %ecx
971L(shr_8_gobble_next):
972 test %edx, %edx
973 jnz L(exit)
974
975 pmovmskb %xmm3, %edx
976 movdqa %xmm0, %xmm1
977 lea 32(%edi), %edi
978 lea 32(%esi), %esi
979 sub $0xffff, %edx
980 jnz L(exit)
981
982 lea (%ecx, %edi,1), %eax
983 lea 8(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400984 POP (%edi)
985 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800986 jmp L(less48bytes)
987
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +0400988#if !defined(USE_WCHAR) && !defined(USE_UTF16)
Bruce Beare124a5422010-10-11 12:24:41 -0700989 cfi_restore_state
990 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400991 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -0800992L(shr_9):
993 cmp $80, %ecx
994 lea -48(%ecx), %ecx
995 mov %edx, %eax
996 jae L(shr_9_gobble)
997
998 movdqa 16(%esi), %xmm1
999 movdqa %xmm1, %xmm2
1000 palignr $9,(%esi), %xmm1
1001 pcmpeqb (%edi), %xmm1
1002
1003 movdqa 32(%esi), %xmm3
1004 palignr $9,%xmm2, %xmm3
1005 pcmpeqb 16(%edi), %xmm3
1006
1007 pand %xmm1, %xmm3
1008 pmovmskb %xmm3, %edx
1009 lea 32(%edi), %edi
1010 lea 32(%esi), %esi
1011 sub $0xffff, %edx
1012 jnz L(exit)
1013 lea (%ecx, %edi,1), %eax
1014 lea 9(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001015 POP (%edi)
1016 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001017 jmp L(less48bytes)
1018
Bruce Beare124a5422010-10-11 12:24:41 -07001019 cfi_restore_state
1020 cfi_remember_state
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001021 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08001022L(shr_9_gobble):
1023 sub $32, %ecx
1024 movdqa 16(%esi), %xmm0
1025 palignr $9,(%esi), %xmm0
1026 pcmpeqb (%edi), %xmm0
1027
1028 movdqa 32(%esi), %xmm3
1029 palignr $9,16(%esi), %xmm3
1030 pcmpeqb 16(%edi), %xmm3
1031
1032L(shr_9_gobble_loop):
1033 pand %xmm0, %xmm3
1034 sub $32, %ecx
1035 pmovmskb %xmm3, %edx
1036 movdqa %xmm0, %xmm1
1037
1038 movdqa 64(%esi), %xmm3
1039 palignr $9,48(%esi), %xmm3
1040 sbb $0xffff, %edx
1041 movdqa 48(%esi), %xmm0
1042 palignr $9,32(%esi), %xmm0
1043 pcmpeqb 32(%edi), %xmm0
1044 lea 32(%esi), %esi
1045 pcmpeqb 48(%edi), %xmm3
1046
1047 lea 32(%edi), %edi
1048 jz L(shr_9_gobble_loop)
1049 pand %xmm0, %xmm3
1050
1051 cmp $0, %ecx
1052 jge L(shr_9_gobble_next)
1053 inc %edx
1054 add $32, %ecx
1055L(shr_9_gobble_next):
1056 test %edx, %edx
1057 jnz L(exit)
1058
1059 pmovmskb %xmm3, %edx
1060 movdqa %xmm0, %xmm1
1061 lea 32(%edi), %edi
1062 lea 32(%esi), %esi
1063 sub $0xffff, %edx
1064 jnz L(exit)
1065
1066 lea (%ecx, %edi,1), %eax
1067 lea 9(%ecx, %esi,1), %edx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001068 POP (%edi)
1069 POP (%esi)
Bruce Beare8ff1a272010-03-04 11:03:37 -08001070 jmp L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001071#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001072
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001073#if !defined(USE_WCHAR)
/* L(shr_10): shift-by-10 comparison entry.  Counts of 80 or more go to
   the bulk loop; otherwise exactly 32 bytes are compared here.  The second
   operand is rebuilt from aligned loads at %esi shifted right by 10 bytes.
   A mismatch goes to L(exit) with %xmm1 = low-half result; otherwise the
   tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_10):
	cmp	$80, %ecx		/* flags consumed by the jae below */
	mov	%edx, %eax		/* keep incoming %edx; L(exit) adds it to %esi */
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags intact */
	jae	L(shr_10_gobble)

	/* Load, realign, then compare both 16-byte halves.  */
	movdqa	16(%esi), %xmm1
	movdqa	32(%esi), %xmm3
	movdqa	%xmm1, %xmm2		/* middle chunk is needed by both halves */
	palignr	$10, (%esi), %xmm1
	palignr	$10, %xmm2, %xmm3
	pcmpeqb	(%edi), %xmm1
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%esi), %esi
	lea	32(%edi), %edi
	sub	$0xffff, %edx		/* 0 iff all 32 bytes were equal */
	jnz	L(exit)
	lea	(%ecx, %edi), %eax
	lea	10(%ecx, %esi), %edx	/* +10 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
1103
/* L(shr_10_gobble): bulk loop of the shift-by-10 comparison, 32 bytes per
   iteration; reached from L(shr_10) when the count is at least 80.  The
   second operand is rebuilt from aligned 16-byte loads at %esi shifted
   right by 10 bytes (palignr) and compared with aligned loads at %edi.
   %ecx is the running byte counter; %xmm0/%xmm3 hold the byte-equality
   results of the low/high 16 bytes.  A mismatch goes to L(exit) with
   %xmm1 = low-half result and %edx = combined mask - 0xffff; otherwise
   the tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_10_gobble):
	sub	$32, %ecx
	/* Prime the pipeline: equality results for the first 32 bytes.  */
	movdqa	16(%esi), %xmm0
	palignr	$10, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$10, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_10_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* CF = counter ran out; consumed by sbb */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* L(exit) inspects the low half in %xmm1 */

	/* Start on the next 32 bytes.  None of the SSE/lea instructions
	   below touch EFLAGS, so CF reaches the sbb and ZF reaches the jz.  */
	movdqa	64(%esi), %xmm3
	palignr	$10, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* 0 iff all 32 bytes equal and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$10, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_10_gobble_loop)
	pand	%xmm0, %xmm3

	/* Left the loop through a mismatch or through the counter borrow.
	   On a borrow, take back the extra 1 that sbb subtracted and put
	   the 32 back into the counter.  */
	test	%ecx, %ecx
	jge	L(shr_10_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_10_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* The 32 bytes just tested were equal; now test the 32 bytes whose
	   results were computed ahead in %xmm0/%xmm3.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	10(%ecx, %esi,1), %edx	/* +10 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001156#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001157
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001158#if !defined(USE_WCHAR) && !defined(USE_UTF16)
/* L(shr_11): shift-by-11 comparison entry.  Counts of 80 or more go to
   the bulk loop; otherwise exactly 32 bytes are compared here.  The second
   operand is rebuilt from aligned loads at %esi shifted right by 11 bytes.
   A mismatch goes to L(exit) with %xmm1 = low-half result; otherwise the
   tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_11):
	cmp	$80, %ecx		/* flags consumed by the jae below */
	mov	%edx, %eax		/* keep incoming %edx; L(exit) adds it to %esi */
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags intact */
	jae	L(shr_11_gobble)

	/* Load, realign, then compare both 16-byte halves.  */
	movdqa	16(%esi), %xmm1
	movdqa	32(%esi), %xmm3
	movdqa	%xmm1, %xmm2		/* middle chunk is needed by both halves */
	palignr	$11, (%esi), %xmm1
	palignr	$11, %xmm2, %xmm3
	pcmpeqb	(%edi), %xmm1
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%esi), %esi
	lea	32(%edi), %edi
	sub	$0xffff, %edx		/* 0 iff all 32 bytes were equal */
	jnz	L(exit)
	lea	(%ecx, %edi), %eax
	lea	11(%ecx, %esi), %edx	/* +11 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
1188
/* L(shr_11_gobble): bulk loop of the shift-by-11 comparison, 32 bytes per
   iteration; reached from L(shr_11) when the count is at least 80.  The
   second operand is rebuilt from aligned 16-byte loads at %esi shifted
   right by 11 bytes (palignr) and compared with aligned loads at %edi.
   %ecx is the running byte counter; %xmm0/%xmm3 hold the byte-equality
   results of the low/high 16 bytes.  A mismatch goes to L(exit) with
   %xmm1 = low-half result and %edx = combined mask - 0xffff; otherwise
   the tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_11_gobble):
	sub	$32, %ecx
	/* Prime the pipeline: equality results for the first 32 bytes.  */
	movdqa	16(%esi), %xmm0
	palignr	$11, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$11, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_11_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* CF = counter ran out; consumed by sbb */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* L(exit) inspects the low half in %xmm1 */

	/* Start on the next 32 bytes.  None of the SSE/lea instructions
	   below touch EFLAGS, so CF reaches the sbb and ZF reaches the jz.  */
	movdqa	64(%esi), %xmm3
	palignr	$11, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* 0 iff all 32 bytes equal and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$11, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_11_gobble_loop)
	pand	%xmm0, %xmm3

	/* Left the loop through a mismatch or through the counter borrow.
	   On a borrow, take back the extra 1 that sbb subtracted and put
	   the 32 back into the counter.  */
	test	%ecx, %ecx
	jge	L(shr_11_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_11_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* The 32 bytes just tested were equal; now test the 32 bytes whose
	   results were computed ahead in %xmm0/%xmm3.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	11(%ecx, %esi,1), %edx	/* +11 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001241#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001242
/* L(shr_12): shift-by-12 comparison entry.  Counts of 80 or more go to
   the bulk loop; otherwise exactly 32 bytes are compared here.  The second
   operand is rebuilt from aligned loads at %esi shifted right by 12 bytes.
   A mismatch goes to L(exit) with %xmm1 = low-half result; otherwise the
   tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_12):
	cmp	$80, %ecx		/* flags consumed by the jae below */
	mov	%edx, %eax		/* keep incoming %edx; L(exit) adds it to %esi */
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags intact */
	jae	L(shr_12_gobble)

	/* Load, realign, then compare both 16-byte halves.  */
	movdqa	16(%esi), %xmm1
	movdqa	32(%esi), %xmm3
	movdqa	%xmm1, %xmm2		/* middle chunk is needed by both halves */
	palignr	$12, (%esi), %xmm1
	palignr	$12, %xmm2, %xmm3
	pcmpeqb	(%edi), %xmm1
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%esi), %esi
	lea	32(%edi), %edi
	sub	$0xffff, %edx		/* 0 iff all 32 bytes were equal */
	jnz	L(exit)
	lea	(%ecx, %edi), %eax
	lea	12(%ecx, %esi), %edx	/* +12 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
1272
/* L(shr_12_gobble): bulk loop of the shift-by-12 comparison, 32 bytes per
   iteration; reached from L(shr_12) when the count is at least 80.  The
   second operand is rebuilt from aligned 16-byte loads at %esi shifted
   right by 12 bytes (palignr) and compared with aligned loads at %edi.
   %ecx is the running byte counter; %xmm0/%xmm3 hold the byte-equality
   results of the low/high 16 bytes.  A mismatch goes to L(exit) with
   %xmm1 = low-half result and %edx = combined mask - 0xffff; otherwise
   the tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_12_gobble):
	sub	$32, %ecx
	/* Prime the pipeline: equality results for the first 32 bytes.  */
	movdqa	16(%esi), %xmm0
	palignr	$12, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$12, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_12_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* CF = counter ran out; consumed by sbb */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* L(exit) inspects the low half in %xmm1 */

	/* Start on the next 32 bytes.  None of the SSE/lea instructions
	   below touch EFLAGS, so CF reaches the sbb and ZF reaches the jz.  */
	movdqa	64(%esi), %xmm3
	palignr	$12, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* 0 iff all 32 bytes equal and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$12, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_12_gobble_loop)
	pand	%xmm0, %xmm3

	/* Left the loop through a mismatch or through the counter borrow.
	   On a borrow, take back the extra 1 that sbb subtracted and put
	   the 32 back into the counter.  */
	test	%ecx, %ecx
	jge	L(shr_12_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_12_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* The 32 bytes just tested were equal; now test the 32 bytes whose
	   results were computed ahead in %xmm0/%xmm3.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	12(%ecx, %esi,1), %edx	/* +12 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
1325
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001326#if !defined(USE_WCHAR) && !defined(USE_UTF16)
/* L(shr_13): shift-by-13 comparison entry.  Counts of 80 or more go to
   the bulk loop; otherwise exactly 32 bytes are compared here.  The second
   operand is rebuilt from aligned loads at %esi shifted right by 13 bytes.
   A mismatch goes to L(exit) with %xmm1 = low-half result; otherwise the
   tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_13):
	cmp	$80, %ecx		/* flags consumed by the jae below */
	mov	%edx, %eax		/* keep incoming %edx; L(exit) adds it to %esi */
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags intact */
	jae	L(shr_13_gobble)

	/* Load, realign, then compare both 16-byte halves.  */
	movdqa	16(%esi), %xmm1
	movdqa	32(%esi), %xmm3
	movdqa	%xmm1, %xmm2		/* middle chunk is needed by both halves */
	palignr	$13, (%esi), %xmm1
	palignr	$13, %xmm2, %xmm3
	pcmpeqb	(%edi), %xmm1
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%esi), %esi
	lea	32(%edi), %edi
	sub	$0xffff, %edx		/* 0 iff all 32 bytes were equal */
	jnz	L(exit)
	lea	(%ecx, %edi), %eax
	lea	13(%ecx, %esi), %edx	/* +13 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
1356
/* L(shr_13_gobble): bulk loop of the shift-by-13 comparison, 32 bytes per
   iteration; reached from L(shr_13) when the count is at least 80.  The
   second operand is rebuilt from aligned 16-byte loads at %esi shifted
   right by 13 bytes (palignr) and compared with aligned loads at %edi.
   %ecx is the running byte counter; %xmm0/%xmm3 hold the byte-equality
   results of the low/high 16 bytes.  A mismatch goes to L(exit) with
   %xmm1 = low-half result and %edx = combined mask - 0xffff; otherwise
   the tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_13_gobble):
	sub	$32, %ecx
	/* Prime the pipeline: equality results for the first 32 bytes.  */
	movdqa	16(%esi), %xmm0
	palignr	$13, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$13, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_13_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* CF = counter ran out; consumed by sbb */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* L(exit) inspects the low half in %xmm1 */

	/* Start on the next 32 bytes.  None of the SSE/lea instructions
	   below touch EFLAGS, so CF reaches the sbb and ZF reaches the jz.  */
	movdqa	64(%esi), %xmm3
	palignr	$13, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* 0 iff all 32 bytes equal and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$13, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_13_gobble_loop)
	pand	%xmm0, %xmm3

	/* Left the loop through a mismatch or through the counter borrow.
	   On a borrow, take back the extra 1 that sbb subtracted and put
	   the 32 back into the counter.  */
	test	%ecx, %ecx
	jge	L(shr_13_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_13_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* The 32 bytes just tested were equal; now test the 32 bytes whose
	   results were computed ahead in %xmm0/%xmm3.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	13(%ecx, %esi,1), %edx	/* +13 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001409#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001410
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001411#if !defined(USE_WCHAR)
/* L(shr_14): shift-by-14 comparison entry.  Counts of 80 or more go to
   the bulk loop; otherwise exactly 32 bytes are compared here.  The second
   operand is rebuilt from aligned loads at %esi shifted right by 14 bytes.
   A mismatch goes to L(exit) with %xmm1 = low-half result; otherwise the
   tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_14):
	cmp	$80, %ecx		/* flags consumed by the jae below */
	mov	%edx, %eax		/* keep incoming %edx; L(exit) adds it to %esi */
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags intact */
	jae	L(shr_14_gobble)

	/* Load, realign, then compare both 16-byte halves.  */
	movdqa	16(%esi), %xmm1
	movdqa	32(%esi), %xmm3
	movdqa	%xmm1, %xmm2		/* middle chunk is needed by both halves */
	palignr	$14, (%esi), %xmm1
	palignr	$14, %xmm2, %xmm3
	pcmpeqb	(%edi), %xmm1
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%esi), %esi
	lea	32(%edi), %edi
	sub	$0xffff, %edx		/* 0 iff all 32 bytes were equal */
	jnz	L(exit)
	lea	(%ecx, %edi), %eax
	lea	14(%ecx, %esi), %edx	/* +14 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
1441
/* L(shr_14_gobble): bulk loop of the shift-by-14 comparison, 32 bytes per
   iteration; reached from L(shr_14) when the count is at least 80.  The
   second operand is rebuilt from aligned 16-byte loads at %esi shifted
   right by 14 bytes (palignr) and compared with aligned loads at %edi.
   %ecx is the running byte counter; %xmm0/%xmm3 hold the byte-equality
   results of the low/high 16 bytes.  A mismatch goes to L(exit) with
   %xmm1 = low-half result and %edx = combined mask - 0xffff; otherwise
   the tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_14_gobble):
	sub	$32, %ecx
	/* Prime the pipeline: equality results for the first 32 bytes.  */
	movdqa	16(%esi), %xmm0
	palignr	$14, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$14, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_14_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* CF = counter ran out; consumed by sbb */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* L(exit) inspects the low half in %xmm1 */

	/* Start on the next 32 bytes.  None of the SSE/lea instructions
	   below touch EFLAGS, so CF reaches the sbb and ZF reaches the jz.  */
	movdqa	64(%esi), %xmm3
	palignr	$14, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* 0 iff all 32 bytes equal and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$14, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_14_gobble_loop)
	pand	%xmm0, %xmm3

	/* Left the loop through a mismatch or through the counter borrow.
	   On a borrow, take back the extra 1 that sbb subtracted and put
	   the 32 back into the counter.  */
	test	%ecx, %ecx
	jge	L(shr_14_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_14_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* The 32 bytes just tested were equal; now test the 32 bytes whose
	   results were computed ahead in %xmm0/%xmm3.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	14(%ecx, %esi,1), %edx	/* +14 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001494#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001495
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001496#if !defined(USE_WCHAR) && !defined(USE_UTF16)
/* L(shr_15): shift-by-15 comparison entry.  Counts of 80 or more go to
   the bulk loop; otherwise exactly 32 bytes are compared here.  The second
   operand is rebuilt from aligned loads at %esi shifted right by 15 bytes.
   A mismatch goes to L(exit) with %xmm1 = low-half result; otherwise the
   tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_15):
	cmp	$80, %ecx		/* flags consumed by the jae below */
	mov	%edx, %eax		/* keep incoming %edx; L(exit) adds it to %esi */
	lea	-48(%ecx), %ecx		/* lea leaves the cmp flags intact */
	jae	L(shr_15_gobble)

	/* Load, realign, then compare both 16-byte halves.  */
	movdqa	16(%esi), %xmm1
	movdqa	32(%esi), %xmm3
	movdqa	%xmm1, %xmm2		/* middle chunk is needed by both halves */
	palignr	$15, (%esi), %xmm1
	palignr	$15, %xmm2, %xmm3
	pcmpeqb	(%edi), %xmm1
	pcmpeqb	16(%edi), %xmm3

	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%esi), %esi
	lea	32(%edi), %edi
	sub	$0xffff, %edx		/* 0 iff all 32 bytes were equal */
	jnz	L(exit)
	lea	(%ecx, %edi), %eax
	lea	15(%ecx, %esi), %edx	/* +15 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
1526
/* L(shr_15_gobble): bulk loop of the shift-by-15 comparison, 32 bytes per
   iteration; reached from L(shr_15) when the count is at least 80.  The
   second operand is rebuilt from aligned 16-byte loads at %esi shifted
   right by 15 bytes (palignr) and compared with aligned loads at %edi.
   %ecx is the running byte counter; %xmm0/%xmm3 hold the byte-equality
   results of the low/high 16 bytes.  A mismatch goes to L(exit) with
   %xmm1 = low-half result and %edx = combined mask - 0xffff; otherwise
   the tail goes to L(less48bytes) with end pointers in %eax/%edx.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(shr_15_gobble):
	sub	$32, %ecx
	/* Prime the pipeline: equality results for the first 32 bytes.  */
	movdqa	16(%esi), %xmm0
	palignr	$15, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$15, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_15_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* CF = counter ran out; consumed by sbb */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* L(exit) inspects the low half in %xmm1 */

	/* Start on the next 32 bytes.  None of the SSE/lea instructions
	   below touch EFLAGS, so CF reaches the sbb and ZF reaches the jz.  */
	movdqa	64(%esi), %xmm3
	palignr	$15, 48(%esi), %xmm3
	sbb	$0xffff, %edx		/* 0 iff all 32 bytes equal and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$15, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_15_gobble_loop)
	pand	%xmm0, %xmm3

	/* Left the loop through a mismatch or through the counter borrow.
	   On a borrow, take back the extra 1 that sbb subtracted and put
	   the 32 back into the counter.  */
	test	%ecx, %ecx
	jge	L(shr_15_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_15_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* The 32 bytes just tested were equal; now test the 32 bytes whose
	   results were computed ahead in %xmm0/%xmm3.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	15(%ecx, %esi,1), %edx	/* +15 re-applies the shift to the %esi side */
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001579#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001580
/* L(exit): common mismatch exit of the shr_N paths.  On entry %xmm1 holds
   the byte-equality result of the first 16 bytes of the 32-byte window
   just compared, %edx = (combined 32-byte mask - 0xffff), %edi/%esi
   already point past that window, and %eax holds the value the shr_N
   entry saved from %edx.  If the first 16 bytes contain the mismatch,
   both pointers step back 16 and %edx is replaced by the first-half mask
   difference; in either case the code after L(less16bytes) addresses the
   offending 16 bytes at offsets -16..-1.  */
	cfi_restore_state
	cfi_remember_state
	.p2align 4
L(exit):
	pmovmskb %xmm1, %ebx
	sub	$0xffff, %ebx		/* 0 iff the first 16 bytes were all equal */
	jz	L(first16bytes)
	lea	-16(%edi), %edi
	lea	-16(%esi), %esi
	mov	%ebx, %edx

L(first16bytes):
	/* NOTE(review): %eax is presumably the byte shift of the %esi-side
	   buffer, turning the aligned %esi back into a real data pointer --
	   confirm against the code that dispatches to L(shr_N).  */
	add	%eax, %esi
L(less16bytes):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001595
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001596#if !defined(USE_WCHAR) && !defined(USE_UTF16)
/* Byte-granular tail of L(less16bytes) (plain memcmp build).
   %dl/%dh carry the low 16 bits of (equality mask - 0xffff).  The ladder
   probes bits from lowest to highest and stops at the first set one; bit k
   selects the byte at offset k-16 from %edi/%esi.  The result is the
   difference of the two zero-extended bytes.  When %dl is zero the
   pointers are advanced by 8 so the same L(ByteNN) exits can be reused
   for the upper eight bytes.  */
	test	%dl, %dl
	jz	L(next_24_bytes)

	test	$0x01, %dl
	jnz	L(Byte16)

	test	$0x02, %dl
	jnz	L(Byte17)

	test	$0x04, %dl
	jnz	L(Byte18)

	test	$0x08, %dl
	jnz	L(Byte19)

	test	$0x10, %dl
	jnz	L(Byte20)

	test	$0x20, %dl
	jnz	L(Byte21)

	test	$0x40, %dl
	jnz	L(Byte22)
	/* %dl is non-zero and bits 0..6 are clear: only bit 7 is left.  */
L(Byte23):
	movzbl	-9(%edi), %eax
	movzbl	-9(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(Byte16):
	movzbl	-16(%edi), %eax
	movzbl	-16(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(Byte17):
	movzbl	-15(%edi), %eax
	movzbl	-15(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(Byte18):
	movzbl	-14(%edi), %eax
	movzbl	-14(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(Byte19):
	movzbl	-13(%edi), %eax
	movzbl	-13(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(Byte20):
	movzbl	-12(%edi), %eax
	movzbl	-12(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(Byte21):
	movzbl	-11(%edi), %eax
	movzbl	-11(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(Byte22):
	movzbl	-10(%edi), %eax
	movzbl	-10(%esi), %edx
	sub	%edx, %eax
	RETURN

	.p2align 4
L(next_24_bytes):
	/* Mismatch is in the upper eight bytes: slide both pointers by 8 and
	   run the same ladder on %dh.  */
	lea	8(%edi), %edi
	lea	8(%esi), %esi
	test	$0x01, %dh
	jnz	L(Byte16)

	test	$0x02, %dh
	jnz	L(Byte17)

	test	$0x04, %dh
	jnz	L(Byte18)

	test	$0x08, %dh
	jnz	L(Byte19)

	test	$0x10, %dh
	jnz	L(Byte20)

	test	$0x20, %dh
	jnz	L(Byte21)

	test	$0x40, %dh
	jnz	L(Byte22)

	/* Falls through (across the alignment padding) to the last byte.  */
	.p2align 4
L(Byte31):
	movzbl	-9(%edi), %eax
	movzbl	-9(%esi), %edx
	sub	%edx, %eax
	RETURN_END
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001706#elif defined(USE_AS_WMEMCMP)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001707
/* special for wmemcmp */
/* %dl/%dh carry the low 16 bits of (equality mask - 0xffff); each nibble
   covers one 4-byte element of the 16 bytes at -16..-1 from %edi/%esi.
   The first element with a non-zero nibble is compared as a signed
   32-bit value: the result is 1 when the %edi-side element is greater,
   -1 otherwise.  The `mov $1, %eax' is placed before each cmp; mov does
   not modify flags, so the jg still sees the cmp result.  */
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	mov	-16(%edi), %ecx
	mov	$1, %eax
	cmp	-16(%esi), %ecx
	jg	L(nequal_bigger)
	neg	%eax
	RETURN

	.p2align 4
L(second_double_word):
	mov	-12(%edi), %ecx
	mov	$1, %eax
	cmp	-12(%esi), %ecx
	jg	L(nequal_bigger)
	neg	%eax
	RETURN

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	mov	-8(%edi), %ecx
	mov	$1, %eax
	cmp	-8(%esi), %ecx
	jg	L(nequal_bigger)
	neg	%eax
	RETURN

	.p2align 4
L(fourth_double_word):
	mov	-4(%edi), %ecx
	mov	$1, %eax
	cmp	-4(%esi), %ecx
	jg	L(nequal_bigger)
	neg	%eax
	RETURN

	.p2align 4
L(nequal_bigger):
	/* %eax is already 1 here.  */
	RETURN_END
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04001752
1753#elif defined(USE_AS_MEMCMP16)
1754
/* special for __memcmp16 */
/* %dl/%dh carry the low 16 bits of (equality mask - 0xffff); each pair of
   bits covers one 16-bit element of the 16 bytes at -16..-1 from
   %edi/%esi.  The masks 3/15/63 test "any bit set up to and including
   this element", narrowing down to the first differing element; the
   result is the difference of the two zero-extended 16-bit values.
   %ebx is used as scratch here and is reloaded by the return macros.  */
	test	%dl, %dl
	jz	L(next_four_words)
	test	$15, %dl
	jz	L(second_two_words)
	test	$3, %dl
	jz	L(second_word)
	movzwl	-16(%edi), %eax
	movzwl	-16(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(second_word):
	movzwl	-14(%edi), %eax
	movzwl	-14(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(second_two_words):
	test	$63, %dl
	jz	L(fourth_word)
	movzwl	-12(%edi), %eax
	movzwl	-12(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(fourth_word):
	movzwl	-10(%edi), %eax
	movzwl	-10(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(next_four_words):
	test	$15, %dh
	jz	L(fourth_two_words)
	test	$3, %dh
	jz	L(sixth_word)
	movzwl	-8(%edi), %eax
	movzwl	-8(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(sixth_word):
	movzwl	-6(%edi), %eax
	movzwl	-6(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(fourth_two_words):
	test	$63, %dh
	jz	L(eighth_word)
	movzwl	-4(%edi), %eax
	movzwl	-4(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(eighth_word):
	movzwl	-2(%edi), %eax
	movzwl	-2(%esi), %ebx
	subl	%ebx, %eax
	/* Last exit of this tail: use RETURN_END, as the byte and wmemcmp
	   variants of this tail do, because the CFI_PUSH (%ebx) that follows
	   the #endif sets up the unwind state for the code after it.
	   NOTE(review): assumes RETURN is RETURN_END plus CFI re-setup for
	   following code -- confirm against the macro definitions.  */
	RETURN_END
1823#else
1824# error Unreachable preprocessor case
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001825#endif
1826
/* From here on only %ebx is described as saved on the stack
   (NOTE(review): CFI_PUSH is presumably unwind-info only -- confirm
   against its definition).

   L(more8bytes): tail dispatcher for 8 <= %ecx < 16 (larger counts are
   forwarded to L(more16bytes)).  Jumps to the L(<n>bytes) entry matching
   the count; the last value in the range needs no compare.  The wide
   builds only test the counts that are multiples of the element size.  */
	CFI_PUSH (%ebx)

	.p2align 4
L(more8bytes):
	cmp	$16, %ecx
	jae	L(more16bytes)
	cmp	$8, %ecx
	je	L(8bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$9, %ecx
	je	L(9bytes)
	cmp	$10, %ecx
	je	L(10bytes)
	cmp	$11, %ecx
	je	L(11bytes)
	cmp	$12, %ecx
	je	L(12bytes)
	cmp	$13, %ecx
	je	L(13bytes)
	cmp	$14, %ecx
	je	L(14bytes)
	jmp	L(15bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(12bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$10, %ecx
	je	L(10bytes)
	cmp	$12, %ecx
	je	L(12bytes)
	jmp	L(14bytes)
#else
# error Unreachable preprocessor case
#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001860
/* L(more16bytes): tail dispatcher for 16 <= %ecx < 24 (larger counts are
   forwarded to L(more24bytes)).  Jumps to the L(<n>bytes) entry matching
   the count; the last value in the range needs no compare.  The wide
   builds only test the counts that are multiples of the element size.  */
	.p2align 4
L(more16bytes):
	cmp	$24, %ecx
	jae	L(more24bytes)
	cmp	$16, %ecx
	je	L(16bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$17, %ecx
	je	L(17bytes)
	cmp	$18, %ecx
	je	L(18bytes)
	cmp	$19, %ecx
	je	L(19bytes)
	cmp	$20, %ecx
	je	L(20bytes)
	cmp	$21, %ecx
	je	L(21bytes)
	cmp	$22, %ecx
	je	L(22bytes)
	jmp	L(23bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(20bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$18, %ecx
	je	L(18bytes)
	cmp	$20, %ecx
	je	L(20bytes)
	jmp	L(22bytes)
#else
# error Unreachable preprocessor case
#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001892
/* L(more24bytes): tail dispatcher for 24 <= %ecx < 32 (larger counts are
   forwarded to L(more32bytes)).  Jumps to the L(<n>bytes) entry matching
   the count; the last value in the range needs no compare.  The wide
   builds only test the counts that are multiples of the element size.  */
	.p2align 4
L(more24bytes):
	cmp	$32, %ecx
	jae	L(more32bytes)
	cmp	$24, %ecx
	je	L(24bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$25, %ecx
	je	L(25bytes)
	cmp	$26, %ecx
	je	L(26bytes)
	cmp	$27, %ecx
	je	L(27bytes)
	cmp	$28, %ecx
	je	L(28bytes)
	cmp	$29, %ecx
	je	L(29bytes)
	cmp	$30, %ecx
	je	L(30bytes)
	jmp	L(31bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(28bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$26, %ecx
	je	L(26bytes)
	cmp	$28, %ecx
	je	L(28bytes)
	jmp	L(30bytes)
#else
# error Unreachable preprocessor case
#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001924
/* L(more32bytes): tail dispatcher for 32 <= %ecx < 40 (larger counts are
   forwarded to L(more40bytes)).  Jumps to the L(<n>bytes) entry matching
   the count; the last value in the range needs no compare.  The wide
   builds only test the counts that are multiples of the element size.  */
	.p2align 4
L(more32bytes):
	cmp	$40, %ecx
	jae	L(more40bytes)
	cmp	$32, %ecx
	je	L(32bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$33, %ecx
	je	L(33bytes)
	cmp	$34, %ecx
	je	L(34bytes)
	cmp	$35, %ecx
	je	L(35bytes)
	cmp	$36, %ecx
	je	L(36bytes)
	cmp	$37, %ecx
	je	L(37bytes)
	cmp	$38, %ecx
	je	L(38bytes)
	jmp	L(39bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(36bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$34, %ecx
	je	L(34bytes)
	cmp	$36, %ecx
	je	L(36bytes)
	jmp	L(38bytes)
#else
# error Unreachable preprocessor case
#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08001956
/* L(less48bytes): entry of the tail dispatch.  %ecx is the number of
   bytes left; %eax/%edx point just past the end of the two buffers (the
   L(<n>bytes) code reads at negative offsets from them).  Counts of 8 or
   more go to L(more8bytes); smaller counts are matched here.
   NOTE(review): every count not explicitly tested (including 0 and 1 in
   the byte build) lands on the final jmp; presumably the extra bytes read
   before the tail were already found equal -- confirm with the callers.  */
	.p2align 4
L(less48bytes):
	cmp	$8, %ecx
	jae	L(more8bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$2, %ecx
	je	L(2bytes)
	cmp	$3, %ecx
	je	L(3bytes)
	cmp	$4, %ecx
	je	L(4bytes)
	cmp	$5, %ecx
	je	L(5bytes)
	cmp	$6, %ecx
	je	L(6bytes)
	jmp	L(7bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(4bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$2, %ecx
	je	L(2bytes)
	cmp	$4, %ecx
	je	L(4bytes)
	jmp	L(6bytes)
#else
# error Unreachable preprocessor case
#endif
1984
/* L(more40bytes): tail dispatcher for counts of 40 and above (reached
   from L(more32bytes) when %ecx >= 40).  Jumps to the L(<n>bytes) entry
   matching the count; anything not explicitly tested takes the final jmp,
   so the count is assumed to be below 48.
   The 4-byte-element build has a single remaining candidate (44); it is
   reached with an explicit jmp, like the other dispatchers, instead of
   relying on falling through alignment padding into whatever code is
   emitted next.  Unsupported macro combinations fail the build, matching
   the sibling dispatchers.  */
	.p2align 4
L(more40bytes):
	cmp	$40, %ecx
	je	L(40bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$41, %ecx
	je	L(41bytes)
	cmp	$42, %ecx
	je	L(42bytes)
	cmp	$43, %ecx
	je	L(43bytes)
	cmp	$44, %ecx
	je	L(44bytes)
	cmp	$45, %ecx
	je	L(45bytes)
	cmp	$46, %ecx
	je	L(46bytes)
	jmp	L(47bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(44bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$42, %ecx
	je	L(42bytes)
	cmp	$44, %ecx
	je	L(44bytes)
	jmp	L(46bytes)
#else
# error Unreachable preprocessor case
#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08002010
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04002011#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
/* L(44bytes) .. L(4bytes): fall-through ladder for tails whose length is
   a multiple of four (plain memcmp build).  %eax/%edx point just past the
   end of the two buffers; each rung compares one 32-bit word at a
   negative offset and leaves for L(find_diff) on the first difference,
   with the two words in %ecx/%ebx.  If every word matches the function
   returns 0.  */
	.p2align 4
L(44bytes):
	mov	-44(%eax), %ecx
	mov	-44(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(40bytes):
	mov	-40(%eax), %ecx
	mov	-40(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(36bytes):
	mov	-36(%eax), %ecx
	mov	-36(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(32bytes):
	mov	-32(%eax), %ecx
	mov	-32(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(28bytes):
	mov	-28(%eax), %ecx
	mov	-28(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(24bytes):
	mov	-24(%eax), %ecx
	mov	-24(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(20bytes):
	mov	-20(%eax), %ecx
	mov	-20(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(16bytes):
	mov	-16(%eax), %ecx
	mov	-16(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(12bytes):
	mov	-12(%eax), %ecx
	mov	-12(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(8bytes):
	mov	-8(%eax), %ecx
	mov	-8(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(4bytes):
	mov	-4(%eax), %ecx
	mov	-4(%edx), %ebx
	/* Zero the return value once %eax is no longer needed as a base and
	   before the cmp, so xor (which writes flags) can be used and the
	   jne still sees the comparison result.  */
	xor	%eax, %eax
	cmp	%ebx, %ecx
	jne	L(find_diff)
	POP (%ebx)
	ret
	/* Unwind state for the code that follows the ret.  */
	CFI_PUSH (%ebx)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04002072#elif defined(USE_AS_WMEMCMP)
2073
/* L(44bytes) .. L(4bytes): fall-through tail ladder of the wmemcmp build.
   %eax/%edx point just past the end of the two buffers; each rung
   compares one 32-bit element at a negative offset, straight against
   memory, and leaves for L(find_diff) on the first difference with the
   %eax-side element in %ecx and the cmp flags live.  If every element
   matches the function returns 0.  */
	.p2align 4
L(44bytes):
	mov	-44(%eax), %ecx
	cmp	-44(%edx), %ecx
	jne	L(find_diff)
L(40bytes):
	mov	-40(%eax), %ecx
	cmp	-40(%edx), %ecx
	jne	L(find_diff)
L(36bytes):
	mov	-36(%eax), %ecx
	cmp	-36(%edx), %ecx
	jne	L(find_diff)
L(32bytes):
	mov	-32(%eax), %ecx
	cmp	-32(%edx), %ecx
	jne	L(find_diff)
L(28bytes):
	mov	-28(%eax), %ecx
	cmp	-28(%edx), %ecx
	jne	L(find_diff)
L(24bytes):
	mov	-24(%eax), %ecx
	cmp	-24(%edx), %ecx
	jne	L(find_diff)
L(20bytes):
	mov	-20(%eax), %ecx
	cmp	-20(%edx), %ecx
	jne	L(find_diff)
L(16bytes):
	mov	-16(%eax), %ecx
	cmp	-16(%edx), %ecx
	jne	L(find_diff)
L(12bytes):
	mov	-12(%eax), %ecx
	cmp	-12(%edx), %ecx
	jne	L(find_diff)
L(8bytes):
	mov	-8(%eax), %ecx
	cmp	-8(%edx), %ecx
	jne	L(find_diff)
L(4bytes):
	mov	-4(%eax), %ecx
	/* %eax is done as a base register: clear it for the "equal" return
	   before the cmp, since xor would destroy the flags afterwards.  */
	xor	%eax, %eax
	cmp	-4(%edx), %ecx
	jne	L(find_diff)
	POP (%ebx)
	ret
	/* Unwind state for the code that follows the ret.  */
	CFI_PUSH (%ebx)
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04002123#elif defined USE_AS_MEMCMP16
2124
/* L(46bytes) .. L(2bytes): fall-through tail ladder of the __memcmp16
   build.  %eax/%edx point just past the end of the two buffers.  Each
   rung subtracts the zero-extended 16-bit elements found <n> bytes before
   the end and leaves for L(memcmp16_exit) with the non-zero difference in
   %ecx; the final rung returns its difference (0 when equal) directly.

   All rungs but the last are identical except for the offset, so they
   are generated by a local macro.  MEMCMP16_RUNG(n) defines the label
   L(<n>bytes) followed by the four-instruction compare step.  */
#define MEMCMP16_RUNG(n)			\
L(n##bytes):					\
	movzwl	-n(%eax), %ecx;			\
	movzwl	-n(%edx), %ebx;			\
	subl	%ebx, %ecx;			\
	jne	L(memcmp16_exit)

	.p2align 4
	MEMCMP16_RUNG(46)
	MEMCMP16_RUNG(44)
	MEMCMP16_RUNG(42)
	MEMCMP16_RUNG(40)
	MEMCMP16_RUNG(38)
	MEMCMP16_RUNG(36)
	MEMCMP16_RUNG(34)
	MEMCMP16_RUNG(32)
	MEMCMP16_RUNG(30)
	MEMCMP16_RUNG(28)
	MEMCMP16_RUNG(26)
	MEMCMP16_RUNG(24)
	MEMCMP16_RUNG(22)
	MEMCMP16_RUNG(20)
	MEMCMP16_RUNG(18)
	MEMCMP16_RUNG(16)
	MEMCMP16_RUNG(14)
	MEMCMP16_RUNG(12)
	MEMCMP16_RUNG(10)
	MEMCMP16_RUNG(8)
	MEMCMP16_RUNG(6)
	MEMCMP16_RUNG(4)
#undef MEMCMP16_RUNG
L(2bytes):
	/* Last element: the difference itself is the return value.  */
	movzwl	-2(%eax), %eax
	movzwl	-2(%edx), %ebx
	subl	%ebx, %eax
	POP (%ebx)
	ret
	/* Unwind state for the code that follows the ret.  */
	CFI_PUSH (%ebx)
2243#else
2244# error Unreachable preprocessor case
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002245#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08002246
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04002247#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002248
/*
 * Byte-wise build only (neither USE_AS_WMEMCMP nor USE_AS_MEMCMP16).
 * Ladder with entry labels 45, 41, ..., 5.  Entering at L(Nbytes) compares
 * the N bytes immediately below the addresses in %eax and %edx: dwords at
 * offsets -N, -N+4, ..., -5, then the single byte at offset -1.  Control
 * falls through from one label to the next while the data matches.
 *
 * The first unequal dword pair is handed to L(find_diff) with the %eax-side
 * dword in %ecx and the %edx-side dword in %ebx.
 *
 * NOTE(review): the negative offsets suggest %eax and %edx point just past
 * the end of the two buffers; that setup is outside this chunk -- confirm.
 */
2249 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002250L(45bytes):
2251 mov -45(%eax), %ecx
2252 mov -45(%edx), %ebx
2253 cmp %ebx, %ecx
2254 jne L(find_diff)
2255L(41bytes):
2256 mov -41(%eax), %ecx
2257 mov -41(%edx), %ebx
2258 cmp %ebx, %ecx
2259 jne L(find_diff)
2260L(37bytes):
2261 mov -37(%eax), %ecx
2262 mov -37(%edx), %ebx
2263 cmp %ebx, %ecx
2264 jne L(find_diff)
2265L(33bytes):
2266 mov -33(%eax), %ecx
2267 mov -33(%edx), %ebx
2268 cmp %ebx, %ecx
2269 jne L(find_diff)
2270L(29bytes):
2271 mov -29(%eax), %ecx
2272 mov -29(%edx), %ebx
2273 cmp %ebx, %ecx
2274 jne L(find_diff)
2275L(25bytes):
2276 mov -25(%eax), %ecx
2277 mov -25(%edx), %ebx
2278 cmp %ebx, %ecx
2279 jne L(find_diff)
2280L(21bytes):
2281 mov -21(%eax), %ecx
2282 mov -21(%edx), %ebx
2283 cmp %ebx, %ecx
2284 jne L(find_diff)
2285L(17bytes):
2286 mov -17(%eax), %ecx
2287 mov -17(%edx), %ebx
2288 cmp %ebx, %ecx
2289 jne L(find_diff)
2290L(13bytes):
2291 mov -13(%eax), %ecx
2292 mov -13(%edx), %ebx
2293 cmp %ebx, %ecx
2294 jne L(find_diff)
2295L(9bytes):
2296 mov -9(%eax), %ecx
2297 mov -9(%edx), %ebx
2298 cmp %ebx, %ecx
2299 jne L(find_diff)
2300L(5bytes):
2301 mov -5(%eax), %ecx
2302 mov -5(%edx), %ebx
2303 cmp %ebx, %ecx
2304 jne L(find_diff)
/*
 * Final byte at offset -1.  The "equal" result (0) is loaded into %eax with
 * mov, which leaves EFLAGS untouched, so the outcome of the byte cmp is
 * still available to the jne below and, when that branch is taken, to the
 * ja inside L(end).  Do not replace the mov with a flag-writing idiom.
 */
2305 movzbl -1(%eax), %ecx
2306 cmp -1(%edx), %cl
2307 mov $0, %eax
2308 jne L(end)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002309 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002310 ret
/* NOTE(review): CFI_PUSH is defined outside this view; after ret it
   presumably only restores the unwind bookkeeping for the code that follows
   -- confirm against the macro definitions.  */
2311 CFI_PUSH (%ebx)
2312
/*
 * Byte-wise build only.  Ladder with entry labels 46, 42, ..., 6, 2.
 * Entering at L(Nbytes) compares the N bytes immediately below the addresses
 * in %eax and %edx: dwords at offsets -N, -N+4, ..., -6, then the two bytes
 * at offsets -2 and -1.  Control falls through from label to label while
 * the data matches.
 *
 * The first unequal dword pair goes to L(find_diff) with the %eax-side
 * dword in %ecx and the %edx-side dword in %ebx.
 *
 * NOTE(review): %eax and %edx presumably point just past the end of the two
 * buffers; that setup is outside this chunk -- confirm.
 */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002313 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002314L(46bytes):
2315 mov -46(%eax), %ecx
2316 mov -46(%edx), %ebx
2317 cmp %ebx, %ecx
2318 jne L(find_diff)
2319L(42bytes):
2320 mov -42(%eax), %ecx
2321 mov -42(%edx), %ebx
2322 cmp %ebx, %ecx
2323 jne L(find_diff)
2324L(38bytes):
2325 mov -38(%eax), %ecx
2326 mov -38(%edx), %ebx
2327 cmp %ebx, %ecx
2328 jne L(find_diff)
2329L(34bytes):
2330 mov -34(%eax), %ecx
2331 mov -34(%edx), %ebx
2332 cmp %ebx, %ecx
2333 jne L(find_diff)
2334L(30bytes):
2335 mov -30(%eax), %ecx
2336 mov -30(%edx), %ebx
2337 cmp %ebx, %ecx
2338 jne L(find_diff)
2339L(26bytes):
2340 mov -26(%eax), %ecx
2341 mov -26(%edx), %ebx
2342 cmp %ebx, %ecx
2343 jne L(find_diff)
2344L(22bytes):
2345 mov -22(%eax), %ecx
2346 mov -22(%edx), %ebx
2347 cmp %ebx, %ecx
2348 jne L(find_diff)
2349L(18bytes):
2350 mov -18(%eax), %ecx
2351 mov -18(%edx), %ebx
2352 cmp %ebx, %ecx
2353 jne L(find_diff)
2354L(14bytes):
2355 mov -14(%eax), %ecx
2356 mov -14(%edx), %ebx
2357 cmp %ebx, %ecx
2358 jne L(find_diff)
2359L(10bytes):
2360 mov -10(%eax), %ecx
2361 mov -10(%edx), %ebx
2362 cmp %ebx, %ecx
2363 jne L(find_diff)
2364L(6bytes):
2365 mov -6(%eax), %ecx
2366 mov -6(%edx), %ebx
2367 cmp %ebx, %ecx
2368 jne L(find_diff)
/*
 * Two-byte tail.  Both bytes are fetched with one zero-extending word load
 * per side; on little-endian x86 %cl/%bl hold the byte at offset -2 and
 * %ch/%bh the byte at offset -1, so they are tested in address order.
 * %eax is cleared with mov (flags preserved) between the second cmp and its
 * jne, giving a zero result on the fall-through path while keeping the
 * compare outcome live for the ja in L(end).
 */
2369L(2bytes):
2370 movzwl -2(%eax), %ecx
2371 movzwl -2(%edx), %ebx
2372 cmp %bl, %cl
2373 jne L(end)
2374 cmp %bh, %ch
2375 mov $0, %eax
2376 jne L(end)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002377 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002378 ret
/* NOTE(review): CFI_PUSH is defined outside this view; after ret it
   presumably only restores the unwind bookkeeping for the code that follows
   -- confirm against the macro definitions.  */
2379 CFI_PUSH (%ebx)
2380
/*
 * Byte-wise build only.  Ladder with entry labels 47, 43, ..., 7, 3.
 * Entering at L(Nbytes) compares the N bytes immediately below the addresses
 * in %eax and %edx: dwords at offsets -N, -N+4, ..., -7, then the three
 * bytes at offsets -3, -2 and -1.  Control falls through from label to label
 * while the data matches.
 *
 * The first unequal dword pair goes to L(find_diff) with the %eax-side
 * dword in %ecx and the %edx-side dword in %ebx.
 *
 * NOTE(review): %eax and %edx presumably point just past the end of the two
 * buffers; that setup is outside this chunk -- confirm.
 */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002381 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002382L(47bytes):
2383 movl -47(%eax), %ecx
2384 movl -47(%edx), %ebx
2385 cmp %ebx, %ecx
2386 jne L(find_diff)
2387L(43bytes):
2388 movl -43(%eax), %ecx
2389 movl -43(%edx), %ebx
2390 cmp %ebx, %ecx
2391 jne L(find_diff)
2392L(39bytes):
2393 movl -39(%eax), %ecx
2394 movl -39(%edx), %ebx
2395 cmp %ebx, %ecx
2396 jne L(find_diff)
2397L(35bytes):
2398 movl -35(%eax), %ecx
2399 movl -35(%edx), %ebx
2400 cmp %ebx, %ecx
2401 jne L(find_diff)
2402L(31bytes):
2403 movl -31(%eax), %ecx
2404 movl -31(%edx), %ebx
2405 cmp %ebx, %ecx
2406 jne L(find_diff)
2407L(27bytes):
2408 movl -27(%eax), %ecx
2409 movl -27(%edx), %ebx
2410 cmp %ebx, %ecx
2411 jne L(find_diff)
2412L(23bytes):
2413 movl -23(%eax), %ecx
2414 movl -23(%edx), %ebx
2415 cmp %ebx, %ecx
2416 jne L(find_diff)
2417L(19bytes):
2418 movl -19(%eax), %ecx
2419 movl -19(%edx), %ebx
2420 cmp %ebx, %ecx
2421 jne L(find_diff)
2422L(15bytes):
2423 movl -15(%eax), %ecx
2424 movl -15(%edx), %ebx
2425 cmp %ebx, %ecx
2426 jne L(find_diff)
2427L(11bytes):
2428 movl -11(%eax), %ecx
2429 movl -11(%edx), %ebx
2430 cmp %ebx, %ecx
2431 jne L(find_diff)
2432L(7bytes):
2433 movl -7(%eax), %ecx
2434 movl -7(%edx), %ebx
2435 cmp %ebx, %ecx
2436 jne L(find_diff)
/*
 * Three-byte tail.  A zero-extending word load per side fetches the bytes at
 * offsets -3 (low half, %cl/%bl) and -2 (high half).  The low bytes are
 * compared first; once they are known equal, the unsigned 16-bit compare is
 * decided solely by the byte at offset -2.
 */
2437L(3bytes):
2438 movzwl -3(%eax), %ecx
2439 movzwl -3(%edx), %ebx
2440 cmpb %bl, %cl
2441 jne L(end)
2442 cmp %bx, %cx
2443 jne L(end)
/*
 * Last byte at offset -1.  %eax is overwritten with the byte itself (the
 * pointer is not needed afterwards), compared, and then cleared with mov,
 * which preserves EFLAGS for the jne and for the ja in L(end).
 */
2444 movzbl -1(%eax), %eax
2445 cmpb -1(%edx), %al
2446 mov $0, %eax
2447 jne L(end)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002448 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002449 ret
/* NOTE(review): CFI_PUSH is defined outside this view; after ret it
   presumably only restores the unwind bookkeeping for the code that follows
   -- confirm against the macro definitions.  */
2450 CFI_PUSH (%ebx)
2451
/*
 * Byte-wise build only.  L(find_diff) is reached when a dword compare found
 * a mismatch: %ecx holds the %eax-side dword and %ebx the %edx-side dword
 * (see the jne L(find_diff) sites in the ladders).  It locates the
 * lowest-addressed differing byte and leaves the flags of an unsigned
 * compare of that byte pair for L(end).
 *
 * On little-endian x86 the low byte of each register is the lowest-addressed
 * byte.  Each half is handled the same way: compare the low bytes; if they
 * are equal, a 16-bit compare is decided purely by the next byte.  The
 * registers are then shifted right by 16 to expose the upper two bytes.
 */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002452 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002453L(find_diff):
2454 cmpb %bl, %cl
2455 jne L(end)
2456 cmp %bx, %cx
2457 jne L(end)
2458 shr $16,%ecx
2459 shr $16,%ebx
2460 cmp %bl, %cl
2461 jne L(end)
/* The first three bytes matched, so this compare is decided by the fourth
   byte; it has no branch of its own and falls through into L(end) with its
   flags still live.  */
2462 cmp %bx, %cx
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002463
/*
 * L(end): turn the pending unsigned compare into the result.  POP and mov do
 * not alter EFLAGS, so ja still sees the compare executed before the jump
 * (or fall-through) here.  Returns 1 in %eax when the %eax-side byte was
 * above the %edx-side byte, otherwise negates it to -1.  Every entry visible
 * in this chunk arrives with unequal operands; entries from outside this
 * view are presumably the same -- confirm.
 */
2464 .p2align 4
Bruce Beare8ff1a272010-03-04 11:03:37 -08002465L(end):
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002466 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002467 mov $1, %eax
2468 ja L(bigger)
2469 neg %eax
2470L(bigger):
2471 ret
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04002472#elif defined(USE_AS_WMEMCMP)
Bruce Beare8ff1a272010-03-04 11:03:37 -08002473
/*
 * USE_AS_WMEMCMP variant of L(find_diff).  The branch sites that jump here
 * are outside this chunk; they are expected to arrive with the flags of a
 * compare of two unequal elements still set -- confirm against the callers.
 * POP and mov leave EFLAGS untouched, so jg tests that earlier compare.
 * Unlike the byte-wise variant this uses a signed condition (jg).
 * Returns 1 in %eax when the compare was "greater", otherwise -1.
 */
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002474 .p2align 4
2475L(find_diff):
2476 POP (%ebx)
2477 mov $1, %eax
2478 jg L(find_diff_bigger)
2479 neg %eax
2480 ret
2481
/* "Greater" exit: %eax already holds 1 and %ebx has already been popped
   above, so only the ret remains.  */
2482 .p2align 4
2483L(find_diff_bigger):
2484 ret
2485
Alexander Ivchenkobaa91f42013-06-27 12:55:46 +04002486#elif defined(USE_AS_MEMCMP16)
2487
/*
 * USE_AS_MEMCMP16 only.  Common mismatch exit for the 16-bit-element
 * ladder: the jne L(memcmp16_exit) sites arrive with the non-zero difference
 * of the two zero-extended elements (%eax side minus %edx side) in %ecx.
 * Restores %ebx and returns that difference in %eax.
 */
2488 .p2align 4
2489L(memcmp16_exit):
2490 POP (%ebx)
2491 mov %ecx, %eax
2492 ret
2493#else
2494# error Unreachable preprocessor case
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04002495#endif
Bruce Beare8ff1a272010-03-04 11:03:37 -08002496END (MEMCMP)