blob: a315a378ea234e6d657f35053c8c04c6bea7909d [file] [log] [blame]
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86.S"
18
/* Symbol name for the routine in this file; it is the argument passed to
   DEFINE_FUNCTION below.  DEFINE_FUNCTION itself is not defined in this file
   (presumably it comes from asm_support_x86.S -- confirm). */
19#define MEMCMP __memcmp16
20
21/* int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count); */
/* NOTE(review): the prototype above uses the name memcmp16_compare, while the
   symbol this file defines is MEMCMP, i.e. __memcmp16 -- confirm which name
   callers actually use.  The code doubles `count` (shl $1) before using it as
   a byte length, which matches a count given in 16-bit units. */
22
/* L(label) pastes `.L` in front of a label name, producing an assembler-local
   (non-exported) label.  Only defined here if no earlier definition of L
   exists. */
23#ifndef L
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040024# define L(label) .L##label
Alexander Ivchenko907194a2014-07-14 18:54:34 +040025#endif
26
/* Unwind bookkeeping to accompany a 4-byte push of REG: expands to
   CFI_ADJUST_CFA_OFFSET(4) followed by CFI_REL_OFFSET(REG, 0).  Both helpers
   are defined outside this file; presumably they wrap the GAS
   .cfi_adjust_cfa_offset / .cfi_rel_offset directives -- confirm. */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040027#define CFI_PUSH(REG) \
28 CFI_ADJUST_CFA_OFFSET(4); \
29 CFI_REL_OFFSET(REG, 0)
Alexander Ivchenko907194a2014-07-14 18:54:34 +040030
/* Counterpart of CFI_PUSH for a 4-byte pop of REG: expands to
   CFI_ADJUST_CFA_OFFSET(-4) followed by CFI_RESTORE(REG). */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040031#define CFI_POP(REG) \
32 CFI_ADJUST_CFA_OFFSET(-4); \
33 CFI_RESTORE(REG)
Alexander Ivchenko907194a2014-07-14 18:54:34 +040034
/* PUSH/POP emit the actual pushl/popl instruction and then the matching
   CFI_PUSH/CFI_POP annotation, so the stack change and its unwind record
   stay together. */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040035#define PUSH(REG) pushl REG; CFI_PUSH (REG)
36#define POP(REG) popl REG; CFI_POP (REG)
Alexander Ivchenko907194a2014-07-14 18:54:34 +040037
/* Displacements from %esp for the three stack arguments.  The function reads
   LEN(%esp) as its very first instruction and BLK1/BLK2(%esp) before any
   PUSH, so these offsets are only valid while nothing has been pushed.
   PARMS = 4 presumably skips the 4-byte return address -- confirm.
   BLK1 is loaded into %eax, BLK2 into %edx and LEN into %ecx.
   BLK2 and LEN expand to unparenthesized sums (PARMS+4, PARMS+4+4); the
   visible code uses them only as plain displacements, where that is harmless. */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040038#define PARMS 4
39#define BLK1 PARMS
40#define BLK2 BLK1+4
41#define LEN BLK2+4
/* RETURN_END: pop %edi, %esi and %ebx (each with its CFI_POP annotation) and
   return.  This is the reverse of the PUSH order used at L(48bytesormore). */
42#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
/* RETURN: RETURN_END followed by CFI_RESTORE_STATE and CFI_REMEMBER_STATE.
   Presumably this re-establishes the saved "three registers pushed" unwind
   state for the code placed after the ret, and saves it again for the next
   RETURN -- confirm against the CFI_* definitions, which are outside this
   file. */
43#define RETURN RETURN_END; CFI_RESTORE_STATE; CFI_REMEMBER_STATE
Alexander Ivchenko907194a2014-07-14 18:54:34 +040044
45DEFINE_FUNCTION MEMCMP
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040046 movl LEN(%esp), %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +040047
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040048 shl $1, %ecx
49 jz L(zero)
Alexander Ivchenko907194a2014-07-14 18:54:34 +040050
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040051 movl BLK1(%esp), %eax
52 cmp $48, %ecx
53 movl BLK2(%esp), %edx
54 jae L(48bytesormore)
Alexander Ivchenko907194a2014-07-14 18:54:34 +040055
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040056 PUSH (%ebx)
57 add %ecx, %edx
58 add %ecx, %eax
59 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +040060
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040061 CFI_POP (%ebx)
Alexander Ivchenko907194a2014-07-14 18:54:34 +040062
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040063 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +040064L(zero):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040065 xor %eax, %eax
66 ret
Alexander Ivchenko907194a2014-07-14 18:54:34 +040067
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040068 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +040069L(48bytesormore):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040070 PUSH (%ebx)
71 PUSH (%esi)
72 PUSH (%edi)
73 CFI_REMEMBER_STATE
74 movdqu (%eax), %xmm3
75 movdqu (%edx), %xmm0
76 movl %eax, %edi
77 movl %edx, %esi
78 pcmpeqb %xmm0, %xmm3
79 pmovmskb %xmm3, %edx
80 lea 16(%edi), %edi
Alexander Ivchenko907194a2014-07-14 18:54:34 +040081
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040082 sub $0xffff, %edx
83 lea 16(%esi), %esi
84 jnz L(less16bytes)
85 mov %edi, %edx
86 and $0xf, %edx
87 xor %edx, %edi
88 sub %edx, %esi
89 add %edx, %ecx
90 mov %esi, %edx
91 and $0xf, %edx
92 jz L(shr_0)
93 xor %edx, %esi
Alexander Ivchenko907194a2014-07-14 18:54:34 +040094
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +040095 cmp $0, %edx
96 je L(shr_0)
97 cmp $2, %edx
98 je L(shr_2)
99 cmp $4, %edx
100 je L(shr_4)
101 cmp $6, %edx
102 je L(shr_6)
103 cmp $8, %edx
104 je L(shr_8)
105 cmp $10, %edx
106 je L(shr_10)
107 cmp $12, %edx
108 je L(shr_12)
109 jmp L(shr_14)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400110
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400111 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400112L(shr_0):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400113 cmp $80, %ecx
114 jae L(shr_0_gobble)
115 lea -48(%ecx), %ecx
116 xor %eax, %eax
117 movaps (%esi), %xmm1
118 pcmpeqb (%edi), %xmm1
119 movaps 16(%esi), %xmm2
120 pcmpeqb 16(%edi), %xmm2
121 pand %xmm1, %xmm2
122 pmovmskb %xmm2, %edx
123 add $32, %edi
124 add $32, %esi
125 sub $0xffff, %edx
126 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400127
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400128 lea (%ecx, %edi,1), %eax
129 lea (%ecx, %esi,1), %edx
130 POP (%edi)
131 POP (%esi)
132 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400133
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400134 CFI_RESTORE_STATE
135 CFI_REMEMBER_STATE
136 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400137L(shr_0_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400138 lea -48(%ecx), %ecx
139 movdqa (%esi), %xmm0
140 xor %eax, %eax
141 pcmpeqb (%edi), %xmm0
142 sub $32, %ecx
143 movdqa 16(%esi), %xmm2
144 pcmpeqb 16(%edi), %xmm2
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400145L(shr_0_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400146 pand %xmm0, %xmm2
147 sub $32, %ecx
148 pmovmskb %xmm2, %edx
149 movdqa %xmm0, %xmm1
150 movdqa 32(%esi), %xmm0
151 movdqa 48(%esi), %xmm2
152 sbb $0xffff, %edx
153 pcmpeqb 32(%edi), %xmm0
154 pcmpeqb 48(%edi), %xmm2
155 lea 32(%edi), %edi
156 lea 32(%esi), %esi
157 jz L(shr_0_gobble_loop)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400158
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400159 pand %xmm0, %xmm2
160 cmp $0, %ecx
161 jge L(shr_0_gobble_loop_next)
162 inc %edx
163 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400164L(shr_0_gobble_loop_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400165 test %edx, %edx
166 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400167
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400168 pmovmskb %xmm2, %edx
169 movdqa %xmm0, %xmm1
170 lea 32(%edi), %edi
171 lea 32(%esi), %esi
172 sub $0xffff, %edx
173 jnz L(exit)
174 lea (%ecx, %edi,1), %eax
175 lea (%ecx, %esi,1), %edx
176 POP (%edi)
177 POP (%esi)
178 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400179
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400180 CFI_RESTORE_STATE
181 CFI_REMEMBER_STATE
182 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400183L(shr_2):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400184 cmp $80, %ecx
185 lea -48(%ecx), %ecx
186 mov %edx, %eax
187 jae L(shr_2_gobble)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400188
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400189 movdqa 16(%esi), %xmm1
190 movdqa %xmm1, %xmm2
191 palignr $2,(%esi), %xmm1
192 pcmpeqb (%edi), %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400193
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400194 movdqa 32(%esi), %xmm3
195 palignr $2,%xmm2, %xmm3
196 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400197
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400198 pand %xmm1, %xmm3
199 pmovmskb %xmm3, %edx
200 lea 32(%edi), %edi
201 lea 32(%esi), %esi
202 sub $0xffff, %edx
203 jnz L(exit)
204 lea (%ecx, %edi,1), %eax
205 lea 2(%ecx, %esi,1), %edx
206 POP (%edi)
207 POP (%esi)
208 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400209
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400210 CFI_RESTORE_STATE
211 CFI_REMEMBER_STATE
212 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400213L(shr_2_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400214 sub $32, %ecx
215 movdqa 16(%esi), %xmm0
216 palignr $2,(%esi), %xmm0
217 pcmpeqb (%edi), %xmm0
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400218
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400219 movdqa 32(%esi), %xmm3
220 palignr $2,16(%esi), %xmm3
221 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400222
223L(shr_2_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400224 pand %xmm0, %xmm3
225 sub $32, %ecx
226 pmovmskb %xmm3, %edx
227 movdqa %xmm0, %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400228
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400229 movdqa 64(%esi), %xmm3
230 palignr $2,48(%esi), %xmm3
231 sbb $0xffff, %edx
232 movdqa 48(%esi), %xmm0
233 palignr $2,32(%esi), %xmm0
234 pcmpeqb 32(%edi), %xmm0
235 lea 32(%esi), %esi
236 pcmpeqb 48(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400237
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400238 lea 32(%edi), %edi
239 jz L(shr_2_gobble_loop)
240 pand %xmm0, %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400241
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400242 cmp $0, %ecx
243 jge L(shr_2_gobble_next)
244 inc %edx
245 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400246L(shr_2_gobble_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400247 test %edx, %edx
248 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400249
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400250 pmovmskb %xmm3, %edx
251 movdqa %xmm0, %xmm1
252 lea 32(%edi), %edi
253 lea 32(%esi), %esi
254 sub $0xffff, %edx
255 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400256
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400257 lea (%ecx, %edi,1), %eax
258 lea 2(%ecx, %esi,1), %edx
259 POP (%edi)
260 POP (%esi)
261 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400262
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400263 CFI_RESTORE_STATE
264 CFI_REMEMBER_STATE
265 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400266L(shr_4):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400267 cmp $80, %ecx
268 lea -48(%ecx), %ecx
269 mov %edx, %eax
270 jae L(shr_4_gobble)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400271
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400272 movdqa 16(%esi), %xmm1
273 movdqa %xmm1, %xmm2
274 palignr $4,(%esi), %xmm1
275 pcmpeqb (%edi), %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400276
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400277 movdqa 32(%esi), %xmm3
278 palignr $4,%xmm2, %xmm3
279 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400280
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400281 pand %xmm1, %xmm3
282 pmovmskb %xmm3, %edx
283 lea 32(%edi), %edi
284 lea 32(%esi), %esi
285 sub $0xffff, %edx
286 jnz L(exit)
287 lea (%ecx, %edi,1), %eax
288 lea 4(%ecx, %esi,1), %edx
289 POP (%edi)
290 POP (%esi)
291 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400292
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400293 CFI_RESTORE_STATE
294 CFI_REMEMBER_STATE
295 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400296L(shr_4_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400297 sub $32, %ecx
298 movdqa 16(%esi), %xmm0
299 palignr $4,(%esi), %xmm0
300 pcmpeqb (%edi), %xmm0
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400301
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400302 movdqa 32(%esi), %xmm3
303 palignr $4,16(%esi), %xmm3
304 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400305
306L(shr_4_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400307 pand %xmm0, %xmm3
308 sub $32, %ecx
309 pmovmskb %xmm3, %edx
310 movdqa %xmm0, %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400311
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400312 movdqa 64(%esi), %xmm3
313 palignr $4,48(%esi), %xmm3
314 sbb $0xffff, %edx
315 movdqa 48(%esi), %xmm0
316 palignr $4,32(%esi), %xmm0
317 pcmpeqb 32(%edi), %xmm0
318 lea 32(%esi), %esi
319 pcmpeqb 48(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400320
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400321 lea 32(%edi), %edi
322 jz L(shr_4_gobble_loop)
323 pand %xmm0, %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400324
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400325 cmp $0, %ecx
326 jge L(shr_4_gobble_next)
327 inc %edx
328 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400329L(shr_4_gobble_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400330 test %edx, %edx
331 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400332
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400333 pmovmskb %xmm3, %edx
334 movdqa %xmm0, %xmm1
335 lea 32(%edi), %edi
336 lea 32(%esi), %esi
337 sub $0xffff, %edx
338 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400339
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400340 lea (%ecx, %edi,1), %eax
341 lea 4(%ecx, %esi,1), %edx
342 POP (%edi)
343 POP (%esi)
344 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400345
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400346 CFI_RESTORE_STATE
347 CFI_REMEMBER_STATE
348 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400349L(shr_6):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400350 cmp $80, %ecx
351 lea -48(%ecx), %ecx
352 mov %edx, %eax
353 jae L(shr_6_gobble)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400354
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400355 movdqa 16(%esi), %xmm1
356 movdqa %xmm1, %xmm2
357 palignr $6,(%esi), %xmm1
358 pcmpeqb (%edi), %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400359
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400360 movdqa 32(%esi), %xmm3
361 palignr $6,%xmm2, %xmm3
362 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400363
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400364 pand %xmm1, %xmm3
365 pmovmskb %xmm3, %edx
366 lea 32(%edi), %edi
367 lea 32(%esi), %esi
368 sub $0xffff, %edx
369 jnz L(exit)
370 lea (%ecx, %edi,1), %eax
371 lea 6(%ecx, %esi,1), %edx
372 POP (%edi)
373 POP (%esi)
374 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400375
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400376 CFI_RESTORE_STATE
377 CFI_REMEMBER_STATE
378 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400379L(shr_6_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400380 sub $32, %ecx
381 movdqa 16(%esi), %xmm0
382 palignr $6,(%esi), %xmm0
383 pcmpeqb (%edi), %xmm0
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400384
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400385 movdqa 32(%esi), %xmm3
386 palignr $6,16(%esi), %xmm3
387 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400388
389L(shr_6_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400390 pand %xmm0, %xmm3
391 sub $32, %ecx
392 pmovmskb %xmm3, %edx
393 movdqa %xmm0, %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400394
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400395 movdqa 64(%esi), %xmm3
396 palignr $6,48(%esi), %xmm3
397 sbb $0xffff, %edx
398 movdqa 48(%esi), %xmm0
399 palignr $6,32(%esi), %xmm0
400 pcmpeqb 32(%edi), %xmm0
401 lea 32(%esi), %esi
402 pcmpeqb 48(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400403
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400404 lea 32(%edi), %edi
405 jz L(shr_6_gobble_loop)
406 pand %xmm0, %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400407
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400408 cmp $0, %ecx
409 jge L(shr_6_gobble_next)
410 inc %edx
411 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400412L(shr_6_gobble_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400413 test %edx, %edx
414 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400415
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400416 pmovmskb %xmm3, %edx
417 movdqa %xmm0, %xmm1
418 lea 32(%edi), %edi
419 lea 32(%esi), %esi
420 sub $0xffff, %edx
421 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400422
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400423 lea (%ecx, %edi,1), %eax
424 lea 6(%ecx, %esi,1), %edx
425 POP (%edi)
426 POP (%esi)
427 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400428
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400429 CFI_RESTORE_STATE
430 CFI_REMEMBER_STATE
431 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400432L(shr_8):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400433 cmp $80, %ecx
434 lea -48(%ecx), %ecx
435 mov %edx, %eax
436 jae L(shr_8_gobble)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400437
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400438 movdqa 16(%esi), %xmm1
439 movdqa %xmm1, %xmm2
440 palignr $8,(%esi), %xmm1
441 pcmpeqb (%edi), %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400442
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400443 movdqa 32(%esi), %xmm3
444 palignr $8,%xmm2, %xmm3
445 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400446
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400447 pand %xmm1, %xmm3
448 pmovmskb %xmm3, %edx
449 lea 32(%edi), %edi
450 lea 32(%esi), %esi
451 sub $0xffff, %edx
452 jnz L(exit)
453 lea (%ecx, %edi,1), %eax
454 lea 8(%ecx, %esi,1), %edx
455 POP (%edi)
456 POP (%esi)
457 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400458
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400459 CFI_RESTORE_STATE
460 CFI_REMEMBER_STATE
461 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400462L(shr_8_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400463 sub $32, %ecx
464 movdqa 16(%esi), %xmm0
465 palignr $8,(%esi), %xmm0
466 pcmpeqb (%edi), %xmm0
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400467
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400468 movdqa 32(%esi), %xmm3
469 palignr $8,16(%esi), %xmm3
470 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400471
472L(shr_8_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400473 pand %xmm0, %xmm3
474 sub $32, %ecx
475 pmovmskb %xmm3, %edx
476 movdqa %xmm0, %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400477
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400478 movdqa 64(%esi), %xmm3
479 palignr $8,48(%esi), %xmm3
480 sbb $0xffff, %edx
481 movdqa 48(%esi), %xmm0
482 palignr $8,32(%esi), %xmm0
483 pcmpeqb 32(%edi), %xmm0
484 lea 32(%esi), %esi
485 pcmpeqb 48(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400486
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400487 lea 32(%edi), %edi
488 jz L(shr_8_gobble_loop)
489 pand %xmm0, %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400490
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400491 cmp $0, %ecx
492 jge L(shr_8_gobble_next)
493 inc %edx
494 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400495L(shr_8_gobble_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400496 test %edx, %edx
497 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400498
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400499 pmovmskb %xmm3, %edx
500 movdqa %xmm0, %xmm1
501 lea 32(%edi), %edi
502 lea 32(%esi), %esi
503 sub $0xffff, %edx
504 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400505
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400506 lea (%ecx, %edi,1), %eax
507 lea 8(%ecx, %esi,1), %edx
508 POP (%edi)
509 POP (%esi)
510 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400511
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400512 CFI_RESTORE_STATE
513 CFI_REMEMBER_STATE
514 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400515L(shr_10):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400516 cmp $80, %ecx
517 lea -48(%ecx), %ecx
518 mov %edx, %eax
519 jae L(shr_10_gobble)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400520
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400521 movdqa 16(%esi), %xmm1
522 movdqa %xmm1, %xmm2
523 palignr $10, (%esi), %xmm1
524 pcmpeqb (%edi), %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400525
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400526 movdqa 32(%esi), %xmm3
527 palignr $10,%xmm2, %xmm3
528 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400529
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400530 pand %xmm1, %xmm3
531 pmovmskb %xmm3, %edx
532 lea 32(%edi), %edi
533 lea 32(%esi), %esi
534 sub $0xffff, %edx
535 jnz L(exit)
536 lea (%ecx, %edi,1), %eax
537 lea 10(%ecx, %esi,1), %edx
538 POP (%edi)
539 POP (%esi)
540 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400541
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400542 CFI_RESTORE_STATE
543 CFI_REMEMBER_STATE
544 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400545L(shr_10_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400546 sub $32, %ecx
547 movdqa 16(%esi), %xmm0
548 palignr $10, (%esi), %xmm0
549 pcmpeqb (%edi), %xmm0
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400550
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400551 movdqa 32(%esi), %xmm3
552 palignr $10, 16(%esi), %xmm3
553 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400554
555L(shr_10_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400556 pand %xmm0, %xmm3
557 sub $32, %ecx
558 pmovmskb %xmm3, %edx
559 movdqa %xmm0, %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400560
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400561 movdqa 64(%esi), %xmm3
562 palignr $10,48(%esi), %xmm3
563 sbb $0xffff, %edx
564 movdqa 48(%esi), %xmm0
565 palignr $10,32(%esi), %xmm0
566 pcmpeqb 32(%edi), %xmm0
567 lea 32(%esi), %esi
568 pcmpeqb 48(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400569
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400570 lea 32(%edi), %edi
571 jz L(shr_10_gobble_loop)
572 pand %xmm0, %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400573
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400574 cmp $0, %ecx
575 jge L(shr_10_gobble_next)
576 inc %edx
577 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400578L(shr_10_gobble_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400579 test %edx, %edx
580 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400581
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400582 pmovmskb %xmm3, %edx
583 movdqa %xmm0, %xmm1
584 lea 32(%edi), %edi
585 lea 32(%esi), %esi
586 sub $0xffff, %edx
587 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400588
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400589 lea (%ecx, %edi,1), %eax
590 lea 10(%ecx, %esi,1), %edx
591 POP (%edi)
592 POP (%esi)
593 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400594
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400595 CFI_RESTORE_STATE
596 CFI_REMEMBER_STATE
597 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400598L(shr_12):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400599 cmp $80, %ecx
600 lea -48(%ecx), %ecx
601 mov %edx, %eax
602 jae L(shr_12_gobble)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400603
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400604 movdqa 16(%esi), %xmm1
605 movdqa %xmm1, %xmm2
606 palignr $12, (%esi), %xmm1
607 pcmpeqb (%edi), %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400608
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400609 movdqa 32(%esi), %xmm3
610 palignr $12, %xmm2, %xmm3
611 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400612
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400613 pand %xmm1, %xmm3
614 pmovmskb %xmm3, %edx
615 lea 32(%edi), %edi
616 lea 32(%esi), %esi
617 sub $0xffff, %edx
618 jnz L(exit)
619 lea (%ecx, %edi,1), %eax
620 lea 12(%ecx, %esi,1), %edx
621 POP (%edi)
622 POP (%esi)
623 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400624
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400625 CFI_RESTORE_STATE
626 CFI_REMEMBER_STATE
627 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400628L(shr_12_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400629 sub $32, %ecx
630 movdqa 16(%esi), %xmm0
631 palignr $12, (%esi), %xmm0
632 pcmpeqb (%edi), %xmm0
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400633
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400634 movdqa 32(%esi), %xmm3
635 palignr $12, 16(%esi), %xmm3
636 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400637
638L(shr_12_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400639 pand %xmm0, %xmm3
640 sub $32, %ecx
641 pmovmskb %xmm3, %edx
642 movdqa %xmm0, %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400643
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400644 movdqa 64(%esi), %xmm3
645 palignr $12,48(%esi), %xmm3
646 sbb $0xffff, %edx
647 movdqa 48(%esi), %xmm0
648 palignr $12,32(%esi), %xmm0
649 pcmpeqb 32(%edi), %xmm0
650 lea 32(%esi), %esi
651 pcmpeqb 48(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400652
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400653 lea 32(%edi), %edi
654 jz L(shr_12_gobble_loop)
655 pand %xmm0, %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400656
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400657 cmp $0, %ecx
658 jge L(shr_12_gobble_next)
659 inc %edx
660 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400661L(shr_12_gobble_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400662 test %edx, %edx
663 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400664
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400665 pmovmskb %xmm3, %edx
666 movdqa %xmm0, %xmm1
667 lea 32(%edi), %edi
668 lea 32(%esi), %esi
669 sub $0xffff, %edx
670 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400671
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400672 lea (%ecx, %edi,1), %eax
673 lea 12(%ecx, %esi,1), %edx
674 POP (%edi)
675 POP (%esi)
676 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400677
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400678 CFI_RESTORE_STATE
679 CFI_REMEMBER_STATE
680 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400681L(shr_14):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400682 cmp $80, %ecx
683 lea -48(%ecx), %ecx
684 mov %edx, %eax
685 jae L(shr_14_gobble)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400686
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400687 movdqa 16(%esi), %xmm1
688 movdqa %xmm1, %xmm2
689 palignr $14, (%esi), %xmm1
690 pcmpeqb (%edi), %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400691
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400692 movdqa 32(%esi), %xmm3
693 palignr $14, %xmm2, %xmm3
694 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400695
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400696 pand %xmm1, %xmm3
697 pmovmskb %xmm3, %edx
698 lea 32(%edi), %edi
699 lea 32(%esi), %esi
700 sub $0xffff, %edx
701 jnz L(exit)
702 lea (%ecx, %edi,1), %eax
703 lea 14(%ecx, %esi,1), %edx
704 POP (%edi)
705 POP (%esi)
706 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400707
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400708 CFI_RESTORE_STATE
709 CFI_REMEMBER_STATE
710 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400711L(shr_14_gobble):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400712 sub $32, %ecx
713 movdqa 16(%esi), %xmm0
714 palignr $14, (%esi), %xmm0
715 pcmpeqb (%edi), %xmm0
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400716
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400717 movdqa 32(%esi), %xmm3
718 palignr $14, 16(%esi), %xmm3
719 pcmpeqb 16(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400720
721L(shr_14_gobble_loop):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400722 pand %xmm0, %xmm3
723 sub $32, %ecx
724 pmovmskb %xmm3, %edx
725 movdqa %xmm0, %xmm1
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400726
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400727 movdqa 64(%esi), %xmm3
728 palignr $14,48(%esi), %xmm3
729 sbb $0xffff, %edx
730 movdqa 48(%esi), %xmm0
731 palignr $14,32(%esi), %xmm0
732 pcmpeqb 32(%edi), %xmm0
733 lea 32(%esi), %esi
734 pcmpeqb 48(%edi), %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400735
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400736 lea 32(%edi), %edi
737 jz L(shr_14_gobble_loop)
738 pand %xmm0, %xmm3
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400739
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400740 cmp $0, %ecx
741 jge L(shr_14_gobble_next)
742 inc %edx
743 add $32, %ecx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400744L(shr_14_gobble_next):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400745 test %edx, %edx
746 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400747
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400748 pmovmskb %xmm3, %edx
749 movdqa %xmm0, %xmm1
750 lea 32(%edi), %edi
751 lea 32(%esi), %esi
752 sub $0xffff, %edx
753 jnz L(exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400754
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400755 lea (%ecx, %edi,1), %eax
756 lea 14(%ecx, %esi,1), %edx
757 POP (%edi)
758 POP (%esi)
759 jmp L(less48bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400760
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400761 CFI_RESTORE_STATE
762 CFI_REMEMBER_STATE
763 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400764L(exit):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400765 pmovmskb %xmm1, %ebx
766 sub $0xffff, %ebx
767 jz L(first16bytes)
768 lea -16(%esi), %esi
769 lea -16(%edi), %edi
770 mov %ebx, %edx
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400771
772L(first16bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400773 add %eax, %esi
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400774L(less16bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400775 test %dl, %dl
776 jz L(next_four_words)
777 test $15, %dl
778 jz L(second_two_words)
779 test $3, %dl
780 jz L(second_word)
781 movzwl -16(%edi), %eax
782 movzwl -16(%esi), %ebx
783 subl %ebx, %eax
784 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400785
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400786 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400787L(second_word):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400788 movzwl -14(%edi), %eax
789 movzwl -14(%esi), %ebx
790 subl %ebx, %eax
791 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400792
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400793 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400794L(second_two_words):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400795 test $63, %dl
796 jz L(fourth_word)
797 movzwl -12(%edi), %eax
798 movzwl -12(%esi), %ebx
799 subl %ebx, %eax
800 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400801
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400802 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400803L(fourth_word):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400804 movzwl -10(%edi), %eax
805 movzwl -10(%esi), %ebx
806 subl %ebx, %eax
807 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400808
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400809 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400810L(next_four_words):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400811 test $15, %dh
812 jz L(fourth_two_words)
813 test $3, %dh
814 jz L(sixth_word)
815 movzwl -8(%edi), %eax
816 movzwl -8(%esi), %ebx
817 subl %ebx, %eax
818 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400819
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400820 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400821L(sixth_word):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400822 movzwl -6(%edi), %eax
823 movzwl -6(%esi), %ebx
824 subl %ebx, %eax
825 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400826
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400827 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400828L(fourth_two_words):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400829 test $63, %dh
830 jz L(eighth_word)
831 movzwl -4(%edi), %eax
832 movzwl -4(%esi), %ebx
833 subl %ebx, %eax
834 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400835
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400836 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400837L(eighth_word):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400838 movzwl -2(%edi), %eax
839 movzwl -2(%esi), %ebx
840 subl %ebx, %eax
841 RETURN
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400842
843
/*
 * Unwind annotation only: CFI_PUSH is built from CFI_* macros and issues no
 * pushl.  It describes %ebx as saved on the stack for the code that follows,
 * whose exit paths pop only %ebx before ret.
 * NOTE(review): the preceding RETURN ends with CFI_RESTORE_STATE /
 * CFI_REMEMBER_STATE; verify that the restored state plus this CFI_PUSH gives
 * the correct CFA offset for the short-length paths below.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400844 CFI_PUSH (%ebx)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400845
/*
 * L(more8bytes): dispatch on %ecx for values of 8 and above.
 *   %ecx >= 16 (unsigned) -> L(more16bytes)
 *   %ecx == 8 / 10 / 12   -> L(8bytes) / L(10bytes) / L(12bytes)
 *   anything else         -> L(14bytes)
 * NOTE(review): %ecx is presumably the remaining length in bytes and always
 * even here, so the final jmp only ever sees 14 - confirm against the caller.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400846 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400847L(more8bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400848 cmp $16, %ecx
849 jae L(more16bytes)
850 cmp $8, %ecx
851 je L(8bytes)
852 cmp $10, %ecx
853 je L(10bytes)
854 cmp $12, %ecx
855 je L(12bytes)
856 jmp L(14bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400857
/*
 * L(more16bytes): dispatch on %ecx for values of 16 and above.
 *   %ecx >= 24 (unsigned) -> L(more24bytes)
 *   %ecx == 16 / 18 / 20  -> L(16bytes) / L(18bytes) / L(20bytes)
 *   anything else         -> L(22bytes)
 * Reached from L(more8bytes) once %ecx >= 16 has been established.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400858 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400859L(more16bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400860 cmp $24, %ecx
861 jae L(more24bytes)
862 cmp $16, %ecx
863 je L(16bytes)
864 cmp $18, %ecx
865 je L(18bytes)
866 cmp $20, %ecx
867 je L(20bytes)
868 jmp L(22bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400869
/*
 * L(more24bytes): dispatch on %ecx for values of 24 and above.
 *   %ecx >= 32 (unsigned) -> L(more32bytes)
 *   %ecx == 24 / 26 / 28  -> L(24bytes) / L(26bytes) / L(28bytes)
 *   anything else         -> L(30bytes)
 * Reached from L(more16bytes) once %ecx >= 24 has been established.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400870 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400871L(more24bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400872 cmp $32, %ecx
873 jae L(more32bytes)
874 cmp $24, %ecx
875 je L(24bytes)
876 cmp $26, %ecx
877 je L(26bytes)
878 cmp $28, %ecx
879 je L(28bytes)
880 jmp L(30bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400881
/*
 * L(more32bytes): dispatch on %ecx for values of 32 and above.
 *   %ecx >= 40 (unsigned) -> L(more40bytes)
 *   %ecx == 32 / 34 / 36  -> L(32bytes) / L(34bytes) / L(36bytes)
 *   anything else         -> L(38bytes)
 * Reached from L(more24bytes) once %ecx >= 32 has been established.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400882 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400883L(more32bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400884 cmp $40, %ecx
885 jae L(more40bytes)
886 cmp $32, %ecx
887 je L(32bytes)
888 cmp $34, %ecx
889 je L(34bytes)
890 cmp $36, %ecx
891 je L(36bytes)
892 jmp L(38bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400893
/*
 * L(less48bytes): root of the short-length dispatch tree keyed on %ecx.
 *   %ecx >= 8 (unsigned) -> L(more8bytes), which narrows the range further
 *   %ecx == 2 / 4        -> L(2bytes) / L(4bytes)
 *   anything else        -> L(6bytes)
 * NOTE(review): any value below 8 other than 2 or 4 (including 0 and odd
 * values) lands on L(6bytes); presumably the code before this chunk
 * guarantees a non-zero even count below 48 - confirm against the caller.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400894 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400895L(less48bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400896 cmp $8, %ecx
897 jae L(more8bytes)
898 cmp $2, %ecx
899 je L(2bytes)
900 cmp $4, %ecx
901 je L(4bytes)
902 jmp L(6bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400903
/*
 * L(more40bytes): last level of the dispatch tree, for %ecx of 40 and above.
 *   %ecx == 40 / 42 / 44 -> L(40bytes) / L(42bytes) / L(44bytes)
 *   anything else        -> L(46bytes)
 * No upper bound is checked here: every value other than 40, 42 or 44 is
 * treated as 46.
 * NOTE(review): presumably lengths of 48 and above never reach this tree -
 * confirm against the code that jumps to L(less48bytes).
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400904 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400905L(more40bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400906 cmp $40, %ecx
907 je L(40bytes)
908 cmp $42, %ecx
909 je L(42bytes)
910 cmp $44, %ecx
911 je L(44bytes)
912 jmp L(46bytes)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400913
/*
 * L(46bytes) ... L(2bytes): unrolled fall-through compare chain.
 *
 * Each L(Nbytes) step loads the 16-bit values at byte offset -N from %eax and
 * from %edx (zero-extended), subtracts them, and jumps to L(memcmp16_exit)
 * with the non-zero difference in %ecx as soon as a pair differs.  If the
 * pair is equal, execution falls through to the step for offset -(N-2).
 * Entering at L(Nbytes) therefore compares N/2 words in increasing address
 * order.
 *
 * %ecx, which the dispatch code used to pick the entry label, is reused as
 * scratch from the first step on.  %ebx is scratch as well; it is restored by
 * the POP on every exit path.
 * NOTE(review): %eax/%edx are set up before this chunk; presumably they point
 * just past the end of the two buffers - confirm against the caller.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400914 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400915L(46bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400916 movzwl -46(%eax), %ecx
917 movzwl -46(%edx), %ebx
918 subl %ebx, %ecx
919 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400920L(44bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400921 movzwl -44(%eax), %ecx
922 movzwl -44(%edx), %ebx
923 subl %ebx, %ecx
924 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400925L(42bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400926 movzwl -42(%eax), %ecx
927 movzwl -42(%edx), %ebx
928 subl %ebx, %ecx
929 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400930L(40bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400931 movzwl -40(%eax), %ecx
932 movzwl -40(%edx), %ebx
933 subl %ebx, %ecx
934 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400935L(38bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400936 movzwl -38(%eax), %ecx
937 movzwl -38(%edx), %ebx
938 subl %ebx, %ecx
939 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400940L(36bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400941 movzwl -36(%eax), %ecx
942 movzwl -36(%edx), %ebx
943 subl %ebx, %ecx
944 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400945L(34bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400946 movzwl -34(%eax), %ecx
947 movzwl -34(%edx), %ebx
948 subl %ebx, %ecx
949 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400950L(32bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400951 movzwl -32(%eax), %ecx
952 movzwl -32(%edx), %ebx
953 subl %ebx, %ecx
954 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400955L(30bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400956 movzwl -30(%eax), %ecx
957 movzwl -30(%edx), %ebx
958 subl %ebx, %ecx
959 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400960L(28bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400961 movzwl -28(%eax), %ecx
962 movzwl -28(%edx), %ebx
963 subl %ebx, %ecx
964 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400965L(26bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400966 movzwl -26(%eax), %ecx
967 movzwl -26(%edx), %ebx
968 subl %ebx, %ecx
969 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400970L(24bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400971 movzwl -24(%eax), %ecx
972 movzwl -24(%edx), %ebx
973 subl %ebx, %ecx
974 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400975L(22bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400976 movzwl -22(%eax), %ecx
977 movzwl -22(%edx), %ebx
978 subl %ebx, %ecx
979 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400980L(20bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400981 movzwl -20(%eax), %ecx
982 movzwl -20(%edx), %ebx
983 subl %ebx, %ecx
984 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400985L(18bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400986 movzwl -18(%eax), %ecx
987 movzwl -18(%edx), %ebx
988 subl %ebx, %ecx
989 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400990L(16bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400991 movzwl -16(%eax), %ecx
992 movzwl -16(%edx), %ebx
993 subl %ebx, %ecx
994 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +0400995L(14bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +0400996 movzwl -14(%eax), %ecx
997 movzwl -14(%edx), %ebx
998 subl %ebx, %ecx
999 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001000L(12bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001001 movzwl -12(%eax), %ecx
1002 movzwl -12(%edx), %ebx
1003 subl %ebx, %ecx
1004 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001005L(10bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001006 movzwl -10(%eax), %ecx
1007 movzwl -10(%edx), %ebx
1008 subl %ebx, %ecx
1009 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001010L(8bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001011 movzwl -8(%eax), %ecx
1012 movzwl -8(%edx), %ebx
1013 subl %ebx, %ecx
1014 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001015L(6bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001016 movzwl -6(%eax), %ecx
1017 movzwl -6(%edx), %ebx
1018 subl %ebx, %ecx
1019 jne L(memcmp16_exit)
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001020L(4bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001021 movzwl -4(%eax), %ecx
1022 movzwl -4(%edx), %ebx
1023 subl %ebx, %ecx
1024 jne L(memcmp16_exit)
/*
 * Final step: the difference of the last word pair is the return value either
 * way (zero when equal), so it is computed straight into %eax, overwriting
 * the pointer held there, and no branch to L(memcmp16_exit) is needed.
 */
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001025L(2bytes):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001026 movzwl -2(%eax), %eax
1027 movzwl -2(%edx), %ebx
1028 subl %ebx, %eax
/* Restore %ebx (popl plus matching CFI annotations) and return %eax. */
1029 POP (%ebx)
1030 ret
/*
 * Unwind annotation only (no pushl is issued): the POP before the ret above
 * marked %ebx as restored, but the code below is entered by jumps taken while
 * %ebx is still on the stack, so the saved-%ebx description is re-declared.
 */
1031 CFI_PUSH (%ebx)
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001032
/*
 * L(memcmp16_exit): common exit for the compare chain when a word pair
 * differs.  On entry %ecx holds the non-zero difference of the two
 * zero-extended words; %ebx is restored and the difference is returned
 * in %eax.
 */
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001033 .p2align 4
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001034L(memcmp16_exit):
Varvara Rainchik2a0bffd2014-07-30 17:01:24 +04001035 POP (%ebx)
1036 mov %ecx, %eax
1037 ret
Alexander Ivchenko907194a2014-07-14 18:54:34 +04001038END_FUNCTION MEMCMP