/* blob: 891186822458482f94cec0bd2dcdd4c9ca01a0d7 -- [file] [log] [blame] */
/*
 * Preprocessor scaffolding for the SSE2 strlen routine below
 * (GNU as, AT&T syntax, i386, file is run through the C preprocessor).
 *
 * Blame annotation carried by the listing this file was taken from:
 * Bruce Beare, commit 124a542, 2010-10-11 12:24:41 -0700.
 */

/* Exported symbol name of the routine.  */
#define STRLEN sse2_strlen_atom

/* L(foo) names an assembler-local label (.Lfoo).  */
#if !defined(L)
# define L(label) .L##label
#endif

/*
 * Thin wrappers around the assembler's .cfi_* directives.  Each one is
 * only defined when the including environment has not supplied its own.
 */
#if !defined(cfi_startproc)
# define cfi_startproc .cfi_startproc
#endif

#if !defined(cfi_endproc)
# define cfi_endproc .cfi_endproc
#endif

#if !defined(cfi_rel_offset)
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif

#if !defined(cfi_restore)
# define cfi_restore(reg) .cfi_restore reg
#endif

#if !defined(cfi_adjust_cfa_offset)
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif

#if !defined(cfi_remember_state)
# define cfi_remember_state .cfi_remember_state
#endif

#if !defined(cfi_restore_state)
# define cfi_restore_state .cfi_restore_state
#endif

/*
 * ENTRY(name): declare `name' as a global function symbol, align it to
 * 16 bytes, emit the label and open its CFI region.
 */
#if !defined(ENTRY)
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size.  */
#if !defined(END)
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for one 4-byte push of REG ...  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* ... and for the matching pop.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop a 32-bit register and keep the CFI state in step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/*
 * PARMS is the %esp-relative offset used at function entry to reach the
 * first argument; STR is that argument (the string pointer).
 */
#define PARMS		4
#define STR		PARMS

/* Hooks around the body: ENTRANCE expands to nothing, RETURN to `ret'.  */
#define ENTRANCE
#define RETURN		ret
64
/*
 * STRLEN: length of a zero-terminated byte string (i386, SSE2, AT&T syntax).
 *
 * In:     STR(%esp)  pointer to the string
 * Out:    %eax       number of bytes that precede the first zero byte
 * Clobb:  %ecx, %edx, %xmm0-%xmm3, EFLAGS
 *         (%esi, %edi, %ebx and %ebp are pushed before the 64-byte loop
 *         and popped again on the way out of it)
 *
 * Phases:
 *   1. Test bytes 0..15 of the string one at a time.
 *   2. Test sixteen consecutive 16-byte-aligned blocks, one pcmpeqb each.
 *   3. Loop over 64-byte-aligned groups of four blocks.
 *   4. L(exit): convert the pmovmskb mask in %edx into a byte index.
 *
 * Invariants from phase 2 onwards:
 *   %ecx        = string pointer + 16
 *   %eax        = (address of the block that produced the mask) + 16
 *                 whenever control reaches L(exit)
 *   %xmm0-%xmm3 are all-zero each time they are used as a compare operand:
 *                 pcmpeqb against zero leaves the register zero unless a
 *                 zero byte was seen, and in that case the code leaves
 *                 for L(exit) instead of reusing the register.
 *
 * The helper macros below only abbreviate the repeated instruction groups;
 * the emitted instruction sequence is the fully unrolled one.
 */

	/* Go to \target when the byte at \off(%edx) is zero.  */
	.macro	CHECK_BYTE off, target
	cmpb	$0, \off(%edx)
	jz	\target
	.endm

	/*
	 * Test the 16-byte block at (%eax) using the all-zero register \vec,
	 * advance %eax by 16 and leave for L(exit) with the mask in %edx when
	 * the block holds a zero byte.  lea is used for the advance because it
	 * does not disturb the flags set by test.
	 */
	.macro	PROBE16 vec
	pcmpeqb	(%eax), \vec
	pmovmskb \vec, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)
	.endm

	/* Same as PROBE16, but also zeroes \next for the following probe.  */
	.macro	PROBE16_CLEAR vec, next
	pcmpeqb	(%eax), \vec
	pmovmskb \vec, %edx
	pxor	\next, \next
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)
	.endm

	/* Go to \target when bit \mask of the 8-bit register \reg is set.  */
	.macro	TAIL_IF_BIT mask, reg, target
	test	$\mask, \reg
	jnz	\target
	.endm

	/* Exit stub: add \len to %eax and return.  */
	.macro	TAIL_STUB label, len
\label:
	add	$\len, %eax
	RETURN
	.endm

	.text
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* %edx = string pointer */
	xor	%eax, %eax		/* length so far = 0 */

	/*
	 * Phase 1: bytes 0..15.  %eax is still 0 here, so the stub
	 * exit_tailN returns exactly N.
	 */
	CHECK_BYTE 0,  L(exit_tail0)
	CHECK_BYTE 1,  L(exit_tail1)
	CHECK_BYTE 2,  L(exit_tail2)
	CHECK_BYTE 3,  L(exit_tail3)
	CHECK_BYTE 4,  L(exit_tail4)
	CHECK_BYTE 5,  L(exit_tail5)
	CHECK_BYTE 6,  L(exit_tail6)
	CHECK_BYTE 7,  L(exit_tail7)
	CHECK_BYTE 8,  L(exit_tail8)
	CHECK_BYTE 9,  L(exit_tail9)
	CHECK_BYTE 10, L(exit_tail10)
	CHECK_BYTE 11, L(exit_tail11)
	CHECK_BYTE 12, L(exit_tail12)
	CHECK_BYTE 13, L(exit_tail13)
	CHECK_BYTE 14, L(exit_tail14)
	CHECK_BYTE 15, L(exit_tail15)

	/*
	 * Phase 2 set-up.
	 *   %ecx = str + 16
	 *   %eax = (str & -16) + 16, the first 16-byte boundary above the
	 *          aligned-down string pointer; it is never above str + 16,
	 *          so no unchecked byte is skipped.
	 */
	pxor	%xmm0, %xmm0
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx
	add	$16, %eax

	/* Phase 2: sixteen unrolled 16-byte probes.  */
	PROBE16_CLEAR %xmm0, %xmm1
	PROBE16_CLEAR %xmm1, %xmm2
	PROBE16_CLEAR %xmm2, %xmm3
	PROBE16	%xmm3
	.rept	3
	PROBE16	%xmm0
	PROBE16	%xmm1
	PROBE16	%xmm2
	PROBE16	%xmm3
	.endr

	/*
	 * Phase 3: round %eax down to a 64-byte boundary.  This can step back
	 * over blocks that were already probed; those held no zero byte, so
	 * probing them again cannot produce a hit.
	 */
	and	$-0x40, %eax
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)
	PUSH (%ebp)
	xor	%ebp, %ebp		/* %ebp = OR of the four masks */
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %esi
	pmovmskb %xmm2, %edi
	pmovmskb %xmm3, %ebx
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp
	lea	64(%eax), %eax		/* flags still come from the last or */
	jz	L(aligned_64)

	/*
	 * A zero byte lies somewhere in the 64 bytes just scanned.  Pick the
	 * first non-empty mask, move it to %edx and pull %eax back so that it
	 * is 16 bytes past the block that mask belongs to.
	 */
L(48leave):
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* hit in the fourth block */
	lea	(%eax), %eax		/* leaves %eax unchanged */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax		/* hit in the third block */
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax		/* hit in the second block */
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* hit in the first block */
L(aligned_64_exit):
	POP (%ebp)
	POP (%ebx)
	POP (%edi)
	POP (%esi)

	/*
	 * Phase 4: %eax - %ecx = (block + 16) - (str + 16) = offset of the
	 * block from the start of the string.  The lowest set bit of the
	 * 16-bit mask in %edx is the index of the zero byte in that block.
	 */
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_high)		/* no hit in bytes 0..7 of the block */
	TAIL_IF_BIT 0x01, %dl, L(exit_tail0)
	TAIL_IF_BIT 0x02, %dl, L(exit_tail1)
	TAIL_IF_BIT 0x04, %dl, L(exit_tail2)
	TAIL_IF_BIT 0x08, %dl, L(exit_tail3)
	TAIL_IF_BIT 0x10, %dl, L(exit_tail4)
	TAIL_IF_BIT 0x20, %dl, L(exit_tail5)
	TAIL_IF_BIT 0x40, %dl, L(exit_tail6)
	add	$7, %eax		/* only bit 7 can be the lowest one */
L(exit_tail0):
	RETURN

L(exit_high):
	add	$8, %eax		/* hit is in bytes 8..15 of the block */
	TAIL_IF_BIT 0x01, %dh, L(exit_tail0)
	TAIL_IF_BIT 0x02, %dh, L(exit_tail1)
	TAIL_IF_BIT 0x04, %dh, L(exit_tail2)
	TAIL_IF_BIT 0x08, %dh, L(exit_tail3)
	TAIL_IF_BIT 0x10, %dh, L(exit_tail4)
	TAIL_IF_BIT 0x20, %dh, L(exit_tail5)
	TAIL_IF_BIT 0x40, %dh, L(exit_tail6)
	add	$7, %eax
	RETURN

	/* Exit stubs: add the in-block index to %eax and return.  */
	.p2align 4
	TAIL_STUB L(exit_tail1), 1
	TAIL_STUB L(exit_tail2), 2
	TAIL_STUB L(exit_tail3), 3
	TAIL_STUB L(exit_tail4), 4
	TAIL_STUB L(exit_tail5), 5
	TAIL_STUB L(exit_tail6), 6
	TAIL_STUB L(exit_tail7), 7
	TAIL_STUB L(exit_tail8), 8
	TAIL_STUB L(exit_tail9), 9
	TAIL_STUB L(exit_tail10), 10
	TAIL_STUB L(exit_tail11), 11
	TAIL_STUB L(exit_tail12), 12
	TAIL_STUB L(exit_tail13), 13
	TAIL_STUB L(exit_tail14), 14
	TAIL_STUB L(exit_tail15), 15

END (STRLEN)

	/* The helper macros are private to this routine.  */
	.purgem	CHECK_BYTE
	.purgem	PROBE16
	.purgem	PROBE16_CLEAR
	.purgem	TAIL_IF_BIT
	.purgem	TAIL_STUB