blob: e325181f464b89c14525a742fb863151f067f38d [file] [log] [blame]
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/*
 * Fallback definitions for the helper macros used below.  Every one is
 * guarded by #ifndef, so a definition supplied earlier (by an included
 * header or on the command line) takes precedence over these.
 */

/* L(label): spell a label with the .L prefix, which GAS treats as a
   local (non-exported) symbol.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers that map the cfi_* spellings onto the corresponding
   GAS .cfi_* unwind directives.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): open a global function symbol.  Marks NAME as a function,
   exports it, aligns it to 16 bytes (.p2align 4), emits the label and
   starts a CFI region.  Must be paired with END(name).  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region opened by ENTRY and record the symbol
   size as the distance from NAME to the current location.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif
69
/* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG: the CFA
   moves 4 bytes further from %esp and REG is saved at offset 0 from the
   new stack pointer.  Emits no instructions.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI_POP(REG): inverse of CFI_PUSH: the CFA moves back by 4 bytes and
   REG is marked as holding its original value again.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* PUSH/POP: the real pushl/popl together with the matching unwind note.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset from %esp to the first stack argument once ENTRANCE has run:
   4 bytes of return address plus the 4-byte %edi slot pushed below.  */
#define PARMS		8
/* Prologue: %edi is the only register the function saves.  */
#define ENTRANCE	PUSH(%edi)
/* Epilogue: restore %edi and return.  The trailing CFI_PUSH emits no code;
   it re-declares "%edi is on the stack" for whatever instructions follow
   the ret in the file, since those are reached by jumps from paths on
   which %edi is still pushed.  */
#define RETURN		POP (%edi); ret; CFI_PUSH (%edi);


/* Stack offsets of the two arguments, in order.  */
#define STR1		PARMS
#define STR2		STR1+4
88
89 .text
90ENTRY (strchr)
91
92 ENTRANCE
93 mov STR1(%esp), %ecx
94 movd STR2(%esp), %xmm1
95
96 pxor %xmm2, %xmm2
97 mov %ecx, %edi
98 punpcklbw %xmm1, %xmm1
99 punpcklbw %xmm1, %xmm1
100 /* ECX has OFFSET. */
101 and $15, %ecx
102 pshufd $0, %xmm1, %xmm1
103 je L(loop)
104
105/* Handle unaligned string. */
106 and $-16, %edi
107 movdqa (%edi), %xmm0
108 pcmpeqb %xmm0, %xmm2
109 pcmpeqb %xmm1, %xmm0
110 /* Find where NULL is. */
111 pmovmskb %xmm2, %edx
112 /* Check if there is a match. */
113 pmovmskb %xmm0, %eax
114 /* Remove the leading bytes. */
115 sarl %cl, %edx
116 sarl %cl, %eax
117 test %eax, %eax
118 jz L(unaligned_no_match)
119 add %ecx, %edi
120 test %edx, %edx
121 jz L(match_case1)
122 jmp L(match_case2)
123
124 .p2align 4
125L(unaligned_no_match):
126 test %edx, %edx
127 jne L(return_null)
128
129 pxor %xmm2, %xmm2
130 add $16, %edi
131
132 .p2align 4
133/* Loop start on aligned string. */
134L(loop):
135 movdqa (%edi), %xmm0
136 pcmpeqb %xmm0, %xmm2
137 pcmpeqb %xmm1, %xmm0
138 pmovmskb %xmm2, %edx
139 pmovmskb %xmm0, %eax
140 test %eax, %eax
141 jnz L(matches)
142 test %edx, %edx
143 jnz L(return_null)
144 add $16, %edi
145
146 movdqa (%edi), %xmm0
147 pcmpeqb %xmm0, %xmm2
148 pcmpeqb %xmm1, %xmm0
149 pmovmskb %xmm2, %edx
150 pmovmskb %xmm0, %eax
151 test %eax, %eax
152 jnz L(matches)
153 test %edx, %edx
154 jnz L(return_null)
155 add $16, %edi
156
157 movdqa (%edi), %xmm0
158 pcmpeqb %xmm0, %xmm2
159 pcmpeqb %xmm1, %xmm0
160 pmovmskb %xmm2, %edx
161 pmovmskb %xmm0, %eax
162 test %eax, %eax
163 jnz L(matches)
164 test %edx, %edx
165 jnz L(return_null)
166 add $16, %edi
167
168 movdqa (%edi), %xmm0
169 pcmpeqb %xmm0, %xmm2
170 pcmpeqb %xmm1, %xmm0
171 pmovmskb %xmm2, %edx
172 pmovmskb %xmm0, %eax
173 test %eax, %eax
174 jnz L(matches)
175 test %edx, %edx
176 jnz L(return_null)
177 add $16, %edi
178 jmp L(loop)
179
180L(matches):
181 /* There is a match. First find where NULL is. */
182 test %edx, %edx
183 jz L(match_case1)
184
185 .p2align 4
186L(match_case2):
187 test %al, %al
188 jz L(match_higth_case2)
189
190 mov %al, %cl
191 and $15, %cl
192 jnz L(match_case2_4)
193
194 mov %dl, %ch
195 and $15, %ch
196 jnz L(return_null)
197
198 test $0x10, %al
199 jnz L(Exit5)
200 test $0x10, %dl
201 jnz L(return_null)
202 test $0x20, %al
203 jnz L(Exit6)
204 test $0x20, %dl
205 jnz L(return_null)
206 test $0x40, %al
207 jnz L(Exit7)
208 test $0x40, %dl
209 jnz L(return_null)
210 lea 7(%edi), %eax
211 RETURN
212
213 .p2align 4
214L(match_case2_4):
215 test $0x01, %al
216 jnz L(Exit1)
217 test $0x01, %dl
218 jnz L(return_null)
219 test $0x02, %al
220 jnz L(Exit2)
221 test $0x02, %dl
222 jnz L(return_null)
223 test $0x04, %al
224 jnz L(Exit3)
225 test $0x04, %dl
226 jnz L(return_null)
227 lea 3(%edi), %eax
228 RETURN
229
230 .p2align 4
231L(match_higth_case2):
232 test %dl, %dl
233 jnz L(return_null)
234
235 mov %ah, %cl
236 and $15, %cl
237 jnz L(match_case2_12)
238
239 mov %dh, %ch
240 and $15, %ch
241 jnz L(return_null)
242
243 test $0x10, %ah
244 jnz L(Exit13)
245 test $0x10, %dh
246 jnz L(return_null)
247 test $0x20, %ah
248 jnz L(Exit14)
249 test $0x20, %dh
250 jnz L(return_null)
251 test $0x40, %ah
252 jnz L(Exit15)
253 test $0x40, %dh
254 jnz L(return_null)
255 lea 15(%edi), %eax
256 RETURN
257
258 .p2align 4
259L(match_case2_12):
260 test $0x01, %ah
261 jnz L(Exit9)
262 test $0x01, %dh
263 jnz L(return_null)
264 test $0x02, %ah
265 jnz L(Exit10)
266 test $0x02, %dh
267 jnz L(return_null)
268 test $0x04, %ah
269 jnz L(Exit11)
270 test $0x04, %dh
271 jnz L(return_null)
272 lea 11(%edi), %eax
273 RETURN
274
275 .p2align 4
276L(match_case1):
277 test %al, %al
278 jz L(match_higth_case1)
279
280 test $0x01, %al
281 jnz L(Exit1)
282 test $0x02, %al
283 jnz L(Exit2)
284 test $0x04, %al
285 jnz L(Exit3)
286 test $0x08, %al
287 jnz L(Exit4)
288 test $0x10, %al
289 jnz L(Exit5)
290 test $0x20, %al
291 jnz L(Exit6)
292 test $0x40, %al
293 jnz L(Exit7)
294 lea 7(%edi), %eax
295 RETURN
296
297 .p2align 4
298L(match_higth_case1):
299 test $0x01, %ah
300 jnz L(Exit9)
301 test $0x02, %ah
302 jnz L(Exit10)
303 test $0x04, %ah
304 jnz L(Exit11)
305 test $0x08, %ah
306 jnz L(Exit12)
307 test $0x10, %ah
308 jnz L(Exit13)
309 test $0x20, %ah
310 jnz L(Exit14)
311 test $0x40, %ah
312 jnz L(Exit15)
313 lea 15(%edi), %eax
314 RETURN
315
316 .p2align 4
317L(Exit1):
318 lea (%edi), %eax
319 RETURN
320
321 .p2align 4
322L(Exit2):
323 lea 1(%edi), %eax
324 RETURN
325
326 .p2align 4
327L(Exit3):
328 lea 2(%edi), %eax
329 RETURN
330
331 .p2align 4
332L(Exit4):
333 lea 3(%edi), %eax
334 RETURN
335
336 .p2align 4
337L(Exit5):
338 lea 4(%edi), %eax
339 RETURN
340
341 .p2align 4
342L(Exit6):
343 lea 5(%edi), %eax
344 RETURN
345
346 .p2align 4
347L(Exit7):
348 lea 6(%edi), %eax
349 RETURN
350
351 .p2align 4
352L(Exit9):
353 lea 8(%edi), %eax
354 RETURN
355
356 .p2align 4
357L(Exit10):
358 lea 9(%edi), %eax
359 RETURN
360
361 .p2align 4
362L(Exit11):
363 lea 10(%edi), %eax
364 RETURN
365
366 .p2align 4
367L(Exit12):
368 lea 11(%edi), %eax
369 RETURN
370
371 .p2align 4
372L(Exit13):
373 lea 12(%edi), %eax
374 RETURN
375
376 .p2align 4
377L(Exit14):
378 lea 13(%edi), %eax
379 RETURN
380
381 .p2align 4
382L(Exit15):
383 lea 14(%edi), %eax
384 RETURN
385
386 .p2align 4
387L(return_null):
388 xor %eax, %eax
389 RETURN
390
391END (strchr)