/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/* Fallback helper macros.  Each one is defined only when the build
   environment has not already provided a macro of the same name.  */

/* L(label): form the label name `.Llabel' used for all branch targets
   inside the function.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers that forward to the assembler's .cfi_* directives.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): open a global function symbol NAME, aligned to 16 bytes,
   and start its call-frame information.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the call-frame information opened by ENTRY and record
   the size of NAME as the distance from its label to this point.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif
69
/* CFI_PUSH(REG): unwind annotation for a 4-byte push of REG.  The CFA
   offset grows by 4 and REG is recorded as saved at the new stack top.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI_POP(REG): unwind annotation for a 4-byte pop of REG.  The CFA
   offset shrinks by 4 and REG is recorded as holding its entry value.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* The real push/pop instruction followed by its unwind annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp once ENTRANCE has run:
   4 bytes of return address plus the 4 bytes pushed for %edi.  */
#define PARMS		8
#define ENTRANCE	PUSH(%edi);
/* Restore %edi and return.  The CFI_PUSH after `ret' emits no code; it
   re-declares %edi as saved so that the unwind description of whatever
   code follows textually again says "%edi is on the stack".  */
#define RETURN		POP(%edi); ret; CFI_PUSH(%edi);

/* Stack offsets of the two arguments, valid after ENTRANCE.  */
#define STR1		PARMS
#define STR2		STR1+4
87
/* wcsrchr (s, c) -- 32-bit x86, SSE2.

   Return in %eax the address of the last 32-bit element equal to C in
   the zero-terminated array of 32-bit elements starting at S, or 0 when
   no element matches.  The terminating zero element takes part in the
   search, so C == 0 yields the address of the terminator.

   In:    arguments on the stack; after ENTRANCE has pushed %edi they are
          read from STR1(%esp) (s) and STR2(%esp) (c).
   Out:   %eax = address of the last match, or 0.
   Saved: %edi is pushed on entry and popped by RETURN; %esi is pushed
          only on the paths that enter L(loop) and popped before return.
   Clobb: %eax, %ecx, %edx, %xmm0-%xmm5, flags.

   Register roles:
     %xmm1  C replicated into all four dword lanes.
     %xmm2  all zero; compared with each block to find the terminator.
            pcmpeqd overwrites it with the comparison result, which is
            again all zero whenever the scan carries on.
     %edi   address just past the 16-byte block examined most recently.
     %eax   pmovmskb mask of the lanes equal to C in the current block
            (four mask bits per element).
     %ecx   pmovmskb mask of the zero lanes in the current block.
     %edx   match mask of the latest block that contained a match, or 0
            if there has been none (meaningful once %esi is pushed).
     %esi   value %edi had for the block described by %edx.  */
	.text
ENTRY (wcsrchr)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	mov	%ecx, %edi
/* Two punpckldq steps replicate the low dword of %xmm1 into all four
   lanes.  %xmm2 becomes the all-zero comparand.  */
	punpckldq %xmm1, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq %xmm1, %xmm1

/* ECX has OFFSET.  It is the offset of S within its 64-byte line.  Above
   48, a 16-byte load starting at S would run past the end of that line,
   so those strings take the aligned-load path in L(crosscache).  */
	and	$63, %ecx
	cmp	$48, %ecx
	ja	L(crosscache)

/* Unaligned string: examine the first 16 bytes with an unaligned load.  */
	movdqu	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
/* Find where NULL is. */
	pmovmskb %xmm2, %ecx
/* Check if there is a match. */
	pmovmskb %xmm0, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match1)

	test	%ecx, %ecx
	jnz	L(return_null)

/* No match and no terminator: continue from the next 16-byte boundary
   with no match recorded (%edx = 0).  */
	and	$-16, %edi

	PUSH	(%esi)

	xor	%edx, %edx
	jmp	L(loop)

/* Unwind annotation only: the code below is entered without %esi on
   the stack.  */
	CFI_POP	(%esi)

	.p2align 4
L(unaligned_match1):
/* Match in the first block.  If the terminator is in it too, finish in
   the prologue code; %ecx already holds the zero mask.  */
	test	%ecx, %ecx
	jnz	L(prolog_find_zero_1)

	PUSH	(%esi)

/* Save current match */
	mov	%eax, %edx
	mov	%edi, %esi
	and	$-16, %edi
	jmp	L(loop)

/* Unwind annotation only: the code below is entered without %esi on
   the stack.  */
	CFI_POP	(%esi)

	.p2align 4
L(crosscache):
/* Handle unaligned string: load the aligned 16-byte block containing S
   and discard the mask bits that belong to bytes in front of S.  */
	and	$15, %ecx
	and	$-16, %edi
/* %xmm3 rather than %xmm2 receives the zero comparison here, so %xmm2
   stays all zero even if a zero lane lies in front of S.  */
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm3
	pcmpeqd	%xmm1, %xmm0
/* Find where NULL is. */
	pmovmskb %xmm3, %edx
/* Check if there is a match. */
	pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
	shr	%cl, %edx
	shr	%cl, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	PUSH	(%esi)

	xor	%edx, %edx
	jmp	L(loop)

/* Unwind annotation only: the code below is entered without %esi on
   the stack.  */
	CFI_POP	(%esi)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(prolog_find_zero)

	PUSH	(%esi)

/* Save the match.  The mask was shifted right by %ecx, so the saved
   base address is advanced by the same amount to compensate.  */
	mov	%eax, %edx
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  Unrolled four times; every step
   examines one aligned 16-byte block and leaves through L(matches) as
   soon as that block holds a match or the terminator.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm3
	pcmpeqd	%xmm3, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm3, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm4
	pcmpeqd	%xmm4, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm4
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm4, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm5
	pcmpeqd	%xmm5, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm5
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm5, %eax
	or	%eax, %ecx
	jz	L(loop)

	.p2align 4
L(matches):
/* %eax == 0 here means the block held only the terminator.  */
	test	%eax, %eax
	jnz	L(match)
L(return_value):
/* The terminator was reached: answer from the saved match, if any.  */
	test	%edx, %edx
	jz	L(return_null_1)
	mov	%edx, %eax
	mov	%esi, %edi

	POP	(%esi)

/* Select the highest matching element of the saved block.  The block
   starts at -16(%edi); mask bits 0-3, 4-7, 8-11 and 12-15 stand for
   its four elements.  */
	test	%ah, %ah
	jnz	L(match_third_or_fourth_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

/* Unwind annotation only: the code below runs with %esi pushed.  */
	CFI_PUSH (%esi)

	.p2align 4
L(return_null_1):
	POP	(%esi)

	xor	%eax, %eax
	RETURN

/* Unwind annotation only: the code below runs with %esi pushed.  */
	CFI_PUSH (%esi)

	.p2align 4
L(match):
/* The block has a match.  %ecx was merged with %eax in the loop, so
   fetch the zero mask again.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(find_zero)
/* save match info */
	mov	%eax, %edx
	mov	%edi, %esi
	jmp	L(loop)

	.p2align 4
L(find_zero):
/* The block holds both a match and the terminator.  Locate the
   terminator and keep only the matches at or before it.  If none
   remain, fall back to the saved match via L(return_value).  */
	test	%cl, %cl
	jz	L(find_zero_in_third_or_fourth_wchar)
	test	$15, %cl
	jz	L(find_zero_in_second_wchar)
/* Terminator is the first element: only a match on it counts.  */
	and	$1, %eax
	jz	L(return_value)

	POP	(%esi)

	lea	-16(%edi), %eax
	RETURN

/* Unwind annotation only: the code below runs with %esi pushed.  */
	CFI_PUSH (%esi)

	.p2align 4
L(find_zero_in_second_wchar):
/* Keep mask bits 0-4: the first element and the second (the
   terminator).  The value is (1 << 5) - 1 = 31.  */
	and	$((1 << 5) - 1), %eax
	jz	L(return_value)

	POP	(%esi)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

/* Unwind annotation only: the code below runs with %esi pushed.  */
	CFI_PUSH (%esi)

	.p2align 4
L(find_zero_in_third_or_fourth_wchar):
	test	$15, %ch
	jz	L(find_zero_in_fourth_wchar)
/* Keep mask bits 0-8: the first two elements and the third (the
   terminator).  The value is (1 << 9) - 1 = 511.  */
	and	$((1 << 9) - 1), %eax
	jz	L(return_value)

	POP	(%esi)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

/* Unwind annotation only: the code below runs with %esi pushed.  */
	CFI_PUSH (%esi)

	.p2align 4
L(find_zero_in_fourth_wchar):
/* Terminator is the last element, so every match in %eax is valid and
   %eax is known to be non-zero here.  */

	POP	(%esi)

	test	%ah, %ah
	jnz	L(match_third_or_fourth_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

/* No CFI_PUSH (%esi) here.  Every label from this point to END is
   entered with only %edi on the stack, either after POP (%esi) or from
   the prologue paths that never push %esi.  Declaring %esi as pushed
   would make the unwind information wrong for the rest of the
   function.  */

	.p2align 4
L(match_second_wchar):
	lea	-12(%edi), %eax
	RETURN

	.p2align 4
L(match_third_or_fourth_wchar):
	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_third_wchar):
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_fourth_wchar):
	lea	-4(%edi), %eax
	RETURN

	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

/* Prologue variants of L(find_zero): the terminator is in the first
   block examined, so no earlier match exists and a failed search
   returns 0 directly.  %esi has not been pushed on these paths.  */
	.p2align 4
L(prolog_find_zero):
/* From L(crosscache): both masks were shifted right by %ecx, so move
   the base forward by %ecx, then put the zero mask in %ecx.  */
	add	%ecx, %edi
	mov	%edx, %ecx
L(prolog_find_zero_1):
	test	%cl, %cl
	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
	test	$15, %cl
	jz	L(prolog_find_zero_in_second_wchar)
	and	$1, %eax
	jz	L(return_null)

	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(prolog_find_zero_in_second_wchar):
/* Keep mask bits 0-4; the value is (1 << 5) - 1 = 31.  */
	and	$((1 << 5) - 1), %eax
	jz	L(return_null)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(prolog_find_zero_in_third_or_fourth_wchar):
	test	$15, %ch
	jz	L(prolog_find_zero_in_fourth_wchar)
/* Keep mask bits 0-8; the value is (1 << 9) - 1 = 511.  */
	and	$((1 << 9) - 1), %eax
	jz	L(return_null)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(prolog_find_zero_in_fourth_wchar):
	test	%ah, %ah
	jnz	L(match_third_or_fourth_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

END (wcsrchr)