/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/* L(label): spell a file-local label.  The ".L" prefix marks the symbol
   as local to this object, so it is not exported.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Function-style wrappers around the assembler's .cfi_* directives.
   Each one is defined only when the including environment has not
   already supplied a macro of the same name.  */

/* Open the call-frame-information record for a function.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

/* Close the call-frame-information record opened by cfi_startproc.  */
#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* Record that `reg' is saved at offset `off' relative to the current
   CFA register.  */
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

/* Record that `reg' holds its function-entry value again.  */
#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

/* Record that the CFA offset changed by `off' bytes (positive after a
   push, negative after a pop).  */
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif
54
/* ENTRY(name): start a global function.  Emits, in order: the ELF
   symbol type (@function), the .globl declaration, alignment to a
   2^4 = 16-byte boundary, the label itself, and the start of the CFI
   record.  Must be paired with END(name).  */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/* END(name): finish the function opened by ENTRY(name).  Closes the CFI
   record and sets the symbol size to the distance from the label to
   the current location.  */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif
69
/* CFI_PUSH(REG): unwind annotation for a 4-byte push of REG.  The CFA
   offset grows by 4 and REG is recorded as saved at offset 0.  Emits no
   instruction, so it can also be used on its own to re-state the frame
   layout at a point reached only by a jump.  */
#define CFI_PUSH(REG)		\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI_POP(REG): unwind annotation for a 4-byte pop of REG.  The CFA
   offset shrinks by 4 and REG is marked as restored.  Emits no
   instruction.  */
#define CFI_POP(REG)		\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* PUSH/POP: the pushl/popl instruction together with its matching
   unwind annotation, so the two cannot drift apart.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
80
/* Offsets of the stack arguments relative to %esp at function entry,
   i.e. before anything has been pushed.  PARMS is the offset of the
   first argument (0(%esp) holds the return address).  */
#define PARMS	4


/* STR1: first argument (the wide string), STR2: second argument (the
   wide character to find), 4 bytes above STR1.  The expansions are
   used directly as displacements, e.g. STR2(%esp).  */
#define STR1	PARMS
#define STR2	STR1+4
86
/* wcschr (s, c) -- i386, SSE2, AT&T syntax.

   Scans the sequence of 32-bit elements starting at s and returns in
   %eax the address of the first element equal to c, or 0 when a zero
   element is met before any match.  Because the match test takes
   priority when both hit the same element, c == 0 returns the address
   of the terminating zero element.

   In:    STR1(%esp) = s, STR2(%esp) = c (stack arguments).
   Out:   %eax = address of the match, or 0.
   Uses:  %eax, %ecx, %edx, %xmm0-%xmm2, flags.  %edi is used only on
          the L(cross_cache) path and is saved/restored around it.

   Register roles:
     %ecx   address of the 16-byte block being examined
     %xmm1  c replicated into all four dword lanes
     %xmm2  must be all-zero before each pcmpeqd against it; afterwards
            it is the per-lane "element == 0" mask.  It remains all-zero
            whenever a block contained no zero element, which is what
            lets the loop reuse it without re-clearing.
     %xmm0  block data, then the per-lane "element == c" mask
     %eax   pmovmskb of the match mask (4 identical bits per lane:
            lanes 0-1 in %al, lanes 2-3 in %ah)
     %edx   pmovmskb of the zero mask, same layout (%dl / %dh)  */

	.text
ENTRY (wcschr)

	mov	STR1(%esp), %ecx	/* %ecx = s */
	movd	STR2(%esp), %xmm1	/* lane 0 of %xmm1 = c */

	mov	%ecx, %eax
	punpckldq %xmm1, %xmm1		/* lanes 0-1 = c */
	pxor	%xmm2, %xmm2		/* %xmm2 = 0 */
	punpckldq %xmm1, %xmm1		/* lanes 0-3 = c */

/* %eax = s mod 64.  Above 48 the 16 bytes at s would run past the end
   of the enclosing 64-byte-aligned region, so that case takes the
   aligned-load path instead of the unaligned load below.  */
	and	$63, %eax
	cmp	$48, %eax
	ja	L(cross_cache)

/* First block: unaligned load straight from s.  */
	movdqu	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2		/* %xmm2 = zero-element mask */
	pcmpeqd	%xmm1, %xmm0		/* %xmm0 = match mask */
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
/* Nothing found: round %ecx down to a 16-byte boundary; the loop adds
   16 before its first load, so it resumes at the next aligned block
   (possibly re-checking the tail of the bytes just examined).  */
	and	$-16, %ecx
	jmp	L(loop)

/* First block when s is in the last part of its 64-byte region: load
   the aligned 16-byte block containing s and discard the mask bits
   that belong to bytes in front of s.  */
	.p2align 4
L(cross_cache):
	PUSH (%edi)
	mov	%ecx, %edi
	mov	%eax, %ecx
	and	$-16, %edi		/* %edi = s rounded down to 16 */
	and	$15, %ecx		/* %ecx = s mod 16 */
	movdqa	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax

/* Shift out the bits for bytes before s, so bit 0 of each mask now
   corresponds to the byte at s.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	jz	L(unaligned_no_match)

	add	%edi, %ecx		/* %ecx = s again (base + offset) */
	POP (%edi)

/* Same decoding as L(matches)/L(match_case2), applied to the shifted
   masks with %ecx = s as the base address.  */
	test	%edx, %edx
	jz	L(match_case1)
	test	%al, %al
	jz	L(match_high_case2)
	test	$15, %al
	jnz	L(match_case2_4)
	test	$15, %dl
	jnz	L(return_null)
	lea	4(%ecx), %eax
	ret

/* The code below is entered by the jump from L(cross_cache) while %edi
   is still on the stack; re-state that for the unwinder, since the POP
   above already recorded it as restored.  */
	CFI_PUSH (%edi)

	.p2align 4
L(unaligned_no_match):
	mov	%edi, %ecx		/* %ecx = aligned block just examined */
	POP (%edi)

	test	%edx, %edx
	jnz	L(return_null)		/* zero element at or after s, no match */

/* %xmm2 may hold mask bits from elements in front of s; clear it to
   re-establish the all-zero invariant the loop depends on.  */
	pxor	%xmm2, %xmm2

/* Loop start on aligned string. */
/* Main loop, unrolled four times: one aligned 16-byte block per step.
   %edx is used as scratch for the combined "anything found" test and
   is reloaded from %xmm2 at L(matches).  */
	.p2align 4
L(loop):
	add	$16, %ecx
	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jz	L(loop)

/* The block at %ecx contains a match, a zero element, or both.
   %eax = match mask; %edx was overwritten by the `or', so re-read the
   zero mask from %xmm2.  */
	.p2align 4
L(matches):
	pmovmskb %xmm2, %edx
	test	%eax, %eax
	jz	L(return_null)		/* zero element only */
	test	%edx, %edx
	jz	L(match_case1)		/* match only */

/* Both a match and a zero element in this block: the match counts only
   if no zero element sits in a lower lane.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)	/* no match in lanes 0-1 */
	test	$15, %al
	jnz	L(match_case2_4)	/* match in lane 0 */
	test	$15, %dl
	jnz	L(return_null)		/* zero in lane 0 precedes lane-1 match */
	lea	4(%ecx), %eax		/* match in lane 1 */
	ret

/* Match in lane 0: return the block address itself.  */
	.p2align 4
L(match_case2_4):
	mov	%ecx, %eax
	ret

/* Match is in lane 2 or 3, and the block also holds a zero element.  */
	.p2align 4
L(match_high_case2):
	test	%dl, %dl
	jnz	L(return_null)		/* zero in lane 0 or 1 comes first */
	test	$15, %ah
	jnz	L(match_case2_12)	/* match in lane 2 */
	test	$15, %dh
	jnz	L(return_null)		/* zero in lane 2 precedes lane-3 match */
	lea	12(%ecx), %eax		/* match in lane 3 */
	ret

/* Match in lane 2.  */
	.p2align 4
L(match_case2_12):
	lea	8(%ecx), %eax
	ret

/* Match with no zero element in the block: return the lowest matching
   lane.  Bit 0 of %al / %ah is set exactly when lane 0 / lane 2
   matched.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)	/* no match in lanes 0-1 */

	test	$0x01, %al
	jnz	L(exit0)		/* lane 0 */
	lea	4(%ecx), %eax		/* lane 1 */
	ret

	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(exit3)		/* lane 2 */
	lea	12(%ecx), %eax		/* lane 3 */
	ret

/* Lane 0: the block address is the answer.  */
	.p2align 4
L(exit0):
	mov	%ecx, %eax
	ret

/* Lane 2: block address + 8.  */
	.p2align 4
L(exit3):
	lea	8(%ecx), %eax
	ret

/* A zero element was reached before any match: return 0.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	ret

END (wcschr)