blob: 6a6ad519ed8c088f08e22de72d3d9341ff0871a1 [file] [log] [blame]
Liubov Dmitrieva0a490662012-01-17 12:55:46 +04001/*
2Copyright (c) 2011 Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
#ifndef USE_AS_WCSCAT

/* Stand-alone build: give every helper macro used by the code below a
   default definition, unless something earlier already defined it.  */

/* L(label): spell a label with the assembler-local ".L" prefix.  */
# if !defined (L)
#  define L(label)	.L##label
# endif

/* Dot-less spellings of the CFI directives used by ENTRY and END.  */
# if !defined (cfi_startproc)
#  define cfi_startproc	.cfi_startproc
# endif

# if !defined (cfi_endproc)
#  define cfi_endproc	.cfi_endproc
# endif

/* ENTRY(name): declare NAME as a global function symbol aligned to
   16 bytes, place its label and open its CFI region.  */
# if !defined (ENTRY)
#  define ENTRY(name)			\
	.globl name;			\
	.type name, @function;		\
	.p2align 4;			\
name:					\
	cfi_startproc
# endif

/* END(name): close the CFI region and record the size of NAME.  */
# if !defined (END)
#  define END(name)			\
	cfi_endproc;			\
	.size name, .-name
# endif

/* STR is the %esp-relative offset from which the entry code loads the
   string pointer; RETURN is how every exit path leaves the routine.  */
# define PARMS	4
# define STR	PARMS
# define RETURN	ret
63
/*
 * wcslen -- length of a string of 4-byte characters, SSE2, 32-bit x86.
 * Syntax: AT&T (GNU as), run through the C preprocessor.
 *
 * Stand-alone (USE_AS_WCSCAT not defined):
 *   In:     STR(%esp) = pointer to the string
 *   Out:    %eax = number of characters before the first zero character
 *   Clobb:  %ecx, %edx, %xmm0-%xmm3, %xmm6, flags
 *
 * The vector part assumes the string is 4-byte aligned, so that a
 * character never straddles a 16-byte block -- TODO confirm with callers.
 * Aligned block reads may touch bytes in front of the position already
 * examined, and bytes behind the terminator inside the same block.
 */
	.text
ENTRY (wcslen)
	mov	STR(%esp), %edx
#endif
	/*
	 * When USE_AS_WCSCAT is defined the including file must arrive here
	 * with %edx = string pointer and must define L() and RETURN itself.
	 *
	 * Characters 0..7 are tested one by one.  The `l' suffix is spelled
	 * out because an immediate compared with a memory operand carries no
	 * register from which the assembler could derive the operand size.
	 */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	pxor	%xmm0, %xmm0

	/*
	 * %eax = read cursor, rounded down to a 16-byte boundary (this may
	 *        step back over characters that were already tested above).
	 * %ecx = string start + 16.  The cursor is always advanced past a
	 *        block before branching to L(exit), so there
	 *        %eax - %ecx = byte offset of the matching block.
	 */
	lea	32(%edx), %eax
	lea	-16(%eax), %ecx
	and	$-16, %eax

	/*
	 * Sixteen unrolled 16-byte probes.  Each compares a block against a
	 * register that is known to be zero: either freshly cleared by pxor,
	 * or left all-zero by its previous pcmpeqd, which found no match.
	 * %edx receives the byte mask of the zero characters in the block.
	 */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* Round the cursor down to 64 bytes; blocks probed above may be
	   scanned a second time by the loop.  */
	and	$-0x40, %eax

	/*
	 * Main loop: 64 bytes per iteration.  %xmm3 is all-zero on entry and
	 * on every re-entry.  The four blocks are folded with a byte-wise
	 * minimum.  A zero character always survives the fold, but zero
	 * bytes coming from different blocks can also line up into a zero
	 * dword, so a hit only means "look closer".
	 */
	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(aligned_64_loop)

	/*
	 * Re-test the four blocks one at a time.  %eax already points past
	 * the whole 64-byte group, so %ecx is rebiased (+48, then -16 per
	 * step) to keep %eax - %ecx equal to the offset of the block under
	 * test.  Blocks 0 and 2 are reloaded from memory because %xmm0 and
	 * %xmm2 were overwritten by the fold.
	 */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	48(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* False positive from the fold: %ecx is back at its original value
	   and %xmm3 is still zero, so simply carry on.  */
	jmp	L(aligned_64_loop)

	/*
	 * A block containing a zero character was found.
	 * %edx = its byte mask: bits 0-3, 4-7, 8-11 and 12-15 belong to
	 * characters 0, 1, 2 and 3 of the block.
	 */
	.p2align 4
L(exit):
	sub	%ecx, %eax		/* byte offset of the block */
	shr	$2, %eax		/* index of its first character */
	test	%dl, %dl		/* anything in characters 0-1? */
	jz	L(exit_high)

	mov	%dl, %cl
	and	$15, %cl		/* character 0 is the terminator? */
	jz	L(exit_1)
	RETURN

	.p2align 4
L(exit_high):
	mov	%dh, %ch
	and	$15, %ch		/* character 2 is the terminator? */
	jz	L(exit_3)
	add	$2, %eax
	RETURN

	.p2align 4
L(exit_1):
	add	$1, %eax
	RETURN

	.p2align 4
L(exit_3):
	add	$3, %eax
	RETURN

	/* Exits for a terminator among the first eight characters.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	RETURN

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	RETURN

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	RETURN

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	RETURN

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	RETURN

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	RETURN

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	RETURN

	/* With USE_AS_WCSCAT defined this last exit has no RETURN: control
	   falls through into whatever follows the #include.  */
	.p2align 4
L(exit_tail7):
	mov	$7, %eax
#ifndef USE_AS_WCSCAT
	RETURN

END (wcslen)
#endif