/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.

 * Neither the name of Intel Corporation nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef USE_AS_STRCAT
/*
 * Everything from here down to the "end ifndef USE_AS_STRCAT" marker is the
 * stand-alone preamble.  It is skipped when USE_AS_STRCAT is defined, in
 * which case the including file is expected to provide L() and RETURN
 * itself (they are only defined inside this guard).
 */

/* Name of the exported symbol.  Predefine STRLEN to emit the routine under
   a different name.  */
#ifndef STRLEN
# define STRLEN strlen
#endif

/* L(foo) expands to .Lfoo; the .L prefix keeps the label assembler-local so
   it is not exported in the symbol table.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Call-frame-information markers, overridable by the includer.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* ENTRY(name): declare `name' as a global function symbol, align it to
   16 bytes (2^4), place the label and open the CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size as the
   distance from `name' to the current location.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* Every exit path of the routine leaves through RETURN.  */
#define RETURN ret
/*
 * STRLEN -- SSE2 string-length scan (presumably the C routine
 * size_t strlen (const char *s); the prototype itself is not visible here).
 *
 * In:     %rdi = s, address of the first byte of the string
 * Out:    %rax = number of bytes that precede the first zero byte
 * Writes: %rax, %rcx, %rdx, %r10, %xmm0-%xmm5, flags.
 *         No stack use, no calls.
 *
 * Method:
 *   1. Examine the first 16 bytes.  If s lies in the last 15 bytes of its
 *      64-byte line, an aligned load plus a bit mask is used instead of an
 *      unaligned load, so the read never leaves that 64-byte line.
 *   2. Scan sixteen aligned 16-byte vectors one at a time (short strings).
 *   3. Step 16 bytes at a time until %rax is 64-byte aligned.
 *   4. Main loop: fold 64 aligned bytes per iteration with pminub and test
 *      once; afterwards locate the exact vector and byte.
 *
 * Invariants used throughout:
 *   - %rax is always 16-byte aligned once it holds an address.
 *   - %xmm0-%xmm3 are all-zero whenever a pcmpeqb into them found no match,
 *     so they can be reused as the zero operand without being re-cleared.
 *   - At L(exitNN), the zero byte lies in the 16 bytes at NN(%rax) and
 *     %edx holds its pmovmskb mask.
 *
 * When USE_AS_STRCAT is defined the section/ENTRY/END lines and the last
 * RETURN are omitted, so control falls out of L(exit64) into whatever
 * follows the point of inclusion.
 */
	.section .text.sse2,"ax",@progbits
ENTRY (STRLEN)
/* end ifndef USE_AS_STRCAT */
#endif
	xor	%eax, %eax		/* rax = 0 (32-bit write zero-extends) */
	mov	%edi, %ecx
	and	$0x3f, %ecx		/* ecx = s mod 64 */
	pxor	%xmm0, %xmm0		/* xmm0 = 16 zero bytes */
	cmp	$0x30, %ecx
	ja	L(next)			/* offset > 48: 16 bytes from s would
					   cross the 64-byte line */
	movdqu	(%rdi), %xmm1		/* unaligned load of s[0..15] */
	pcmpeqb	%xmm1, %xmm0		/* 0xff in each lane where s[i] == 0 */
	pmovmskb %xmm0, %edx		/* bit i of edx set <=> s[i] == 0 */
	test	%edx, %edx
	jnz	L(exit_less16)		/* rax is still 0 on this path */
	mov	%rdi, %rax
	and	$-16, %rax		/* rax = s rounded down to 16 */
	jmp	L(align16_start)

L(next):
	mov	%rdi, %rax
	and	$-16, %rax		/* rax = s rounded down to 16 */
	pcmpeqb	(%rax), %xmm0		/* aligned load; also covers the
					   (s & 15) bytes that precede s */
	mov	$-1, %r10d
	sub	%rax, %rcx		/* low 5 bits of cl == s & 15, and shl
					   only uses the low 5 bits of cl */
	shl	%cl, %r10d		/* r10d = mask without the bits of the
					   bytes that precede s */
	pmovmskb %xmm0, %edx
	and	%r10d, %edx		/* ignore matches located before s */
	jnz	L(exit)

L(align16_start):
	/* xmm0 may hold stale match bits from L(next); clear all four
	   compare registers before the vector-by-vector scan.  */
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3

	/* Vectors 1-4: bytes 16..79 past rax.  */
	pcmpeqb	16(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/* Vectors 5-8.  rax advances by 64 right after the compare, so the
	   vector just tested sits at 16(%rax) as L(exit16) expects.  */
	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/* Vectors 9-12.  */
	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/* Vectors 13-16.  */
	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)


	/* Everything up to 64(%rax)+15 is known to be non-zero.  Advance one
	   vector at a time until rax is a multiple of 64.  Entering the loop
	   at an already-checked address only re-reads clean bytes.  */
	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	80(%rax), %xmm0
	add	$80, %rax		/* rax = address of the vector just tested */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm1
	add	$16, %rax
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm2
	add	$16, %rax
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	/* NOTE(review): rax is 16-byte aligned, so after the +80/+16/+16
	   steps above it is always a multiple of 64 by the previous test;
	   this fourth probe looks unreachable.  Kept as in the original.  */
	pcmpeqb	16(%rax), %xmm3
	add	$16, %rax
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit)

	add	$16, %rax
	.p2align 4
L(align64_loop):
	/* rax is 64-byte aligned.  The unsigned byte minimum of the four
	   vectors has a zero lane iff any of the 64 bytes is zero.  xmm0 is
	   all-zero here (see invariants) and is not modified by the loop.  */
	movaps	(%rax), %xmm4
	pminub	16(%rax), %xmm4
	movaps	32(%rax), %xmm5
	pminub	48(%rax), %xmm5
	add	$64, %rax
	pminub	%xmm4, %xmm5
	pcmpeqb	%xmm0, %xmm5
	pmovmskb %xmm5, %edx
	test	%edx, %edx
	jz	L(align64_loop)


	/* The zero byte is in [rax-64, rax).  Rebase rax by -80 so the four
	   vectors sit at offsets 16/32/48/64, matching the exit stubs.  */
	pcmpeqb	-64(%rax), %xmm0
	sub	$80, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	/* Not in the first three vectors, so it must be in the fourth:
	   no test needed before computing the result.  */
	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	sub	%rdi, %rax		/* rax = vector base - s */
	bsf	%rdx, %rdx		/* index of the first zero byte */
	add	%rdx, %rax
	add	$64, %rax
	RETURN

/*
 * Exit stubs.  Each computes
 *     (rax - s) + index of lowest set bit in edx + vector offset.
 * The upper half of rdx is zero because the 32-bit writes to edx
 * zero-extend, so the 64-bit bsf sees only the 16 mask bits.
 */
	.p2align 4
L(exit):
	sub	%rdi, %rax		/* vector at 0(%rax) */
L(exit_less16):
	/* Entered directly from the first probe with rax == 0.  */
	bsf	%rdx, %rdx
	add	%rdx, %rax
	RETURN
	.p2align 4
L(exit16):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$16, %rax
	RETURN
	.p2align 4
L(exit32):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$32, %rax
	RETURN
	.p2align 4
L(exit48):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$48, %rax
	RETURN
	.p2align 4
L(exit64):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$64, %rax
#ifndef USE_AS_STRCAT
	RETURN

END (STRLEN)
#endif