/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
 * CACHE_LINE_SIZE: stride, in bytes, used to build the pld (preload)
 * offsets in this file. It is 32 when the build defines
 * HAVE_32_BYTE_CACHE_LINE and 64 otherwise.
 */
#ifdef HAVE_32_BYTE_CACHE_LINE
#define CACHE_LINE_SIZE     32
#else
#define CACHE_LINE_SIZE     64
#endif

/*
 * Optimized memcmp() for Cortex-A9.
 */

/*
 * int memcmp(const void *lhs, const void *rhs, size_t n)
 *
 * In:    r0 = lhs pointer, r1 = rhs pointer, r2 = n (byte count)
 * Out:   r0 = 0 when no mismatch is found, otherwise
 *             (lhs byte) - (rhs byte) for the first mismatching pair,
 *             both bytes being zero-extended by ldrb.
 * Uses:  r3, ip as scratch. r4 and lr are pushed/popped around the
 *        word-at-a-time code; r5-r7 are pushed/popped only in the
 *        "offset is 1 or 3" path. The NEON path (when compiled in)
 *        overwrites d0-d7.
 *
 * Structure:
 *   - n < 12                      -> plain byte loop (labels 10/11)
 *   - optional NEON loop, 32 bytes per iteration; it only detects *that*
 *     a block differs and rewinds so the scalar code finds the byte
 *   - lhs is byte-stepped to a word boundary, then one of three loops
 *     runs depending on (rhs & 3): congruent, offset 2, offset 1 or 3
 *   - every word loop hands a mismatching word (or a short tail) to the
 *     shared byte loop at label 8, which produces the return value
 *
 * NOTE(review): the shift directions used to rebuild unaligned rhs words
 * (lsr on the older word, lsl on the newer one) look like they assume
 * little-endian word loads -- confirm before building big-endian.
 */
ENTRY(memcmp)
        pld         [r0, #(CACHE_LINE_SIZE * 0)]
        pld         [r0, #(CACHE_LINE_SIZE * 1)]

        /* identical pointers compare equal whatever the length.
         * (A zero length is not tested here; it falls into the
         * short-block path at label 10 below.)
         */
        cmp         r0, r1
        moveq       r0, #0
        bxeq        lr

        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 1)]

        /* make sure we have at least 8+4 bytes; this simplifies things
         * below and avoids some overhead for small blocks
         */
        cmp         r2, #(8+4)
        bmi         10f
/*
 * Neon optimization
 * Comparing 32 bytes at a time
 */
#if defined(__ARM_NEON__) && defined(NEON_UNALIGNED_ACCESS)
        /* r2 is biased by -32 while in the NEON loop */
        subs        r2, r2, #32
        blo         3f

        /* preload all the cache lines we need. */
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

1:      /* The main loop compares 32 bytes at a time */
        vld1.8      {d0 - d3}, [r0]!
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        vld1.8      {d4 - d7}, [r1]!
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

        /* Subtract byte-wise and OR everything together: the result is
         * non-zero iff at least one of the 32 byte pairs differs
         */
        vsub.i8     q0, q2
        vsub.i8     q1, q3
        vorr        q2, q0, q1
        vorr        d4, d5
        vmov        r3, ip, d4
        /* Check if there are any differences among the 32 bytes */
        orrs        r3, ip
        bne         2f
        subs        r2, r2, #32
        bhs         1b
        b           3f
2:
        /* A difference exists in the block just loaded: rewind both
         * pointers to its start, then check whether the difference was
         * in the first or last 16 bytes (q0 = first 16 byte differences)
         */
        sub         r0, #32
        vorr        d0, d1
        sub         r1, #32
        vmov        r3, ip, d0
        orrs        r3, ip
        /* if the first 16 bytes are equal, we only have to rewind 16 bytes */
        ittt        eq
        subeq       r2, #16
        addeq       r0, #16
        addeq       r1, #16

3:      /* fix-up the remaining count (remove the -32 bias) */
        add         r2, r2, #32

        /* fewer than 12 bytes left: use the short-block byte loop */
        cmp         r2, #(8+4)
        bmi         10f
#endif

        .save       {r4, lr}
        /* save registers */
        stmfd       sp!, {r4, lr}

        /* since r0 holds the result, move the first source
         * pointer somewhere else
         */
        mov         r4, r0

        /* align first pointer to word boundary
         * offset = -src & 3
         */
        rsb         r3, r4, #0
        ands        r3, r3, #3
        beq         0f

        /* align first pointer: compare r3 leading bytes one at a time */
        sub         r2, r2, r3
1:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        subs        r0, r0, ip
        bne         9f
        subs        r3, r3, #1
        bne         1b


0:      /* here the first pointer is aligned, and we have at least 4 bytes
         * to process.
         */

        /* see if the pointers are congruent (same low two address bits) */
        eor         r0, r4, r1
        ands        r0, r0, #3
        bne         5f

        /* congruent case, 32 bytes per iteration
         * We need to make sure there are at least 32+4 bytes left
         * because we effectively read ahead one word, and we could
         * read past the buffer (and segfault) if we're not careful.
         */

        /* ip/lr alternate as "the rhs word loaded one step ahead" */
        ldr         ip, [r1]
        subs        r2, r2, #(32 + 4)
        bmi         1f

0:      pld         [r4, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        ldr         r0, [r4], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        bne         2f
        subs        r2, r2, #32
        bhs         0b

        /* do we have at least 4 bytes left?
         * (r2 becomes "bytes remaining - 4" after this add)
         */
1:      adds        r2, r2, #(32 - 4 + 4)
        bmi         4f

        /* finish off 4 bytes at a time */
3:      ldr         r0, [r4], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #4
        bhs         3b

        /* are we done? (r2 is now the 0..3 byte tail count) */
4:      adds        r2, r2, #4
        moveq       r0, #0
        beq         9f

        /* finish off the remaining bytes */
        b           8f

2:      /* the last 4 bytes are different, restart them byte by byte */
        sub         r4, r4, #4
        sub         r1, r1, #4
        mov         r2, #4

        /* process the last few bytes; r2 = byte count, must be non-zero */
8:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        /* stall */
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return; r0 already holds the result */
        ldmfd       sp!, {r4, lr}
        bx          lr

10:     /* process less than 12 bytes; nothing was pushed on this path */
        cmp         r2, #0
        moveq       r0, #0
        bxeq        lr
        mov         r3, r0
11:
        ldrb        r0, [r3], #1
        ldrb        ip, [r1], #1
        subs        r0, ip
        bxne        lr
        subs        r2, r2, #1
        bne         11b
        bx          lr

5:      /*************** non-congruent case ***************/
        /* r4 is word aligned here, so r0 = misalignment of rhs (1..3) */
        and         r0, r1, #3
        cmp         r0, #2
        bne         4f

        /* here, offset is 2 (16-bits aligned, special cased) */

        /* make sure we have at least 16 bytes to process,
         * otherwise restore the count and use the byte loop
         */
        subs        r2, r2, #16
        addmi       r2, r2, #16
        bmi         8b

        /* align the unaligned pointer; lr = first aligned rhs word */
        bic         r1, r1, #3
        ldr         lr, [r1], #4

        /* each step rebuilds one rhs word in ip from the upper half of
         * the previous aligned word and the lower half of the next one
         */
6:      pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r4, #(CACHE_LINE_SIZE * 2)]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        bne         7f
        subs        r2, r2, #16
        bhs         6b
        /* r1 ran one aligned word ahead; step back to the real position */
        sub         r1, r1, #2
        /* are we done? */
        adds        r2, r2, #16
        moveq       r0, #0
        beq         9b
        /* finish off the remaining bytes */
        b           8b

7:      /* fix up the 2 pointers to the start of the mismatching word
         * and let the byte loop find the differing byte
         */
        sub         r1, r1, #(4+2)
        sub         r4, r4, #4
        mov         r2, #4
        b           8b


4:      /*************** offset is 1 or 3 (less optimized) ***************/

        stmfd       sp!, {r5, r6, r7}

        /* r5 = right-shift amount in bits (8 * rhs offset)
         * r6 = left-shift amount in bits (32 - r5)
         * r7 = most recently loaded aligned rhs word
         */

        mov         r5, r0, lsl #3      /* r5 = right shift */
        rsb         r6, r5, #32         /* r6 = left shift */

        /* align the unaligned pointer; r2 is biased by -8 in the loop */
        bic         r1, r1, #3
        ldr         r7, [r1], #4
        sub         r2, r2, #8

        /* 8 bytes per iteration, rhs words rebuilt from two aligned loads */
6:      mov         ip, r7, lsr r5
        ldr         r7, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, r7, lsl r6
        eors        r0, r0, ip
        moveq       ip, r7, lsr r5
        ldreq       r7, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, r7, lsl r6
        eoreqs      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b

        /* step r1 back by (r6 / 8) bytes to the real rhs position */
        sub         r1, r1, r6, lsr #3
        ldmfd       sp!, {r5, r6, r7}

        /* are we done? */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b

        /* finish off the remaining bytes */
        b           8b

7:      /* fix up the 2 pointers to the start of the mismatching word
         * and let the byte loop find the differing byte
         */
        sub         r1, r1, #4
        sub         r1, r1, r6, lsr #3
        sub         r4, r4, #4
        mov         r2, #4
        ldmfd       sp!, {r5, r6, r7}
        b           8b
END(memcmp)