/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
 * Cache line size in bytes; the pld prefetch offsets in this file are
 * expressed as multiples of it.  It is 32 when the build defines
 * HAVE_32_BYTE_CACHE_LINE and 64 otherwise.
 */
#ifdef HAVE_32_BYTE_CACHE_LINE
#define CACHE_LINE_SIZE     32
#else
#define CACHE_LINE_SIZE     64
#endif

/*
 * Optimized memcmp() for Cortex-A9.
 *
 * C prototype: int memcmp(const void *lhs, const void *rhs, size_t n)
 *
 * In:    r0 = lhs, r1 = rhs, r2 = n (number of bytes to compare)
 * Out:   r0 = 0 when the buffers are equal, otherwise
 *             (lhs byte - rhs byte) at the first position that differs
 * Uses:  r1-r3, ip and the flags as scratch; d0-d7 when the NEON path is
 *        built.  r4, lr (and r5-r7 in the offset 1/3 path) are used too but
 *        are saved on the stack and restored before returning.
 *
 * Numeric local labels are reused; "Nb"/"Nf" always bind to the nearest
 * definition of N backward/forward, so the order of the code matters.
 */

ENTRY(memcmp)
        pld         [r0, #(CACHE_LINE_SIZE * 0)]
        pld         [r0, #(CACHE_LINE_SIZE * 1)]

        /* identical pointers compare equal whatever the length
         * (a zero length is handled at label 10 below)
         */
        cmp         r0, r1
        moveq       r0, #0
        bxeq        lr

        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 1)]

        /* make sure we have at least 8+4 bytes, this simplifies things below
         * and avoids some overhead for small blocks
         */
        cmp         r2, #(8+4)
        bmi         10f
/*
 * Neon optimization
 * Comparing 32 bytes at a time
 */
#if defined(__ARM_NEON__) && defined(NEON_UNALIGNED_ACCESS)
        subs        r2, r2, #32
        blo         3f

        /* preload all the cache lines we need. */
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

1:      /* The main loop compares 32 bytes at a time */
        vld1.8      {d0 - d3}, [r0]!
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        vld1.8      {d4 - d7}, [r1]!
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

        /* Start subtracting the values and merge results */
        vsub.i8     q0, q2
        vsub.i8     q1, q3
        vorr        q2, q0, q1
        vorr        d4, d5
        vmov        r3, ip, d4
        /* Check if there are any differences among the 32 bytes */
        orrs        r3, ip
        bne         2f
        subs        r2, r2, #32
        bhs         1b
        b           3f
2:
        /* Check if the difference was in the first or last 16 bytes */
        sub         r0, #32
        vorr        d0, d1
        sub         r1, #32
        vmov        r3, ip, d0
        orrs        r3, ip
        /* if the first 16 bytes are equal, we only have to rewind 16 bytes */
        ittt        eq
        subeq       r2, #16
        addeq       r0, #16
        addeq       r1, #16

3:      /* fix-up the remaining count (undo the bias of 32 applied above) */
        add         r2, r2, #32

        cmp         r2, #(8+4)
        bmi         10f
#endif

        /* save registers */
        stmfd       sp!, {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* since r0 hold the result, move the first source
         * pointer somewhere else
         */
        mov         r4, r0

        /* align first pointer to word boundary
         * offset = -src & 3
         */
        rsb         r3, r4, #0
        ands        r3, r3, #3
        beq         0f

        /* align first pointer: compare r3 (1..3) bytes one at a time */
        sub         r2, r2, r3
1:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        subs        r0, r0, ip
        bne         9f
        subs        r3, r3, #1
        bne         1b


0:      /* here the first pointer is aligned, and we have at least 4 bytes
         * to process.
         */

        /* see if the pointers are congruent */
        eor         r0, r4, r1
        ands        r0, r0, #3
        bne         5f

        /* congruent case, 32 bytes per iteration
         * We need to make sure there are at least 32+4 bytes left
         * because we effectively read ahead one word, and we could
         * read past the buffer (and segfault) if we're not careful.
         */

        ldr         ip, [r1]
        subs        r2, r2, #(32 + 4)
        bmi         1f

        /* ip and lr alternate as the read-ahead word of the second buffer */
0:      pld         [r4, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        ldr         r0, [r4], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        bne         2f
        subs        r2, r2, #32
        bhs         0b

        /* do we have at least 4 bytes left? */
1:      adds        r2, r2, #(32 - 4 + 4)
        bmi         4f

        /* finish off 4 bytes at a time */
3:      ldr         r0, [r4], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #4
        bhs         3b

        /* are we done? */
4:      adds        r2, r2, #4
        moveq       r0, #0
        beq         9f

        /* finish off the remaining bytes */
        b           8f

2:      /* the last 4 bytes are different, restart them */
        sub         r4, r4, #4
        sub         r1, r1, #4
        mov         r2, #4

        /* process the last few bytes */
8:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        // stall
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return */
        .cfi_remember_state
        ldmfd       sp!, {r4, lr}
        .cfi_def_cfa_offset 0
        .cfi_restore r4
        .cfi_restore lr
        bx          lr

10:     /* process less than 12 bytes; nothing has been pushed on this path */
        cmp         r2, #0
        moveq       r0, #0
        bxeq        lr
        mov         r3, r0
11:
        ldrb        r0, [r3], #1
        ldrb        ip, [r1], #1
        subs        r0, ip
        bxne        lr
        subs        r2, r2, #1
        bne         11b
        bx          lr

5:      /*************** non-congruent case ***************/
        /* only reached with {r4, lr} still on the stack */
        .cfi_restore_state
        and         r0, r1, #3
        cmp         r0, #2
        bne         4f

        /* here, offset is 2 (16-bits aligned, special cased) */

        /* make sure we have at least 16 bytes to process */
        subs        r2, r2, #16
        addmi       r2, r2, #16
        bmi         8b

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         lr, [r1], #4

6:      pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r4, #(CACHE_LINE_SIZE * 2)]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        bne         7f
        subs        r2, r2, #16
        bhs         6b
        /* r1 ran one aligned word ahead; step back to the true position */
        sub         r1, r1, #2
        /* are we done? */
        adds        r2, r2, #16
        moveq       r0, #0
        beq         9b
        /* finish off the remaining bytes */
        b           8b

7:      /* fix up the 2 pointers and fallthrough... */
        sub         r1, r1, #(4+2)
        sub         r4, r4, #4
        mov         r2, #4
        b           8b


4:      /*************** offset is 1 or 3 (less optimized) ***************/

        stmfd       sp!, {r5, r6, r7}
        .cfi_def_cfa_offset 20
        .cfi_rel_offset r5, 0
        .cfi_rel_offset r6, 4
        .cfi_rel_offset r7, 8

        // r5 = right shift amount, in bits
        // r6 = left shift amount, in bits
        // r7 = read-ahead word of the second buffer

        mov         r5, r0, lsl #3      /* r5 = right shift */
        rsb         r6, r5, #32         /* r6 = left shift */

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         r7, [r1], #4
        sub         r2, r2, #8

6:      mov         ip, r7, lsr r5
        ldr         r7, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, r7, lsl r6
        eors        r0, r0, ip
        moveq       ip, r7, lsr r5
        ldreq       r7, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, r7, lsl r6
        eoreqs      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b

        /* r6 / 8 = 4 - offset: undo the aligned read-ahead on r1 */
        sub         r1, r1, r6, lsr #3
        .cfi_remember_state
        ldmfd       sp!, {r5, r6, r7}
        .cfi_def_cfa_offset 8
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7

        /* are we done? */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b

        /* finish off the remaining bytes */
        b           8b

7:      /* fix up the 2 pointers and fallthrough... */
        /* reached from the loop above, so r5-r7 are still on the stack */
        .cfi_restore_state
        sub         r1, r1, #4
        sub         r1, r1, r6, lsr #3
        sub         r4, r4, #4
        mov         r2, #4
        ldmfd       sp!, {r5, r6, r7}
        .cfi_def_cfa_offset 8
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7
        b           8b
END(memcmp)