/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>

/*
 * CACHE_LINE_SIZE scales the pld (prefetch) offsets used in this file:
 * 32 when HAVE_32_BYTE_CACHE_LINE is defined, 64 otherwise.
 */
#ifdef HAVE_32_BYTE_CACHE_LINE
#define CACHE_LINE_SIZE     32
#else
#define CACHE_LINE_SIZE     64
#endif

/*
 * Optimized memcmp() for Cortex-A9.
 *
 * C equivalent: int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:   r0 = s1, r1 = s2, r2 = n (byte count)
 * Out:  r0 = 0 if the n bytes are equal, otherwise
 *            (byte of s1) - (byte of s2) at the first differing position,
 *            both bytes zero-extended (ldrb).
 * Clobbers: r1-r3, ip, flags; d0-d7 when the NEON path is compiled in.
 *           r4-r7 and lr are pushed on the stack when used and restored
 *           before returning.
 *
 * Strategy:
 *   - fewer than 12 bytes: plain byte loop (label 10), no stack use.
 *   - optional NEON pre-pass: 32 bytes per iteration; on a difference it
 *     only narrows the position down to a 16-byte half and lets the
 *     scalar code below find the exact byte.
 *   - scalar path: byte-compare until s1 is word aligned, then compare
 *     words. Three variants depending on (s2 & 3) at that point:
 *     0 (congruent), 2 (halfword shift/merge), 1 or 3 (variable shift).
 *     The shift/merge variants read whole aligned words from s2 and
 *     combine them using little-endian shift directions.
 *   - whenever two words differ, both pointers are rewound by 4 and the
 *     byte loop (label 8) produces the return value.
 */

.syntax unified

ENTRY(memcmp)
        pld         [r0, #(CACHE_LINE_SIZE * 0)]
        pld         [r0, #(CACHE_LINE_SIZE * 1)]

        /* take care of the case where the buffers are the same
         * (a zero length is handled by the small-block path at 10:)
         */
        cmp         r0, r1
        moveq       r0, #0
        bxeq        lr

        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 1)]

        /* make sure we have at least 8+4 bytes, this simplifies things below
         * and avoids some overhead for small blocks
         */
        cmp         r2, #(8+4)
        bmi         10f
/*
 * Neon optimization
 * Comparing 32 bytes at a time
 */
#if defined(__ARM_NEON__) && defined(NEON_UNALIGNED_ACCESS)
        /* r2 = bytes left after the 32-byte block about to be compared */
        subs        r2, r2, #32
        blo         3f

        /* preload all the cache lines we need. */
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

1:      /* The main loop compares 32 bytes at a time */
        vld1.8      {d0 - d3}, [r0]!
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        vld1.8      {d4 - d7}, [r1]!
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

        /* Start subtracting the values and merge results */
        vsub.i8     q0, q2
        vsub.i8     q1, q3
        vorr        q2, q0, q1
        vorr        d4, d5
        vmov        r3, ip, d4
        /* Check if there are any differences among the 32 bytes */
        orrs        r3, ip
        bne         2f
        subs        r2, r2, #32
        bhs         1b
        b           3f
2:
        /* Check if the difference was in the first or last 16 bytes.
         * q0 (d0, d1) still holds the byte differences of the first half.
         */
        sub         r0, #32
        vorr        d0, d1
        sub         r1, #32
        vmov        r3, ip, d0
        orrs        r3, ip
        /* if the first 16 bytes are equal, we only have to rewind 16 bytes */
        ittt        eq
        subeq       r2, #16
        addeq       r0, #16
        addeq       r1, #16

3:      /* fix-up the remaining count (undo the look-ahead subtraction) */
        add         r2, r2, #32

        cmp         r2, #(8+4)
        bmi         10f
#endif

        /* save registers */
        stmfd       sp!, {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* since r0 holds the result, move the first source
         * pointer somewhere else
         */
        mov         r4, r0

        /* align first pointer to word boundary
         * offset = -src & 3
         */
        rsb         r3, r4, #0
        ands        r3, r3, #3
        beq         0f

        /* align first pointer: compare r3 (1..3) leading bytes one by one */
        sub         r2, r2, r3
1:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        subs        r0, r0, ip
        bne         9f
        subs        r3, r3, #1
        bne         1b


0:      /* here the first pointer is aligned, and we have at least 4 bytes
         * to process.
         */

        /* see if the pointers are congruent */
        eor         r0, r4, r1
        ands        r0, r0, #3
        bne         5f

        /* congruent case, 32 bytes per iteration
         * We need to make sure there are at least 32+4 bytes left
         * because we effectively read ahead one word, and we could
         * read past the buffer (and segfault) if we're not careful.
         */

        ldr         ip, [r1]
        subs        r2, r2, #(32 + 4)
        bmi         1f

        /* ip and lr alternate as the read-ahead word of the second buffer;
         * every step after the first is predicated on "equal so far", so a
         * mismatch skips the rest and leaves both pointers just past the
         * differing word.
         */
0:      pld         [r4, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        ldr         r0, [r4], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        bne         2f
        subs        r2, r2, #32
        bhs         0b

        /* do we have at least 4 bytes left? */
1:      adds        r2, r2, #(32 - 4 + 4)
        bmi         4f

        /* finish off 4 bytes at a time */
3:      ldr         r0, [r4], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #4
        bhs         3b

        /* are we done? */
4:      adds        r2, r2, #4
        moveq       r0, #0
        beq         9f

        /* finish off the remaining bytes */
        b           8f

2:      /* the last 4 bytes are different, restart them */
        sub         r4, r4, #4
        sub         r1, r1, #4
        mov         r2, #4

        /* process the last few bytes; r0 ends up as the byte difference,
         * or 0 when the count runs out with everything equal
         */
8:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        // stall
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return */
        ldmfd       sp!, {r4, pc}

        /* The small-block code below is only reached from before the
         * {r4, lr} push, so describe it to the unwinder as frameless and
         * switch back to the framed description afterwards.
         */
        .cfi_remember_state
        .cfi_def_cfa_offset 0
        .cfi_restore r4
        .cfi_restore lr

10:     /* process less than 12 bytes */
        cmp         r2, #0
        moveq       r0, #0
        bxeq        lr
        mov         r3, r0
11:
        ldrb        r0, [r3], #1
        ldrb        ip, [r1], #1
        subs        r0, ip
        bxne        lr
        subs        r2, r2, #1
        bne         11b
        bx          lr

        /* back to the state with {r4, lr} on the stack */
        .cfi_restore_state

5:      /*************** non-congruent case ***************/
        and         r0, r1, #3
        cmp         r0, #2
        bne         4f

        /* here, offset is 2 (16-bits aligned, special cased) */

        /* make sure we have at least 16 bytes to process
         * (addmi does not touch the flags, so bmi still sees the subs result)
         */
        subs        r2, r2, #16
        addmi       r2, r2, #16
        bmi         8b

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         lr, [r1], #4

        /* each step merges the upper half of the previous aligned word with
         * the lower half of the next one and compares it with a word of the
         * first buffer; 4 words (16 bytes) per iteration
         */
6:      pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r4, #(CACHE_LINE_SIZE * 2)]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #16
        bhs         6b
        /* r1 ran one aligned word ahead; step back to the real position */
        sub         r1, r1, #2
        /* are we done? */
        adds        r2, r2, #16
        moveq       r0, #0
        beq         9b
        /* finish off the remaining bytes */
        b           8b

7:      /* fix up the 2 pointers and fallthrough... */
        sub         r1, r1, #(4+2)
        sub         r4, r4, #4
        mov         r2, #4
        b           8b


4:      /*************** offset is 1 or 3 (less optimized) ***************/

        stmfd       sp!, {r5, r6, r7}
        .cfi_def_cfa_offset 20
        .cfi_rel_offset r5, 0
        .cfi_rel_offset r6, 4
        .cfi_rel_offset r7, 8

        // r5 = right shift amount in bits (8 * offset)
        // r6 = left shift amount in bits (32 - r5)
        // r7 = most recently loaded aligned word of the second buffer

        mov         r5, r0, lsl #3          /* r5 = right shift */
        rsb         r6, r5, #32             /* r6 = left shift */

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         r7, [r1], #4
        sub         r2, r2, #8

        /* 2 words (8 bytes) per iteration */
6:      mov         ip, r7, lsr r5
        ldr         r7, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, r7, lsl r6
        eors        r0, r0, ip
        moveq       ip, r7, lsr r5
        ldreq       r7, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, r7, lsl r6
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b

        /* r1 ran ahead by (4 - offset) bytes == r6 / 8; step back */
        sub         r1, r1, r6, lsr #3
        .cfi_remember_state
        ldmfd       sp!, {r5, r6, r7}
        .cfi_def_cfa_offset 8
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7

        /* are we done? */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b

        /* finish off the remaining bytes */
        b           8b

        /* label 7 is reached from inside the loop, with r5-r7 still pushed */
        .cfi_restore_state

7:      /* fix up the 2 pointers and fallthrough... */
        sub         r1, r1, #4
        sub         r1, r1, r6, lsr #3
        sub         r4, r4, #4
        mov         r2, #4
        ldmfd       sp!, {r5, r6, r7}
        .cfi_def_cfa_offset 8
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7
        b           8b
END(memcmp)