/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
 * Optimized memcmp16() for ARM9.
 * This would not be optimal on XScale or ARM11, where more prefetching
 * and use of PLD will be needed.
 * The 2 major optimizations here are
 * (1) The main loops compare several words per iteration: 16 half-words
 *     when both pointers share the same word alignment, 8 half-words
 *     otherwise
 * (2) The loads are scheduled in a way they won't stall
 *
 * Syntax:   GNU as, run through the C preprocessor; conditional
 *           instructions use the pre-UAL spelling (ldreqh, eoreqs, ...).
 *           ENTRY, END and PLD are macros that are not defined in this
 *           file (presumably supplied by the two headers above).
 *
 * In:       r0 = first buffer
 *           r1 = second buffer
 *           r2 = length in 16-bit half-words (NOT bytes)
 * Out:      r0 = 0 if the buffers are equal, the pointers are identical
 *                or the length is zero; otherwise the first differing
 *                half-word of the first buffer minus the corresponding
 *                half-word of the second buffer (both zero-extended)
 * Clobbers: r1, r2, r3, ip, flags.  lr is used as scratch on the
 *           large-block path and is saved/restored around it.
 * Stack:    none for small blocks; 8 bytes (r4, lr) otherwise.
 *           r4 is never used as a working register here -- presumably it
 *           is pushed only to keep sp 8-byte aligned; confirm before
 *           removing it.
 *
 * Assumptions visible in the code:
 *  - only bit 1 of the addresses is ever tested, so both pointers are
 *    expected to be half-word aligned;
 *  - the non-congruent loop rebuilds words with "lsr #16 / lsl #16",
 *    which matches memory order on a little-endian core only.
 */

ENTRY(__memcmp16)
        PLD         (r0, #0)
        PLD         (r1, #0)

        /* take care of the case where the length is zero or the buffers
         * are the same: Z is set if r0 == r1, else if r2 == 0
         */
        cmp         r0, r1
        cmpne       r2, #0
        moveq       r0, #0
        bxeq        lr

        /* since r0 holds the result, move the first source
         * pointer somewhere else
         */

        mov         r3, r0

        /* make sure we have at least 12 half-words; this simplifies things
         * below and avoids some overhead for small blocks
         */

        cmp         r2, #12
        bpl         0f

        /* small blocks (fewer than 12 half-words): compare one half-word
         * per pass.  r0 still equals r3 here, so the PLD below refers to
         * the first buffer.
         */
        PLD         (r0, #32)
        PLD         (r1, #32)

1:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip              /* r0 = difference, 0 if equal */
        bxne        lr
        subs        r2, r2, #1
        bne         1b
        bx          lr                      /* everything matched, r0 == 0 */


        /* save registers */
0:      stmfd       sp!, {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* align first pointer to word boundary */
        tst         r3, #2
        beq         0f

        /* first pointer is only half-word aligned: compare a single
         * half-word so that r3 becomes word aligned
         */
        ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        sub         r2, r2, #1
        subs        r0, r0, ip
        /* restore registers and return (only if the half-words differ) */
        ldmnefd     sp!, {r4, lr}
        bxne        lr


0:      /* here the first pointer is word aligned, and we have at least
         * 11 half-words left to process.
         */

        /* see if the pointers are congruent (same word alignment):
         * r3 has bit 1 clear, so the test reduces to bit 1 of r1
         */
        eor         r0, r3, r1
        ands        r0, r0, #2
        bne         5f

        /* congruent case, 16 half-words per iteration
         * We need to make sure there are at least 16+2 half-words left
         * because we effectively read ahead one long word, and we could
         * read past the buffer (and segfault) if we're not careful.
         *
         * Inside the loop r2 holds (half-words remaining - 18).
         * The second buffer is read one word ahead, alternating between
         * ip and lr, so each compare uses a word loaded earlier.
         */

        ldr         ip, [r1]
        subs        r2, r2, #(16 + 2)
        bmi         1f

0:
        PLD         (r3, #64)
        PLD         (r1, #64)
        ldr         r0, [r3], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        bne         2f
        subs        r2, r2, #16
        bhs         0b

        /* do we have at least 2 half-words left?
         * r2 is (remaining - 18) here; adding 16 turns it into
         * (remaining - 2), which is negative when fewer than 2 are left
         */
1:      adds        r2, r2, #(16 - 2 + 2)
        bmi         4f

        /* finish off 2 half-words (one word) at a time */
3:      ldr         r0, [r3], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #2
        bhs         3b

        /* are we done?  undo the bias: r2 = half-words remaining (0 or 1) */
4:      adds        r2, r2, #2
        bne         8f
        /* restore registers and return */
        mov         r0, #0
        ldmfd       sp!, {r4, lr}
        bx          lr

2:      /* the last word (2 half-words) compared differs; redo it one
         * half-word at a time to produce the return value.  Both pointers
         * are already 4 bytes past the start of that word.
         */
        ldrh        r0, [r3, #-4]
        ldrh        ip, [r1, #-4]
        subs        r0, r0, ip
        ldreqh      r0, [r3, #-2]
        ldreqh      ip, [r1, #-2]
        subeqs      r0, r0, ip
        /* restore registers and return */
        ldmfd       sp!, {r4, lr}
        bx          lr

        /* process the last few half-words, one at a time (r2 != 0 here) */
8:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return; r0 already holds the result */
        ldmfd       sp!, {r4, lr}
        bx          lr


5:      /*************** non-congruent case ***************/

        /* align the unaligned pointer: r1 is 2 bytes past a word boundary,
         * so round it down and preload that word.  Its upper half-word is
         * the first half-word of the second buffer; the lower half-word
         * lies before the buffer and is shifted out below.
         */
        bic         r1, r1, #3
        ldr         lr, [r1], #4
        sub         r2, r2, #8

        /* 8 half-words per iteration; r2 holds (remaining - 8).
         * Each compare word is built from the upper half of the previous
         * aligned word (lr, lsr #16) and the lower half of the next one
         * (lr, lsl #16).
         */
6:
        PLD         (r3, #64)
        PLD         (r1, #64)
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r3], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b
        /* r1 ran one half-word ahead because of the word-aligned
         * read-ahead; point it back at the next unread half-word
         */
        sub         r1, r1, #2
        /* are we done?  undo the bias: r2 = half-words remaining */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b
        /* finish off the remaining half-words */
        b           8b

7:      /* a word differed: fix up r1 so that it is in step with r3 again,
         * then let the common code at 2: compute the result
         */
        sub         r1, r1, #2
        b           2b
END(__memcmp16)