/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
 * Optimized memcmp16() for ARM9.
 * This would not be optimal on XScale or ARM11, where more prefetching
 * and use of PLD will be needed.
 * The 2 major optimizations here are
 * (1) The main loops compare several 32-bit words per iteration
 *     (16 halfwords when both pointers share the same word alignment,
 *     8 halfwords otherwise)
 * (2) The loads are scheduled in a way they won't stall
 */

/*
 * __memcmp16: compare two buffers of 16-bit units.
 *
 * In:    r0 = first buffer
 *        r1 = second buffer
 *        r2 = number of 16-bit units (halfwords, not bytes) to compare
 * Out:   r0 = 0 if the pointers are identical, the count is zero, or all
 *             compared halfwords match; otherwise (halfword from the first
 *             buffer) - (halfword from the second buffer) at the first
 *             mismatch, both zero-extended.
 * Uses:  r1, r2, r3, ip and the flags are clobbered.  The long path pushes
 *        {r4, lr} and pops them again before every return.
 *
 * Assumptions visible in the code:
 *  - both pointers are halfword aligned (only bit 1 of the addresses is
 *    ever examined, and the data is read with ldrh / ldr);
 *  - the non-congruent path merges words with "lsr #16 / lsl #16", which
 *    matches little-endian halfword order.
 *
 * NOTE(review): the C prototype is presumably
 *   int __memcmp16(const unsigned short*, const unsigned short*, size_t)
 * -- confirm against the declaring header.
 * ENTRY, END and PLD are macros expected from the headers included by this
 * file.
 */
ENTRY(__memcmp16)
        PLD         (r0, #0)
        PLD         (r1, #0)

        /* take care of the case where the length is zero or the buffers
         * are the same: Z is set if r0 == r1, otherwise if r2 == 0
         */
        cmp         r0, r1
        cmpne       r2, #0
        moveq       r0, #0
        bxeq        lr

        /* since r0 holds the result, move the first source
         * pointer somewhere else
         */

        mov         r3, r0

        /* make sure we have at least 12 halfwords, this simplifies things
         * below and avoids some overhead for small blocks
         */

        cmp         r2, #12
        bpl         0f

        /* small blocks (less than 12 halfwords): plain halfword loop.
         * No registers are saved on this path, so it returns with bx lr.
         */
        PLD         (r0, #32)
        PLD         (r1, #32)

1:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip          /* r0 = difference, 0 if equal */
        bxne        lr
        subs        r2, r2, #1
        bne         1b
        bx          lr                  /* r0 is 0 here */


        /* unwind annotation for the push just below */
        .save {r4, lr}
        /* save registers: lr is used as a scratch register in the word
         * loops.  r4 is not referenced anywhere else in this routine.
         * NOTE(review): r4 is presumably pushed only to keep sp 8-byte
         * aligned -- confirm.
         */
0:      stmfd       sp!, {r4, lr}

        /* align first pointer to word boundary */
        tst         r3, #2
        beq         0f

        /* first pointer is only halfword aligned: compare one halfword */
        ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        sub         r2, r2, #1          /* no flag update; subs below decides */
        subs        r0, r0, ip
        /* restore registers and return */
        ldmnefd     sp!, {r4, lr}
        bxne        lr


0:      /* here the first pointer is word aligned, and at least 11
         * halfwords remain (count was >= 12, at most one was consumed).
         */

        /* see if the pointers are congruent, i.e. whether the second
         * pointer is word aligned as well
         */
        eor         r0, r3, r1
        ands        r0, r0, #2
        bne         5f

        /* congruent case, 16 halfwords (8 words) per iteration.
         * We need to make sure there are at least 16+2 halfwords left
         * because we effectively read ahead one long word, and we could
         * read past the buffer (and segfault) if we're not careful.
         */

        ldr         ip, [r1]            /* read-ahead: first word of buffer 2 */
        subs        r2, r2, #(16 + 2)
        bmi         1f

        /* Software-pipelined loop: ip and lr alternate as "current word of
         * buffer 2" and "next word of buffer 2".  r1 is pre-incremented, so
         * it always points at the word most recently loaded; r3 is
         * post-incremented.  Each step after the first only executes while
         * every previous comparison was equal (Z set).
         */
0:
        PLD         (r3, #64)
        PLD         (r1, #64)
        ldr         r0, [r3], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eoreqs      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eoreqs      r0, r0, lr
        bne         2f                  /* some word differed */
        subs        r2, r2, #16
        bhs         0b

        /* do we have at least 2 halfwords left?
         * r2 is (remaining - 18) here; adding 16 leaves (remaining - 2).
         */
1:      adds        r2, r2, #(16 - 2 + 2)
        bmi         4f

        /* finish off 2 halfwords (one word) at a time; r1 points at the
         * next word to compare, so both pointers are post-incremented
         */
3:      ldr         r0, [r3], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #2
        bhs         3b

        /* are we done?  r2 + 2 is the number of halfwords still left */
4:      adds        r2, r2, #2
        bne         8f
        /* restore registers and return */
        mov         r0, #0
        ldmfd       sp!, {r4, lr}
        bx          lr

2:      /* the last 2 halfwords compared (one word) are different: redo
         * them one halfword at a time, lower address first, to produce
         * the signed difference.  Both pointers are 4 bytes past the
         * mismatching word at this point.
         */
        ldrh        r0, [r3, #-4]
        ldrh        ip, [r1, #-4]
        subs        r0, r0, ip
        ldreqh      r0, [r3, #-2]
        ldreqh      ip, [r1, #-2]
        subeqs      r0, r0, ip
        /* restore registers and return */
        ldmfd       sp!, {r4, lr}
        bx          lr

        /* process the last few halfwords one at a time (r2 != 0 on entry) */
8:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return; r0 already holds the result */
        ldmfd       sp!, {r4, lr}
        bx          lr


5:      /*************** non-congruent case ***************/

        /* The first pointer is word aligned, the second one is not.
         * Align the second pointer down and rebuild each word of buffer 2
         * from the upper half of the previous aligned word (lr, lsr #16)
         * and the lower half of the next one (lr, lsl #16).
         * 8 halfwords (4 words) are compared per iteration.
         */
        bic         r1, r1, #3
        ldr         lr, [r1], #4
        sub         r2, r2, #8

6:
        PLD         (r3, #64)
        PLD         (r1, #64)
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r3], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eoreqs      r0, r0, ip
        bne         7f                  /* some word differed */
        subs        r2, r2, #8
        bhs         6b
        /* r1 ran one aligned word ahead; step back 2 bytes so it is the
         * real (halfword aligned) position in buffer 2 again
         */
        sub         r1, r1, #2
        /* are we done? */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b
        /* finish off the remaining halfwords */
        b           8b

7:      /* fix up the second pointer (same 2-byte step back as above) and
         * re-compare the mismatching word halfword by halfword
         */
        sub         r1, r1, #2
        b           2b
END(__memcmp16)