blob: b623a2a2cb3fa8e086740a7ba46b55ab53e8dfd7 [file] [log] [blame]
Andreas Gampe4d0589c2014-06-10 16:10:56 -07001/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
18#define ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
19
20#include "asm_support_arm.S"
21
/*
 * Optimized memcmp16() for ARM9.
 * This would not be optimal on XScale or ARM11, where more prefetching
 * and use of pld will be needed.
 * The 2 major optimizations here are
 * (1) The main loop compares 16 half-words (32 bytes) at a time
 * (2) The loads are scheduled in a way they won't stall
 */
30
/*
 * __memcmp16: compare two buffers made of 16-bit units.
 *
 * In:    r0 = first buffer
 *        r1 = second buffer
 *        r2 = length, counted in half-words (it is decremented by 1 per
 *             ldrh and by 2 per word compared)
 *        The code only inspects bit 1 of the addresses when aligning, so it
 *        appears to assume both pointers are at least half-word aligned.
 * Out:   r0 = 0 if the pointers are identical, the length is 0, or all
 *             half-words match; otherwise (first - second) for the first
 *             pair of half-words that differ.
 * Clobb: r1, r2, r3, ip, flags. On the large-block path r4 and lr are
 *        pushed and restored; lr is used as a scratch data register there.
 *        r4 is never touched by this routine - presumably it is pushed only
 *        to keep sp 8-byte aligned (confirm before removing).
 *
 * CFI:   every epilogue below is followed by code that is entered with the
 *        {r4, lr} frame still on the stack, so each epilogue is bracketed
 *        with .cfi_remember_state / .cfi_restore_state. The explicit
 *        .cfi_def_cfa_offset 8 after each restore re-states the CFA so the
 *        result does not depend on how a consumer treats the CFA rule on
 *        restore_state.
 */
ARM_ENTRY __memcmp16
        pld         [r0, #0]
        pld         [r1, #0]

        /* nothing to do if the buffers are the same or the length is zero */
        cmp         r0, r1
        cmpne       r2, #0
        moveq       r0, #0
        bxeq        lr

        /* r0 holds the result, so walk the first buffer through r3 instead */
        mov         r3, r0

        /* only blocks of at least 12 half-words take the word-wise paths
         * below; this simplifies them and avoids overhead for small blocks
         */
        cmp         r2, #12
        bpl         0f

        /* small blocks (fewer than 12 half-words): no frame, simple loop */
        pld         [r0, #32]
        pld         [r1, #32]

1:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
        bxne        lr                  /* mismatch: r0 = difference */
        subs        r2, r2, #1
        bne         1b
        bx          lr                  /* all equal: r0 == 0 from the last subs */


        /* large blocks: save registers (lr becomes a data scratch register) */
0:      push        {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* align first pointer to word boundary */
        tst         r3, #2
        beq         0f

        ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        sub         r2, r2, #1
        subs        r0, r0, ip
        /* mismatch: restore registers and return.
         * CFI cannot express a conditional pop, so the unwind state is left
         * describing the pushed frame, which is what the fall-through path
         * (the common case) needs.
         */
        popne       {r4, lr}
        bxne        lr


0:      /* here the first pointer is word aligned, and at least 11
         * half-words are left to process.
         */

        /* see if the pointers are congruent (same alignment within a word) */
        eor         r0, r3, r1
        ands        r0, r0, #2
        bne         5f

        /* congruent case, 16 half-words per iteration
         * We need to make sure there are at least 16+2 half-words left
         * because we effectively read ahead one long word, and we could
         * read past the buffer (and segfault) if we're not careful.
         *
         * ip/lr alternate as the "already loaded" word of the second
         * buffer; r1 is pre-incremented, so it always points at the word
         * most recently loaded from the second buffer.
         */

        ldr         ip, [r1]
        subs        r2, r2, #(16 + 2)
        bmi         1f

0:
        pld         [r3, #64]
        pld         [r1, #64]
        ldr         r0, [r3], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r3], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r3], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        bne         2f                  /* some word differed: r3-4 / r1-4 address it */
        subs        r2, r2, #16
        bhs         0b

        /* do we have at least 2 half-words (one word) left?
         * After the add, r2 = remaining half-words - 2.
         */
1:      adds        r2, r2, #(16 - 2 + 2)
        bmi         4f

        /* finish off one word (2 half-words) at a time */
3:      ldr         r0, [r3], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #2
        bhs         3b

        /* are we done? (r2 becomes the number of half-words left: 0 or 1) */
4:      adds        r2, r2, #2
        bne         8f
        /* restore registers and return */
        mov         r0, #0
        .cfi_remember_state
        pop         {r4, lr}
        .cfi_restore r4
        .cfi_restore lr
        .cfi_adjust_cfa_offset -8
        bx          lr
        /* code below is entered by branches with the frame still pushed */
        .cfi_restore_state
        .cfi_def_cfa_offset 8

2:      /* the last word compared differs: redo it one half-word at a time
         * so that r0 ends up as the difference of the first differing pair
         */
        ldrh        r0, [r3, #-4]
        ldrh        ip, [r1, #-4]
        subs        r0, r0, ip
        ldrheq      r0, [r3, #-2]
        ldrheq      ip, [r1, #-2]
        subseq      r0, r0, ip
        /* restore registers and return */
        .cfi_remember_state
        pop         {r4, lr}
        .cfi_restore r4
        .cfi_restore lr
        .cfi_adjust_cfa_offset -8
        bx          lr
        /* code below is entered by branches with the frame still pushed */
        .cfi_restore_state
        .cfi_def_cfa_offset 8

        /* process the last few half-words one at a time */
8:      ldrh        r0, [r3], #2
        ldrh        ip, [r1], #2
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return (r0 already holds the result) */
        .cfi_remember_state
        pop         {r4, lr}
        .cfi_restore r4
        .cfi_restore lr
        .cfi_adjust_cfa_offset -8
        bx          lr
        /* code below is entered by branches with the frame still pushed */
        .cfi_restore_state
        .cfi_def_cfa_offset 8


5:      /*************** non-congruent case ***************/

        /* The second pointer is 2 bytes past a word boundary. Round it down
         * and read whole aligned words; each word to compare is rebuilt from
         * the high half of the previous load and the low half of the next
         * one (this merge assumes little-endian data layout). r1 therefore
         * runs 2 bytes ahead of the data being compared.
         */
        bic         r1, r1, #3
        ldr         lr, [r1], #4
        sub         r2, r2, #8

        /* 8 half-words (4 words) per iteration */
6:
        pld         [r3, #64]
        pld         [r1, #64]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r3], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r3], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b
        /* undo the 2-byte read-ahead so r1 addresses the next half-word */
        sub         r1, r1, #2
        /* are we done? (r2 becomes the number of half-words left) */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b
        /* finish off the remaining half-words */
        b           8b

7:      /* mismatch: undo the 2-byte read-ahead on r1 so that [r1, #-4]
         * addresses the data just compared, then share the mismatch path
         */
        sub         r1, r1, #2
        b           2b
END __memcmp16
234
235
236#endif // ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_