/*
 * Copyright (c) 2011 The Android Open Source Project
 * Copyright (c) 2008 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/cpu-features.h>

        .text

        /* Export strcmp as a function symbol.  On ARM, GNU as treats the
         * .align operand as a power of two, so this requests 16-byte
         * alignment for the entry point.
         */
        .global strcmp
        .type   strcmp, %function
        .align  4

/*
 * Endian-neutral helpers.
 *
 * "LSB" is the mask of the byte that came from the lowest address of a
 * loaded word (the first character of the string within that word) and
 * "MSB" the mask of the byte from the highest address (the last
 * character).  SHFT2LSB moves bytes towards the first-character position,
 * SHFT2MSB towards the last-character position; the ...EQ forms are the
 * same shifts executed only when the Z flag is set.
 */
#ifdef __ARMEB__
#define SHFT2LSB        lsl
#define SHFT2LSBEQ      lsleq
#define SHFT2MSB        lsr
#define SHFT2MSBEQ      lsreq
#define MSB             0x000000ff
#define LSB             0xff000000
#else
#define SHFT2LSB        lsr
#define SHFT2LSBEQ      lsreq
#define SHFT2MSB        lsl
#define SHFT2MSBEQ      lsleq
#define MSB             0xff000000
#define LSB             0x000000ff
#endif

/*
 * Operands for the zero-byte test.  REG is expected to hold 0x01010101:
 * magic1(REG) is that value itself, magic2(REG) is the same register
 * shifted left by 7, i.e. 0x80808080.
 */
#define magic1(REG)     REG
#define magic2(REG)     REG, lsl #7

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:    r0 = s1, r1 = s2
 * Out:   r0 = 0 when the strings are equal, otherwise the difference
 *        between the first pair of differing bytes, taken as unsigned
 *        values (s1 byte minus s2 byte).
 * Uses:  r2, r3, ip as scratch; r4 is spilled to the stack and restored
 *        before returning.
 *
 * This entry handles the case where both pointers share the same offset
 * from a word boundary and compares a word at a time.  Any other case
 * branches to .Lstrcmp_unaligned.
 *
 * NOTE(review): PLD() is not defined in this file; presumably it is
 * supplied by <machine/cpu-features.h> - confirm.
 */
strcmp:
        .fnstart
        PLD(r0, #0)
        PLD(r1, #0)
        eor     r2, r0, r1
        tst     r2, #3

        /* Strings not at same byte offset from a word boundary. */
        bne     .Lstrcmp_unaligned

        /* r2 = offset of s1 within its word; Z is set when already
         * aligned.  Both pointers are rounded down to a word boundary and
         * the first word of each string is fetched.
         */
        ands    r2, r0, #3
        bic     r0, r0, #3
        bic     r1, r1, #3
        ldr     ip, [r0], #4
        it      eq
        ldreq   r3, [r1], #4
        beq     1f

        /* Although s1 and s2 have identical initial alignment, they are
         * not currently word aligned.  Rather than comparing bytes,
         * make sure that any bytes fetched from before the addressed
         * bytes are forced to 0xff.  Then they will always compare
         * equal.
         */
        eor     r2, r2, #3              /* 3 - offset ...                 */
        lsl     r2, r2, #3              /* ... converted to a bit count   */
        mvn     r3, #MSB
        SHFT2LSB r2, r3, r2             /* r2 = mask of the leading bytes */
        ldr     r3, [r1], #4
        orr     ip, ip, r2
        orr     r3, r3, r2
1:
        /* Load the 'magic' constant 0x01010101. */
        str     r4, [sp, #-4]!
        mov     r4, #1
        orr     r4, r4, r4, lsl #8
        orr     r4, r4, r4, lsl #16
        .p2align 2
4:
        /* Word loop: ip holds a word of s1, r3 the matching word of s2.
         * Keep going while the words are equal and ip has no zero byte.
         */
        PLD(r0, #8)
        PLD(r1, #8)
        sub     r2, ip, magic1(r4)
        cmp     ip, r3
        itttt   eq

        /* check for any zero bytes in first word */
        biceq   r2, r2, ip
        tsteq   r2, magic2(r4)
        ldreq   ip, [r0], #4
        ldreq   r3, [r1], #4
        beq     4b
2:
        /* There's a zero or a different byte in the word */
        SHFT2MSB r0, ip, #24
        SHFT2LSB ip, ip, #8
        cmp     r0, #1                  /* stop on NUL ...                */
        it      cs
        cmpcs   r0, r3, SHFT2MSB #24    /* ... or on the first mismatch   */
        it      eq
        SHFT2LSBEQ r3, r3, #8
        beq     2b
        /* On a big-endian machine, r0 contains the desired byte in bits
         * 0-7; on a little-endian machine they are in bits 24-31.  In
         * both cases the other bits in r0 are all zero.  For r3 the
         * interesting byte is at the other end of the word, but the
         * other bits are not necessarily zero.  We need a signed result
         * representing the difference in the unsigned bytes, so for the
         * little-endian case we can't just shift the interesting bits up.
         */
#ifdef __ARMEB__
        sub     r0, r0, r3, lsr #24
#else
        and     r3, r3, #255
        /* No RSB instruction in Thumb2 */
#ifdef __thumb2__
        lsr     r0, r0, #24
        sub     r0, r0, r3
#else
        rsb     r0, r3, r0, lsr #24
#endif
#endif
        ldr     r4, [sp], #4            /* restore r4 saved at label 1    */
        bx      lr
        .fnend

/*
 * .Lstrcmp_unaligned - strcmp tail for strings whose offsets from a word
 * boundary differ.  Reached only by a branch from strcmp.
 *
 * In:    r0 (wp1) = s1, r1 (wp2) = s2
 * Out:   r0 = 0 when the strings are equal, otherwise the difference of
 *        the first differing bytes (s1 byte minus s2 byte).
 * Uses:  r2 (b1), r3, ip (t1) as scratch; r4 (w1) and r5 (w2) are spilled
 *        to the stack once wp1 is word aligned and restored on every exit
 *        taken after that point.
 *
 * Strategy: compare single bytes until wp1 is word aligned, then read
 * whole words from both strings.  wp2 is rounded down to a word boundary,
 * so each word of s1 is compared against pieces of two consecutive words
 * of s2.  Three loops cover the three possible values of (wp2 & 3).
 *
 * NOTE(review): this code follows the .fnend that closes strcmp's unwind
 * region, so it is emitted without .fnstart/.fnend coverage - confirm
 * that this is intended.
 */
.Lstrcmp_unaligned:
        wp1     .req    r0
        wp2     .req    r1
        b1      .req    r2
        w1      .req    r4
        w2      .req    r5
        t1      .req    ip
        @ r3 is scratch

        /* First of all, compare bytes until wp1(sp1) is word-aligned. */
1:
        tst     wp1, #3
        beq     2f
        ldrb    r2, [wp1], #1
        ldrb    r3, [wp2], #1
        cmp     r2, #1                  /* stop on NUL ...                */
        it      cs
        cmpcs   r2, r3                  /* ... or on the first mismatch   */
        beq     1b
        sub     r0, r2, r3              /* nothing has been pushed yet    */
        bx      lr

2:
        str     r5, [sp, #-4]!
        str     r4, [sp, #-4]!
        mov     b1, #1                  /* b1 = 0x01010101                */
        orr     b1, b1, b1, lsl #8
        orr     b1, b1, b1, lsl #16

        /* t1 = offset of wp2 within its word.  It cannot be 0 here: the
         * pointers entered with different offsets and have advanced in
         * step, and wp1 is now aligned.
         */
        and     t1, wp2, #3
        bic     wp2, wp2, #3
        ldr     w1, [wp1], #4
        ldr     w2, [wp2], #4
        cmp     t1, #2
        beq     2f                      /* offset 2: 2 bytes overlap      */
        bhi     3f                      /* offset 3: 1 byte overlaps      */

        /* Critical inner Loop: Block with 3 bytes initial overlap */
        .p2align 2
1:
        bic     t1, w1, #MSB            /* first three bytes of w1        */
        cmp     t1, w2, SHFT2LSB #8
        sub     r3, w1, b1              /* r3 = zero-byte syndrome of w1  */
        bic     r3, r3, w1
        bne     4f
        ands    r3, r3, b1, lsl #7
        it      eq
        ldreq   w2, [wp2], #4
        bne     5f                      /* w1 contains a NUL              */
        eor     t1, t1, w1              /* remaining (last) byte of w1    */
        cmp     t1, w2, SHFT2MSB #24
        bne     6f
        ldr     w1, [wp1], #4
        b       1b
4:
        /* Mismatch within the overlapping bytes: line w2 up with t1. */
        SHFT2LSB w2, w2, #8
        b       8f

5:
        /* w1 holds a NUL and its first three bytes matched.  If the NUL
         * is among those three bytes the strings are equal.
         */
#ifdef __ARMEB__
        /* The syndrome value may contain false ones if the string ends
         * with the bytes 0x01 0x00
         */
        tst     w1, #0xff000000
        itt     ne
        tstne   w1, #0x00ff0000
        tstne   w1, #0x0000ff00
        beq     7f
#else
        bics    r3, r3, #0xff000000
        bne     7f
#endif
        /* NUL is the last byte of w1: fetch only the single s2 byte that
         * pairs with it.
         */
        ldrb    w2, [wp2]
        SHFT2LSB t1, w1, #24
#ifdef __ARMEB__
        lsl     w2, w2, #24
#endif
        b       8f

6:
        SHFT2LSB t1, w1, #24
        and     w2, w2, #LSB
        b       8f

        /* Critical inner Loop: Block with 2 bytes initial overlap */
        .p2align 2
2:
        SHFT2MSB t1, w1, #16
        sub     r3, w1, b1              /* r3 = zero-byte syndrome of w1  */
        SHFT2LSB t1, t1, #16            /* t1 = first two bytes of w1     */
        bic     r3, r3, w1
        cmp     t1, w2, SHFT2LSB #16
        bne     4f
        ands    r3, r3, b1, lsl #7
        it      eq
        ldreq   w2, [wp2], #4
        bne     5f                      /* w1 contains a NUL              */
        eor     t1, t1, w1              /* remaining two bytes of w1      */
        cmp     t1, w2, SHFT2MSB #16
        bne     6f
        ldr     w1, [wp1], #4
        b       2b

5:
        /* w1 holds a NUL and its first two bytes matched.  If the NUL is
         * among those two bytes the strings are equal.
         */
#ifdef __ARMEB__
        /* The syndrome value may contain false ones if the string ends
         * with the bytes 0x01 0x00
         */
        tst     w1, #0xff000000
        it      ne
        tstne   w1, #0x00ff0000
        beq     7f
#else
        lsls    r3, r3, #16
        bne     7f
#endif
        /* Fetch only the two s2 bytes that pair with the rest of w1. */
        ldrh    w2, [wp2]
        SHFT2LSB t1, w1, #16
#ifdef __ARMEB__
        lsl     w2, w2, #16
#endif
        b       8f

6:
        SHFT2MSB w2, w2, #16
        SHFT2LSB t1, w1, #16
4:
        SHFT2LSB w2, w2, #16
        b       8f

        /* Critical inner Loop: Block with 1 byte initial overlap */
        .p2align 2
3:
        and     t1, w1, #LSB            /* first byte of w1               */
        cmp     t1, w2, SHFT2LSB #24
        sub     r3, w1, b1              /* r3 = zero-byte syndrome of w1  */
        bic     r3, r3, w1
        bne     4f
        ands    r3, r3, b1, lsl #7
        it      eq
        ldreq   w2, [wp2], #4
        bne     5f                      /* w1 contains a NUL              */
        eor     t1, t1, w1              /* remaining three bytes of w1    */
        cmp     t1, w2, SHFT2MSB #8
        bne     6f
        ldr     w1, [wp1], #4
        b       3b
4:
        SHFT2LSB w2, w2, #24
        b       8f
5:
        /* The syndrome value may contain false ones if the string ends
         * with the bytes 0x01 0x00
         */
        tst     w1, #LSB
        beq     7f                      /* matched first byte is the NUL  */
        ldr     w2, [wp2], #4
6:
        SHFT2LSB t1, w1, #8
        bic     w2, w2, #MSB
        b       8f
7:
        /* Strings are equal. */
        mov     r0, #0
        ldr     r4, [sp], #4
        ldr     r5, [sp], #4
        bx      lr

8:
        /* Byte-wise tail: t1 holds the remaining bytes of s1 and w2 the
         * matching bytes of s2, both lined up at the LSB position.  Step
         * through them until a NUL or a mismatch is found.
         */
        and     r2, t1, #LSB
        and     r0, w2, #LSB
        cmp     r0, #1
        it      cs
        cmpcs   r0, r2
        itt     eq
        SHFT2LSBEQ t1, t1, #8
        SHFT2LSBEQ w2, w2, #8
        beq     8b
#ifdef __ARMEB__
        /* LSB is 0xff000000 here, so both bytes sit in bits 24-31.
         * Subtracting them in place would give the wrong sign whenever
         * the unsigned bytes differ by 0x80 or more, so move them down to
         * bits 0-7 first.
         */
        lsr     r2, r2, #24
        sub     r0, r2, r0, lsr #24
#else
        sub     r0, r2, r0
#endif
        ldr     r4, [sp], #4
        ldr     r5, [sp], #4
        bx      lr

        .size   strcmp, .-strcmp
321 bx lr