Jim Huang | f50e9be | 2011-04-20 15:35:04 +0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2011 The Android Open Source Project |
| 3 | * Copyright (c) 2008 ARM Ltd |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions |
| 8 | * are met: |
| 9 | * 1. Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * 3. The name of the company may not be used to endorse or promote |
| 15 | * products derived from this software without specific prior written |
| 16 | * permission. |
| 17 | * |
| 18 | * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED |
| 19 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
| 20 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| 21 | * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 22 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
| 23 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 24 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 25 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 26 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 | */ |
| 29 | |
| 30 | #include <machine/cpu-features.h> |
Evgeniy Stepanov | 487b613 | 2011-10-04 14:22:15 +0400 | [diff] [blame] | 31 | #include <machine/asm.h> |
Jim Huang | f50e9be | 2011-04-20 15:35:04 +0800 | [diff] [blame] | 32 | |
| 33 | .text |
| 34 | |
/*
 * Endianness helpers.  __ARMEB__ selects the big-endian definitions.
 * LSB masks the byte of a loaded word that comes first in memory and MSB
 * the byte that comes last.  SHFT2LSB shifts a word toward the LSB
 * position and SHFT2MSB toward the MSB position.  The EQ variants are the
 * same shifts executed only when the eq condition holds.
 */
Jim Huang | f50e9be | 2011-04-20 15:35:04 +0800 | [diff] [blame] | 35 | #ifdef __ARMEB__ |
| 36 | #define SHFT2LSB lsl |
| 37 | #define SHFT2LSBEQ lsleq |
| 38 | #define SHFT2MSB lsr |
| 39 | #define SHFT2MSBEQ lsreq |
| 40 | #define MSB 0x000000ff |
| 41 | #define LSB 0xff000000 |
| 42 | #else |
| 43 | #define SHFT2LSB lsr |
| 44 | #define SHFT2LSBEQ lsreq |
| 45 | #define SHFT2MSB lsl |
| 46 | #define SHFT2MSBEQ lsleq |
| 47 | #define MSB 0xff000000 |
| 48 | #define LSB 0x000000ff |
| 49 | #endif |
| 50 | |
/*
 * Operand helpers for the zero-byte test in the aligned word loop, where
 * they are applied to a register holding 0x01010101: magic1 yields that
 * value unchanged, magic2 yields it shifted left by 7, which is 0x80808080.
 */
| 51 | #define magic1(REG) REG |
| 52 | #define magic2(REG) REG, lsl #7 |
| 53 | |
/*
 * strcmp: compare the NUL-terminated byte strings addressed by r0 and r1.
 *
 * In:   r0 = first string, r1 = second string.
 * Out:  r0 = byte of the first string minus byte of the second string,
 *       both taken as unsigned values, at the first position where the
 *       bytes differ or a NUL is reached.  Zero when the strings are equal.
 * Uses: r2, r3 and ip as scratch.  r4 (plus r5 on the unaligned path) is
 *       saved on the stack before use and restored before returning.
 *
 * Two paths.  When both pointers have the same offset within a word the
 * strings are compared one word at a time.  Otherwise control goes to
 * .Lstrcmp_unaligned, which first word-aligns r0 and then compares each
 * word of the first string against shifted pieces of aligned words of the
 * second string.
 */
Evgeniy Stepanov | 487b613 | 2011-10-04 14:22:15 +0400 | [diff] [blame] | 54 | ENTRY(strcmp) |
Elliott Hughes | c54ca40 | 2013-12-13 12:17:13 -0800 | [diff] [blame] | 55 | pld [r0, #0] |
| 56 | pld [r1, #0] |
/* Nonzero low two bits in r0 ^ r1 mean the pointers sit at different offsets within a word. */
Jim Huang | f50e9be | 2011-04-20 15:35:04 +0800 | [diff] [blame] | 57 | eor r2, r0, r1 |
| 58 | tst r2, #3 |
| 59 | |
| 60 | /* Strings not at same byte offset from a word boundary. */ |
| 61 | bne .Lstrcmp_unaligned |
/* r2 = offset of r0 within its word.  The Z flag set here is still live at the ldreq and beq below. */
| 62 | ands r2, r0, #3 |
/* Round both pointers down to a word boundary and fetch the first word of the first string. */
| 63 | bic r0, r0, #3 |
| 64 | bic r1, r1, #3 |
| 65 | ldr ip, [r0], #4 |
| 66 | it eq |
| 67 | ldreq r3, [r1], #4 |
| 68 | beq 1f |
| 69 | |
| 70 | /* Although s1 and s2 have identical initial alignment, they are |
| 71 | * not currently word aligned. Rather than comparing bytes, |
| 72 | * make sure that any bytes fetched from before the addressed |
| 73 | * bytes are forced to 0xff. Then they will always compare |
| 74 | * equal. |
| 75 | */ |
/* r2 becomes (3 - offset) * 8, the shift count that turns ~MSB into a mask of 0xff over the bytes fetched from before the strings. */
| 76 | eor r2, r2, #3 |
| 77 | lsl r2, r2, #3 |
| 78 | mvn r3, #MSB |
| 79 | SHFT2LSB r2, r3, r2 |
| 80 | ldr r3, [r1], #4 |
| 81 | orr ip, ip, r2 |
| 82 | orr r3, r3, r2 |
| 83 | 1: |
| 84 | /* Load the 'magic' constant 0x01010101. */ |
/* r4 is pushed here and popped again just before the bx lr that ends this path. */
| 85 | str r4, [sp, #-4]! |
| 86 | mov r4, #1 |
| 87 | orr r4, r4, r4, lsl #8 |
| 88 | orr r4, r4, r4, lsl #16 |
| 89 | .p2align 2 |
/*
 * Word loop: ip and r3 hold the current words of the first and second
 * string.  The loop continues only while the two words are equal and
 * (ip - 0x01010101) & ~ip & 0x80808080 is zero, that is, while ip holds
 * no zero byte.
 */
| 90 | 4: |
Elliott Hughes | c54ca40 | 2013-12-13 12:17:13 -0800 | [diff] [blame] | 91 | pld [r0, #8] |
| 92 | pld [r1, #8] |
Jim Huang | f50e9be | 2011-04-20 15:35:04 +0800 | [diff] [blame] | 93 | sub r2, ip, magic1(r4) |
| 94 | cmp ip, r3 |
| 95 | itttt eq |
| 96 | |
| 97 | /* check for any zero bytes in first word */ |
| 98 | biceq r2, r2, ip |
| 99 | tsteq r2, magic2(r4) |
| 100 | ldreq ip, [r0], #4 |
| 101 | ldreq r3, [r1], #4 |
| 102 | beq 4b |
/* Byte loop over the final word pair.  r0 is reused as a byte register from here on, so the pointer it held is discarded. */
| 103 | 2: |
| 104 | /* There's a zero or a different byte in the word */ |
| 105 | SHFT2MSB r0, ip, #24 |
| 106 | SHFT2LSB ip, ip, #8 |
/* Carry is set only for a nonzero byte, so the second compare runs only when the first string has not ended. */
| 107 | cmp r0, #1 |
| 108 | it cs |
| 109 | cmpcs r0, r3, SHFT2MSB #24 |
| 110 | it eq |
| 111 | SHFT2LSBEQ r3, r3, #8 |
| 112 | beq 2b |
| 113 | /* On a big-endian machine, r0 contains the desired byte in bits |
| 114 | * 0-7; on a little-endian machine they are in bits 24-31. In |
| 115 | * both cases the other bits in r0 are all zero. For r3 the |
| 116 | * interesting byte is at the other end of the word, but the |
| 117 | * other bits are not necessarily zero. We need a signed result |
| 118 | * representing the difference in the unsigned bytes, so for the |
| 119 | * little-endian case we can't just shift the interesting bits up. |
| 120 | */ |
| 121 | #ifdef __ARMEB__ |
| 122 | sub r0, r0, r3, lsr #24 |
| 123 | #else |
| 124 | and r3, r3, #255 |
| 125 | /* No RSB instruction in Thumb2 */ |
| 126 | #ifdef __thumb2__ |
| 127 | lsr r0, r0, #24 |
| 128 | sub r0, r0, r3 |
| 129 | #else |
| 130 | rsb r0, r3, r0, lsr #24 |
| 131 | #endif |
| 132 | #endif |
/* Restore the r4 saved at label 1 and return the byte difference in r0. */
| 133 | ldr r4, [sp], #4 |
| 134 | bx lr |
Jim Huang | f50e9be | 2011-04-20 15:35:04 +0800 | [diff] [blame] | 135 | |
/* Unaligned path: the two pointers have different offsets within a word. */
| 136 | .Lstrcmp_unaligned: |
/* Register aliases used for the rest of the function. */
| 137 | wp1 .req r0 |
| 138 | wp2 .req r1 |
| 139 | b1 .req r2 |
| 140 | w1 .req r4 |
| 141 | w2 .req r5 |
| 142 | t1 .req ip |
| 143 | @ r3 is scratch |
| 144 | |
| 145 | /* First of all, compare bytes until wp1(sp1) is word-aligned. */ |
| 146 | 1: |
| 147 | tst wp1, #3 |
| 148 | beq 2f |
| 149 | ldrb r2, [wp1], #1 |
| 150 | ldrb r3, [wp2], #1 |
/* The eq result survives only when the byte from wp1 is nonzero and equals the byte from wp2. */
| 151 | cmp r2, #1 |
| 152 | it cs |
| 153 | cmpcs r2, r3 |
| 154 | beq 1b |
/* Mismatch or end of string before alignment was reached.  Nothing has been pushed yet, so return directly. */
| 155 | sub r0, r2, r3 |
| 156 | bx lr |
| 157 | |
/* wp1 is now word aligned.  r5 is pushed before r4, which is why the exits pop r4 first and r5 second. */
| 158 | 2: |
| 159 | str r5, [sp, #-4]! |
| 160 | str r4, [sp, #-4]! |
/* b1 = 0x01010101, the constant used by the zero-byte tests below. */
| 161 | mov b1, #1 |
| 162 | orr b1, b1, b1, lsl #8 |
| 163 | orr b1, b1, b1, lsl #16 |
| 164 | |
/*
 * t1 = offset of wp2 within its word.  It cannot be zero here, because
 * the offsets differed on entry and both pointers advanced together in
 * the byte loop above.  Offset 1 falls through to the first loop, offset
 * 2 branches to 2f and offset 3 branches to 3f.
 */
| 165 | and t1, wp2, #3 |
| 166 | bic wp2, wp2, #3 |
| 167 | ldr w1, [wp1], #4 |
| 168 | ldr w2, [wp2], #4 |
| 169 | cmp t1, #2 |
| 170 | beq 2f |
| 171 | bhi 3f |
| 172 | |
| 173 | /* Critical inner Loop: Block with 3 bytes initial overlap */ |
| 174 | .p2align 2 |
| 175 | 1: |
/* t1 = w1 with its MSB byte cleared, compared against w2 shifted by one byte toward the LSB position. */
| 176 | bic t1, w1, #MSB |
| 177 | cmp t1, w2, SHFT2LSB #8 |
/* r3 = (w1 - b1) & ~w1, the zero-byte syndrome of w1 before it is masked with 0x80808080. */
| 178 | sub r3, w1, b1 |
| 179 | bic r3, r3, w1 |
| 180 | bne 4f |
| 181 | ands r3, r3, b1, lsl #7 |
| 182 | it eq |
| 183 | ldreq w2, [wp2], #4 |
| 184 | bne 5f |
/* t1 now keeps only the MSB byte of w1, which is compared with the first byte of the newly loaded w2. */
| 185 | eor t1, t1, w1 |
| 186 | cmp t1, w2, SHFT2MSB #24 |
| 187 | bne 6f |
| 188 | ldr w1, [wp1], #4 |
| 189 | b 1b |
/* Mismatch within the three overlapping bytes: line w2 up with t1 and finish bytewise at 8. */
| 190 | 4: |
| 191 | SHFT2LSB w2, w2, #8 |
| 192 | b 8f |
| 193 | |
/*
 * w1 contains a zero byte.  If it lies within the three bytes that already
 * compared equal, the strings are equal (7f).  Otherwise only the last
 * byte of w1 remains, and it is compared with the next byte of the second
 * string.
 */
| 194 | 5: |
| 195 | #ifdef __ARMEB__ |
| 196 | /* The syndrome value may contain false ones if the string ends |
| 197 | * with the bytes 0x01 0x00 |
| 198 | */ |
| 199 | tst w1, #0xff000000 |
| 200 | itt ne |
| 201 | tstne w1, #0x00ff0000 |
| 202 | tstne w1, #0x0000ff00 |
| 203 | beq 7f |
| 204 | #else |
| 205 | bics r3, r3, #0xff000000 |
| 206 | bne 7f |
| 207 | #endif |
| 208 | ldrb w2, [wp2] |
| 209 | SHFT2LSB t1, w1, #24 |
| 210 | #ifdef __ARMEB__ |
| 211 | lsl w2, w2, #24 |
| 212 | #endif |
| 213 | b 8f |
| 214 | |
/* The last byte of w1 differs from the first byte of the new w2: isolate both and let 8 compute the result. */
| 215 | 6: |
| 216 | SHFT2LSB t1, w1, #24 |
| 217 | and w2, w2, #LSB |
| 218 | b 8f |
| 219 | |
| 220 | /* Critical inner Loop: Block with 2 bytes initial overlap */ |
| 221 | .p2align 2 |
| 222 | 2: |
/* The shift pair leaves in t1 only the two bytes of w1 nearest the LSB position, compared against w2 shifted by two bytes. */
| 223 | SHFT2MSB t1, w1, #16 |
| 224 | sub r3, w1, b1 |
| 225 | SHFT2LSB t1, t1, #16 |
| 226 | bic r3, r3, w1 |
| 227 | cmp t1, w2, SHFT2LSB #16 |
| 228 | bne 4f |
| 229 | ands r3, r3, b1, lsl #7 |
| 230 | it eq |
| 231 | ldreq w2, [wp2], #4 |
| 232 | bne 5f |
/* t1 now keeps the other two bytes of w1, compared with the first two bytes of the newly loaded w2. */
| 233 | eor t1, t1, w1 |
| 234 | cmp t1, w2, SHFT2MSB #16 |
| 235 | bne 6f |
| 236 | ldr w1, [wp1], #4 |
| 237 | b 2b |
| 238 | |
/* w1 contains a zero byte.  A zero within the two bytes that already matched means equal strings (7f). Otherwise the remaining halfword is compared at 8. */
| 239 | 5: |
| 240 | #ifdef __ARMEB__ |
| 241 | /* The syndrome value may contain false ones if the string ends |
| 242 | * with the bytes 0x01 0x00 |
| 243 | */ |
| 244 | tst w1, #0xff000000 |
| 245 | it ne |
| 246 | tstne w1, #0x00ff0000 |
| 247 | beq 7f |
| 248 | #else |
| 249 | lsls r3, r3, #16 |
| 250 | bne 7f |
| 251 | #endif |
| 252 | ldrh w2, [wp2] |
| 253 | SHFT2LSB t1, w1, #16 |
| 254 | #ifdef __ARMEB__ |
| 255 | lsl w2, w2, #16 |
| 256 | #endif |
| 257 | b 8f |
| 258 | |
/* Falls through into 4, so the shift pair leaves w2 holding only its two bytes nearest the LSB position. */
| 259 | 6: |
| 260 | SHFT2MSB w2, w2, #16 |
| 261 | SHFT2LSB t1, w1, #16 |
| 262 | 4: |
| 263 | SHFT2LSB w2, w2, #16 |
| 264 | b 8f |
| 265 | |
| 266 | /* Critical inner Loop: Block with 1 byte initial overlap */ |
| 267 | .p2align 2 |
| 268 | 3: |
/* t1 = LSB byte of w1, compared against w2 shifted by three bytes toward the LSB position. */
| 269 | and t1, w1, #LSB |
| 270 | cmp t1, w2, SHFT2LSB #24 |
| 271 | sub r3, w1, b1 |
| 272 | bic r3, r3, w1 |
| 273 | bne 4f |
| 274 | ands r3, r3, b1, lsl #7 |
| 275 | it eq |
| 276 | ldreq w2, [wp2], #4 |
| 277 | bne 5f |
/* t1 now keeps the other three bytes of w1, compared with the first three bytes of the newly loaded w2. */
| 278 | eor t1, t1, w1 |
| 279 | cmp t1, w2, SHFT2MSB #8 |
| 280 | bne 6f |
| 281 | ldr w1, [wp1], #4 |
| 282 | b 3b |
| 283 | 4: |
| 284 | SHFT2LSB w2, w2, #24 |
| 285 | b 8f |
| 286 | 5: |
| 287 | /* The syndrome value may contain false ones if the string ends |
| 288 | * with the bytes 0x01 0x00 |
| 289 | */ |
/* A zero in the LSB byte of w1, which already matched, means the strings are equal.  Otherwise load the next word of the second string and fall into 6. */
| 290 | tst w1, #LSB |
| 291 | beq 7f |
| 292 | ldr w2, [wp2], #4 |
| 293 | 6: |
| 294 | SHFT2LSB t1, w1, #8 |
| 295 | bic w2, w2, #MSB |
| 296 | b 8f |
/* Strings are equal: return 0 after restoring r4 and r5. */
| 297 | 7: |
| 298 | mov r0, #0 |
| 299 | ldr r4, [sp], #4 |
| 300 | ldr r5, [sp], #4 |
| 301 | bx lr |
| 302 | |
/*
 * Bytewise tail.  t1 holds bytes of the first string and w2 bytes of the
 * second, both starting at the LSB position.  The loop stops at the first
 * mismatch or when the byte taken from w2 is zero.
 */
| 303 | 8: |
| 304 | and r2, t1, #LSB |
| 305 | and r0, w2, #LSB |
| 306 | cmp r0, #1 |
| 307 | it cs |
| 308 | cmpcs r0, r2 |
| 309 | itt eq |
| 310 | SHFT2LSBEQ t1, t1, #8 |
| 311 | SHFT2LSBEQ w2, w2, #8 |
| 312 | beq 8b |
/* Result = byte of the first string minus byte of the second string. */
| 313 | sub r0, r2, r0 |
| 314 | ldr r4, [sp], #4 |
| 315 | ldr r5, [sp], #4 |
| 316 | bx lr |
Evgeniy Stepanov | 487b613 | 2011-10-04 14:22:15 +0400 | [diff] [blame] | 317 | END(strcmp) |