David 'Digit' Turner | e31bfae | 2011-11-15 15:47:02 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2011 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | #ifndef BIONIC_ATOMIC_ARM_H |
| 17 | #define BIONIC_ATOMIC_ARM_H |
| 18 | |
| 19 | #include <machine/cpu-features.h> |
| 20 | |
/* Some of the hardware instructions used below are not available in Thumb-1
 * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
 * problem, we're going to use the same technique as libatomic_ops,
 * which is to temporarily switch to ARM, do the operation, then switch
 * back to Thumb-1.
| 26 | * |
 * This results in two 'bx' jumps, just like a normal function call, but
 * everything is kept inlined, avoids loading or computing the function's
 * address, and prevents a little I-cache thrashing too.
| 30 | * |
| 31 | * However, it is highly recommended to avoid compiling any C library source |
| 32 | * file that use these functions in Thumb-1 mode. |
| 33 | * |
| 34 | * Define three helper macros to implement this: |
| 35 | */ |
#if defined(__thumb__) && !defined(__thumb2__)
/* Temporarily switch the assembler to ARM mode: 'adr' of the ARM-aligned
 * label 5 leaves bit 0 of r3 clear, so 'bx r3' enters ARM state.
 * Clobbers r3 (declared through __ATOMIC_CLOBBERS below). */
# define __ATOMIC_SWITCH_TO_ARM \
            "adr r3, 5f\n" \
            "bx r3\n" \
            ".align\n" \
            ".arm\n" \
            "5:\n"
/* note: the leading \n below is intentional */
/* FIX: ".thumb" was previously missing its trailing "\n"; adjacent string
 * literals then concatenated into the invalid directive ".thumb6:" instead
 * of the ".thumb" directive followed by the local label "6:". */
/* NOTE(review): 'adr r3, 6f' is executed in ARM state; confirm the assembler
 * sets bit 0 for the Thumb-mode label 6, otherwise 'adr r3, 6f+1' would be
 * required for 'bx r3' to switch back to Thumb. */
# define __ATOMIC_SWITCH_TO_THUMB \
            "\n" \
            "adr r3, 6f\n" \
            "bx r3\n" \
            ".thumb\n" \
            "6:\n"

/* List of clobbered registers.
 * FIX: the trailing comma is required - use sites write
 * ': __ATOMIC_CLOBBERS "cc"', and without the comma the two literals
 * concatenate into the invalid clobber name "r3cc". */
# define __ATOMIC_CLOBBERS "r3",

/* Warn the user that ARM mode should really be preferred! */
# warning Rebuilding this source file in ARM mode is highly recommended for performance!!

#else
# define __ATOMIC_SWITCH_TO_ARM   /* nothing */
# define __ATOMIC_SWITCH_TO_THUMB /* nothing */
# define __ATOMIC_CLOBBERS        /* nothing */
#endif
| 61 | |
| 62 | |
| 63 | /* Define a full memory barrier, this is only needed if we build the |
| 64 | * platform for a multi-core device. For the record, using a 'dmb' |
| 65 | * instruction on a Nexus One device can take up to 180 ns even if |
 * it is completely unnecessary on this device.
| 67 | * |
 * NOTE: This is where the platform and NDK atomic headers are
| 69 | * going to diverge. With the NDK, we don't know if the generated |
| 70 | * code is going to run on a single or multi-core device, so we |
| 71 | * need to be cautious. |
| 72 | * |
David 'Digit' Turner | e31bfae | 2011-11-15 15:47:02 +0100 | [diff] [blame] | 73 | * I.e. on single-core devices, the helper immediately returns, |
| 74 | * on multi-core devices, it uses "dmb" or any other means to |
| 75 | * perform a full-memory barrier. |
| 76 | * |
| 77 | * There are three cases to consider for the platform: |
| 78 | * |
| 79 | * - multi-core ARMv7-A => use the 'dmb' hardware instruction |
| 80 | * - multi-core ARMv6 => use the coprocessor |
Nick Kralevich | e91f717 | 2013-07-03 14:14:06 -0700 | [diff] [blame] | 81 | * - single core ARMv6+ => do not use any hardware barrier |
David 'Digit' Turner | e31bfae | 2011-11-15 15:47:02 +0100 | [diff] [blame] | 82 | */ |
| 83 | #if defined(ANDROID_SMP) && ANDROID_SMP == 1 |
| 84 | |
| 85 | /* Sanity check, multi-core is only supported starting from ARMv6 */ |
| 86 | # if __ARM_ARCH__ < 6 |
| 87 | # error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6 |
| 88 | # endif |
| 89 | |
| 90 | # ifdef __ARM_HAVE_DMB |
| 91 | /* For ARMv7-A, we can use the 'dmb' instruction directly */ |
| 92 | __ATOMIC_INLINE__ void |
| 93 | __bionic_memory_barrier(void) |
| 94 | { |
| 95 | /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't |
| 96 | * bother with __ATOMIC_SWITCH_TO_ARM */ |
| 97 | __asm__ __volatile__ ( "dmb" : : : "memory" ); |
| 98 | } |
| 99 | # else /* !__ARM_HAVE_DMB */ |
| 100 | /* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor, |
| 101 | * which requires the use of a general-purpose register, which is slightly |
| 102 | * less efficient. |
| 103 | */ |
| 104 | __ATOMIC_INLINE__ void |
| 105 | __bionic_memory_barrier(void) |
| 106 | { |
| 107 | __asm__ __volatile__ ( |
| 108 | __SWITCH_TO_ARM |
| 109 | "mcr p15, 0, %0, c7, c10, 5" |
| 110 | __SWITCH_TO_THUMB |
| 111 | : : "r" (0) : __ATOMIC_CLOBBERS "memory"); |
| 112 | } |
| 113 | # endif /* !__ARM_HAVE_DMB */ |
| 114 | #else /* !ANDROID_SMP */ |
| 115 | __ATOMIC_INLINE__ void |
| 116 | __bionic_memory_barrier(void) |
| 117 | { |
| 118 | /* A simple compiler barrier */ |
| 119 | __asm__ __volatile__ ( "" : : : "memory" ); |
| 120 | } |
| 121 | #endif /* !ANDROID_SMP */ |
| 122 | |
/* All atomic operations below are built on LDREX/STREX exclusives,
 * which only exist on ARMv6 and later - refuse to build without them. */
#ifndef __ARM_HAVE_LDREX_STREX
#error Only ARM devices which have LDREX / STREX are supported
#endif
| 126 | |
/* Compare-and-swap, without any explicit barriers.
 *
 * Atomically: if *ptr == old_value, store new_value into *ptr.
 * Returns 0 on success (the swap happened) and 1 on failure
 * (*ptr did not contain old_value). Note this is the opposite of
 * the convention typically used on other platforms.
 */
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%3]\n"        /* prev = *ptr (exclusive load) */
            "mov %1, #0\n"            /* status = 0: assume success */
            "teq %0, %4\n"            /* compare prev against old_value */
#ifdef __thumb2__
            "it eq\n"                 /* Thumb-2 requires an IT block before the conditional store */
#endif
            "strexeq %1, %5, [%3]"    /* if equal, try to store new_value; status = 1 if the exclusive was lost */
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
            : "r" (ptr), "Ir" (old_value), "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");
    /* Retry only on exclusive-monitor contention; a value mismatch exits
     * the loop with status == 0 and is reported via the return value. */
    } while (__builtin_expect(status != 0, 0));
    return prev != old_value;
}
David 'Digit' Turner | e31bfae | 2011-11-15 15:47:02 +0100 | [diff] [blame] | 152 | |
/* Swap operation, without any explicit barriers.
 * Atomically stores new_value into *ptr and returns the previous value.
 */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%3]\n"      /* prev = *ptr (exclusive load) */
            "strex %1, %4, [%3]"    /* try to store new_value; status = 1 if the exclusive was lost */
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
            : "r" (ptr), "r" (new_value)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));  /* retry on contention */
    return prev;
}
David 'Digit' Turner | e31bfae | 2011-11-15 15:47:02 +0100 | [diff] [blame] | 170 | |
/* Atomic increment - without any barriers.
 * Atomically performs *ptr += 1 and returns the OLD value.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_inc(volatile int32_t* ptr)
{
    int32_t prev, tmp, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%4]\n"      /* prev = *ptr (exclusive load) */
            "add %1, %0, #1\n"      /* tmp = prev + 1 */
            "strex %2, %1, [%4]"    /* try to store tmp; status = 1 if the exclusive was lost */
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
            : "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));  /* retry on contention */
    return prev;
}
David 'Digit' Turner | e31bfae | 2011-11-15 15:47:02 +0100 | [diff] [blame] | 191 | |
/* Atomic decrement - without any barriers.
 * Atomically performs *ptr -= 1 and returns the OLD value.
 */
__ATOMIC_INLINE__ int32_t
__bionic_atomic_dec(volatile int32_t* ptr)
{
    int32_t prev, tmp, status;
    do {
        __asm__ __volatile__ (
            __ATOMIC_SWITCH_TO_ARM
            "ldrex %0, [%4]\n"      /* prev = *ptr (exclusive load) */
            "sub %1, %0, #1\n"      /* tmp = prev - 1 */
            "strex %2, %1, [%4]"    /* try to store tmp; status = 1 if the exclusive was lost */
            __ATOMIC_SWITCH_TO_THUMB
            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
            : "r" (ptr)
            : __ATOMIC_CLOBBERS "cc");
    } while (__builtin_expect(status != 0, 0));  /* retry on contention */
    return prev;
}
David 'Digit' Turner | e31bfae | 2011-11-15 15:47:02 +0100 | [diff] [blame] | 212 | |
#endif /* BIONIC_ATOMIC_ARM_H */