Reserve bits in the lock word for read barriers.

This prepares for the concurrent copying (CC) collector to use the
standard object header model by storing the read barrier state in the
lock word. A sketch of the assumed lock word layout follows below.

Bug: 19355854
Bug: 12687968
Change-Id: Ia7585662dd2cebf0479a3e74f734afe5059fb70f
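
For reference, a minimal C++ sketch of the layout and fast path this patch
assumes. The constant names and bit positions are inferred from the assembly
below (state in bits 30-31, read barrier state in bits 28-29, thin lock count
in bits 16-27, owner thread id in bits 0-15); they are illustrative only, not
copied from lock_word.h.

#include <atomic>
#include <cstdint>

// Illustrative only: values inferred from the assembly constants in this patch
// (LOCK_WORD_STATE_SHIFT = 30, read barrier state in bits 28-29,
// LOCK_WORD_THIN_LOCK_COUNT_ONE = 1 << 16).
constexpr uint32_t kStateShift = 30;              // bits 30-31: thin/fat/hash state
constexpr uint32_t kReadBarrierStateShift = 28;   // bits 28-29: read barrier state
constexpr uint32_t kReadBarrierStateMask = 3u << kReadBarrierStateShift;
constexpr uint32_t kReadBarrierStateMaskToggled = ~kReadBarrierStateMask;
constexpr uint32_t kThinLockCountOne = 1u << 16;  // thin lock count in bits 16-27
constexpr uint32_t kThinLockOwnerMask = 0xFFFFu;  // owner thread id in bits 0-15

// Uncontended lock fast path as the assembly below implements it: the read
// barrier bits are masked out before the "is it unlocked?" test and preserved
// in the value written back, which is why a plain str is no longer enough and
// the stores become ldrex/strex (modeled here as a CAS).
inline bool TryThinLock(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  uint32_t old_lw = lock_word.load(std::memory_order_relaxed);
  if ((old_lw & kReadBarrierStateMaskToggled) != 0) {
    return false;  // thin locked, inflated, or hashed: caller takes the slow path
  }
  uint32_t new_lw = old_lw | thread_id;  // owner id, count 0, read barrier bits kept
  return lock_word.compare_exchange_weak(old_lw, new_lw, std::memory_order_acquire);
}

TryThinLock is a hypothetical helper used only to illustrate why the stores in
art_quick_lock_object become strex: any value written back must carry the read
barrier bits that were read together with the rest of the lock word.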
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index fec1ce5..aff3880 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -469,26 +469,33 @@
 .Lretry_lock:
     ldr    r2, [r9, #THREAD_ID_OFFSET]
     ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   r1, .Lnot_unlocked         @ already thin locked
-    @ unlocked case - r2 holds thread id with count of 0
+    mov    r3, r1
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    cbnz   r3, .Lnot_unlocked         @ already thin locked
+    @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
+    orr    r2, r1, r2                 @ r2 holds thread id with count of 0 and preserved read barrier bits
     strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   r3, .Lstrex_fail           @ store failed, retry
+    cbnz   r3, .Llock_strex_fail      @ store failed, retry
     dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
     bx lr
-.Lstrex_fail:
-    b .Lretry_lock                    @ unlikely forward branch, need to reload and recheck r1/r2
-.Lnot_unlocked:
-    lsr    r3, r1, 30
+.Lnot_unlocked:  @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits
+    lsr    r3, r1, #LOCK_WORD_STATE_SHIFT
     cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, go slow path
     eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
     uxth   r2, r2                     @ zero top 16 bits
     cbnz   r2, .Lslow_lock            @ lock word and self thread id's match -> recursive lock
                                       @ else contention, go to slow path
-    add    r2, r1, #65536             @ increment count in lock word placing in r2 for storing
-    lsr    r1, r2, 30                 @ if either of the top two bits are set, we overflowed.
-    cbnz   r1, .Lslow_lock            @ if we overflow the count go slow path
-    str    r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+    mov    r3, r1                     @ copy the lock word to check count overflow.
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits.
+    add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word, placing the result in r2 to check for overflow
+    lsr    r3, r2, #LOCK_WORD_READ_BARRIER_STATE_SHIFT  @ if either of the upper two bits (28-29) are set, we overflowed.
+    cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
+    add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
+    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
+    cbnz   r3, .Llock_strex_fail      @ strex failed, retry
     bx lr
+.Llock_strex_fail:
+    b      .Lretry_lock               @ retry
 .Lslow_lock:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
@@ -505,23 +512,46 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     cbz    r0, .Lslow_unlock
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
     ldr    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    lsr    r2, r1, 30
+#else
+    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ Need to use atomic instructions for read barrier
+#endif
+    lsr    r2, r1, #LOCK_WORD_STATE_SHIFT
     cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
     ldr    r2, [r9, #THREAD_ID_OFFSET]
-    eor    r3, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
+    mov    r3, r1                     @ copy lock word to check thread id equality
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
     uxth   r3, r3                     @ zero top 16 bits
     cbnz   r3, .Lslow_unlock          @ do lock word and self thread id's match?
-    cmp    r1, #65536
+    mov    r3, r1                     @ copy lock word to detect transition to unlocked
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
+    cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
     bpl    .Lrecursive_thin_unlock
-    @ transition to unlocked, r3 holds 0
+    @ transition to unlocked
+    mov    r3, r1
+    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK  @ r3: zero except for the preserved read barrier bits
     dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
+#ifndef USE_READ_BARRIER
     str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
+    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
+#endif
     bx     lr
-.Lrecursive_thin_unlock:
-    sub    r1, r1, #65536
+.Lrecursive_thin_unlock:  @ r1: original lock word
+    sub    r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ decrement count
+#ifndef USE_READ_BARRIER
     str    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+    strex  r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
+    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
+#endif
     bx     lr
+.Lunlock_strex_fail:
+    b      .Lretry_unlock             @ retry
 .Lslow_unlock:
     @ save callee saves in case exception allocation triggers GC
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2