Reserve bits in the lock word for read barriers.
This prepares for the concurrent copying (CC) collector to use the standard
object header model by storing the read barrier state in the lock word.
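
All four architectures below apply the same pattern: mask the read barrier
bits out of the lock word before any comparison, preserve them in every value
written back, and replace plain stores with exclusive or compare-and-swap
stores because the GC may update those bits concurrently. A minimal C++ sketch
of the resulting thin-lock fast path follows; the constant values are inferred
from this diff (state in bits 30-31, read barrier state in bits 28-29, thin
lock count starting at bit 16), the authoritative definitions live in
runtime/lock_word.h and runtime/asm_support.h, so treat the sketch as
illustrative rather than as ART code.

  #include <atomic>
  #include <cstdint>

  // Assumed values; see runtime/lock_word.h for the real definitions.
  constexpr uint32_t kStateShift = 30;                     // LOCK_WORD_STATE_SHIFT
  constexpr uint32_t kReadBarrierStateShift = 28;          // LOCK_WORD_READ_BARRIER_STATE_SHIFT
  constexpr uint32_t kReadBarrierStateMask = 0x30000000u;  // LOCK_WORD_READ_BARRIER_STATE_MASK
  constexpr uint32_t kReadBarrierStateMaskToggled = ~kReadBarrierStateMask;
  constexpr uint32_t kThinLockCountOne = 1u << 16;         // LOCK_WORD_THIN_LOCK_COUNT_ONE

  // Returns true if the thin lock was acquired on the fast path; false means
  // the caller must retry (CAS failure) or fall back to the slow path.
  bool ThinLockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
    uint32_t old_word = lock_word.load(std::memory_order_relaxed);
    if ((old_word >> kStateShift) != 0) {
      return false;  // fat lock or hash code state: slow path
    }
    uint32_t rb_bits = old_word & kReadBarrierStateMask;
    uint32_t masked = old_word & kReadBarrierStateMaskToggled;  // rb bits zeroed
    uint32_t new_word;
    if (masked == 0) {
      new_word = rb_bits | thread_id;  // unlocked -> owned, count 0, rb bits kept
    } else if (((masked ^ thread_id) & 0xFFFFu) != 0) {
      return false;  // owned by another thread: slow path
    } else if (((masked + kThinLockCountOne) >> kReadBarrierStateShift) != 0) {
      return false;  // recursion count would overflow: slow path
    } else {
      new_word = old_word + kThinLockCountOne;  // recursive lock: bump the count
    }
    // CAS instead of a plain store, so a concurrent change to the read
    // barrier bits makes the store fail and the caller retry.
    return lock_word.compare_exchange_weak(old_word, new_word,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed);
  }

The unlock path is symmetric: it masks the read barrier bits out for the owner
and count checks, ORs them back into the value it stores, and under
USE_READ_BARRIER uses the same exclusive-store/CAS retry loop instead of a
plain store.
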
Bug: 19355854
Bug: 12687968
Change-Id: Ia7585662dd2cebf0479a3e74f734afe5059fb70f
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index fec1ce5..aff3880 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -469,26 +469,33 @@
.Lretry_lock:
ldr r2, [r9, #THREAD_ID_OFFSET]
ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- cbnz r1, .Lnot_unlocked @ already thin locked
- @ unlocked case - r2 holds thread id with count of 0
+ mov r3, r1
+ and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ cbnz r3, .Lnot_unlocked @ already thin locked
+ @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
+ orr r2, r1, r2 @ r2 holds thread id with count of 0 and preserved read barrier bits
strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- cbnz r3, .Lstrex_fail @ store failed, retry
+ cbnz r3, .Llock_strex_fail @ store failed, retry
dmb ish @ full (LoadLoad|LoadStore) memory barrier
bx lr
-.Lstrex_fail:
- b .Lretry_lock @ unlikely forward branch, need to reload and recheck r1/r2
-.Lnot_unlocked:
- lsr r3, r1, 30
+.Lnot_unlocked: @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits
+ lsr r3, r1, LOCK_WORD_STATE_SHIFT
cbnz r3, .Lslow_lock @ if either of the top two bits are set, go slow path
eor r2, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId()
uxth r2, r2 @ zero top 16 bits
cbnz r2, .Lslow_lock @ lock word and self thread id's match -> recursive lock
@ else contention, go to slow path
- add r2, r1, #65536 @ increment count in lock word placing in r2 for storing
- lsr r1, r2, 30 @ if either of the top two bits are set, we overflowed.
- cbnz r1, .Lslow_lock @ if we overflow the count go slow path
- str r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+ mov r3, r1 @ copy the lock word to check count overflow.
+ and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits.
+ add r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count in lock word, placing result in r2 to check for overflow
+ lsr r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT @ if either of the upper two bits (28-29) are set, we overflowed.
+ cbnz r3, .Lslow_lock @ if we overflow the count go slow path
+ add r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count for real
+ strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
+ cbnz r3, .Llock_strex_fail @ strex failed, retry
bx lr
+.Llock_strex_fail:
+ b .Lretry_lock @ retry
.Lslow_lock:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2 @ save callee saves in case we block
mov r1, r9 @ pass Thread::Current
@@ -505,23 +512,46 @@
.extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
cbz r0, .Lslow_unlock
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
ldr r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- lsr r2, r1, 30
+#else
+ ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ Need to use atomic instructions for read barrier
+#endif
+ lsr r2, r1, #LOCK_WORD_STATE_SHIFT
cbnz r2, .Lslow_unlock @ if either of the top two bits are set, go slow path
ldr r2, [r9, #THREAD_ID_OFFSET]
- eor r3, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId()
+ mov r3, r1 @ copy lock word to check thread id equality
+ and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ eor r3, r3, r2 @ lock_word.ThreadId() ^ self->ThreadId()
uxth r3, r3 @ zero top 16 bits
cbnz r3, .Lslow_unlock @ do lock word and self thread id's match?
- cmp r1, #65536
+ mov r3, r1 @ copy lock word to detect transition to unlocked
+ and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits
+ cmp r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
bpl .Lrecursive_thin_unlock
- @ transition to unlocked, r3 holds 0
+ @ transition to unlocked
+ mov r3, r1
+ and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK @ r3: zero except for the preserved read barrier bits
dmb ish @ full (LoadStore|StoreStore) memory barrier
+#ifndef USE_READ_BARRIER
str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+ strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
+ cbnz r2, .Lunlock_strex_fail @ store failed, retry
+#endif
bx lr
-.Lrecursive_thin_unlock:
- sub r1, r1, #65536
+.Lrecursive_thin_unlock: @ r1: original lock word
+ sub r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ decrement count
+#ifndef USE_READ_BARRIER
str r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+ strex r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
+ cbnz r2, .Lunlock_strex_fail @ store failed, retry
+#endif
bx lr
+.Lunlock_strex_fail:
+ b .Lretry_unlock @ retry
.Lslow_unlock:
@ save callee saves in case exception allocation triggers GC
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 770073b5..382a4c2 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1000,25 +1000,33 @@
.Lretry_lock:
ldr w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
ldxr w1, [x4]
- cbnz w1, .Lnot_unlocked // already thin locked
+ mov x3, x1
+ and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ cbnz w3, .Lnot_unlocked // already thin locked
+ // unlocked case - x1: original lock word that's zero except for the read barrier bits.
+ orr x2, x1, x2 // x2 holds thread id with count of 0 and preserved read barrier bits
stxr w3, w2, [x4]
- cbnz w3, .Lstrex_fail // store failed, retry
+ cbnz w3, .Llock_stxr_fail // store failed, retry
dmb ishld // full (LoadLoad|LoadStore) memory barrier
ret
-.Lstrex_fail:
- b .Lretry_lock // unlikely forward branch, need to reload and recheck r1/r2
-.Lnot_unlocked:
- lsr w3, w1, 30
+.Lnot_unlocked: // x1: original lock word
+ lsr w3, w1, LOCK_WORD_STATE_SHIFT
cbnz w3, .Lslow_lock // if either of the top two bits are set, go slow path
eor w2, w1, w2 // lock_word.ThreadId() ^ self->ThreadId()
uxth w2, w2 // zero top 16 bits
cbnz w2, .Lslow_lock // lock word and self thread id's match -> recursive lock
// else contention, go to slow path
- add w2, w1, #65536 // increment count in lock word placing in w2 for storing
- lsr w1, w2, 30 // if either of the top two bits are set, we overflowed.
- cbnz w1, .Lslow_lock // if we overflow the count go slow path
- str w2, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] // no need for stxr as we hold the lock
+ mov x3, x1 // copy the lock word to check count overflow.
+ and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits.
+ add w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count in lock word, placing result in w2 to check for overflow
+ lsr w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT // if either of the upper two bits (28-29) are set, we overflowed.
+ cbnz w3, .Lslow_lock // if we overflow the count go slow path
+ add w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count for real
+ stxr w3, w2, [x4]
+ cbnz w3, .Llock_stxr_fail // store failed, retry
ret
+.Llock_stxr_fail:
+ b .Lretry_lock // retry
.Lslow_lock:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case we block
mov x1, xSELF // pass Thread::Current
@@ -1036,23 +1044,47 @@
.extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
cbz x0, .Lslow_unlock
- ldr w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- lsr w2, w1, 30
+ add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET // exclusive load/store take no immediate offset, so compute the lock word address
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
+ ldr w1, [x4]
+#else
+ ldxr w1, [x4] // Need to use atomic instructions for read barrier
+#endif
+ lsr w2, w1, LOCK_WORD_STATE_SHIFT
cbnz w2, .Lslow_unlock // if either of the top two bits are set, go slow path
ldr w2, [xSELF, #THREAD_ID_OFFSET]
- eor w3, w1, w2 // lock_word.ThreadId() ^ self->ThreadId()
+ mov x3, x1 // copy lock word to check thread id equality
+ and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ eor w3, w3, w2 // lock_word.ThreadId() ^ self->ThreadId()
uxth w3, w3 // zero top 16 bits
cbnz w3, .Lslow_unlock // do lock word and self thread id's match?
- cmp w1, #65536
+ mov x3, x1 // copy lock word to detect transition to unlocked
+ and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits
+ cmp w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
bpl .Lrecursive_thin_unlock
- // transition to unlocked, w3 holds 0
+ // transition to unlocked
+ mov x3, x1
+ and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK // w3: zero except for the preserved read barrier bits
dmb ish // full (LoadStore|StoreStore) memory barrier
- str w3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#ifndef USE_READ_BARRIER
+ str w3, [x4]
+#else
+ stxr w2, w3, [x4] // Need to use atomic instructions for read barrier
+ cbnz w2, .Lunlock_stxr_fail // store failed, retry
+#endif
ret
-.Lrecursive_thin_unlock:
- sub w1, w1, #65536
- str w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+.Lrecursive_thin_unlock: // w1: original lock word
+ sub w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE // decrement count
+#ifndef USE_READ_BARRIER
+ str w1, [x4]
+#else
+ stxr w2, w1, [x4] // Need to use atomic instructions for read barrier
+ cbnz w2, .Lunlock_stxr_fail // store failed, retry
+#endif
ret
+.Lunlock_stxr_fail:
+ b .Lretry_unlock // retry
.Lslow_unlock:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case exception allocation triggers GC
mov x1, xSELF // pass Thread::Current
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index c2acdd1..c437428 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -922,29 +922,39 @@
jz .Lslow_lock
.Lretry_lock:
movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word
- test LITERAL(0xC0000000), %ecx // test the 2 high bits.
+ test LITERAL(LOCK_WORD_STATE_MASK), %ecx // test the 2 high bits.
jne .Lslow_lock // slow path if either of the two high bits are set.
- movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id
+ movl %ecx, %edx // save lock word in edx to keep the read barrier bits.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
test %ecx, %ecx
jnz .Lalready_thin // lock word contains a thin lock
- // unlocked case - %edx holds thread id with count of 0
+ // unlocked case - edx: original lock word, eax: obj.
movl %eax, %ecx // remember object in case of retry
- xor %eax, %eax // eax == 0 for comparison with lock word in cmpxchg
- lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
- jnz .Lcmpxchg_fail // cmpxchg failed retry
+ movl %edx, %eax // eax: lock word zero except for read barrier bits.
+ movl %fs:THREAD_ID_OFFSET, %edx // load thread id.
+ or %eax, %edx // edx: thread id with count of 0 + read barrier bits.
+ lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val.
+ jnz .Llock_cmpxchg_fail // cmpxchg failed retry
ret
-.Lcmpxchg_fail:
- movl %ecx, %eax // restore eax
- jmp .Lretry_lock
-.Lalready_thin:
+.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), eax: obj.
+ movl %fs:THREAD_ID_OFFSET, %ecx // ecx := thread id
cmpw %cx, %dx // do we hold the lock already?
jne .Lslow_lock
- addl LITERAL(65536), %ecx // increment recursion count
- test LITERAL(0xC0000000), %ecx // overflowed if either of top two bits are set
+ movl %edx, %ecx // copy the lock word to check count overflow.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check.
+ test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set.
jne .Lslow_lock // count overflowed so go slow
- // update lockword, cmpxchg not necessary as we hold lock
- movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
+ movl %eax, %ecx // save obj to use eax for cmpxchg.
+ movl %edx, %eax // copy the lock word as the old val for cmpxchg.
+ addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real.
+ // update lockword, cmpxchg necessary for read barrier bits.
+ lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val.
+ jnz .Llock_cmpxchg_fail // cmpxchg failed retry
ret
+.Llock_cmpxchg_fail:
+ movl %ecx, %eax // restore eax
+ jmp .Lretry_lock
.Lslow_lock:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
// Outgoing argument set up
@@ -963,20 +973,43 @@
DEFINE_FUNCTION art_quick_unlock_object
testl %eax, %eax // null check object/eax
jz .Lslow_unlock
+.Lretry_unlock:
movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word
movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id
- test LITERAL(0xC0000000), %ecx
+ test LITERAL(LOCK_WORD_STATE_MASK), %ecx
jnz .Lslow_unlock // lock word contains a monitor
cmpw %cx, %dx // does the thread id match?
jne .Lslow_unlock
- cmpl LITERAL(65536), %ecx
+ movl %ecx, %edx // copy the lock word to detect new count of 0.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits.
+ cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
jae .Lrecursive_thin_unlock
- movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
+ // update lockword, cmpxchg necessary for read barrier bits.
+ movl %eax, %edx // edx: obj
+ movl %ecx, %eax // eax: old lock word.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits.
+#ifndef USE_READ_BARRIER
+ movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+#else
+ lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val.
+ jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry
+#endif
ret
-.Lrecursive_thin_unlock:
- subl LITERAL(65536), %ecx
- mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
+.Lrecursive_thin_unlock: // ecx: original lock word, eax: obj
+ // update lockword, cmpxchg necessary for read barrier bits.
+ movl %eax, %edx // edx: obj
+ movl %ecx, %eax // eax: old lock word.
+ subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // ecx: new lock word with decremented count.
+#ifndef USE_READ_BARRIER
+ mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+#else
+ lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val.
+ jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry
+#endif
ret
+.Lunlock_cmpxchg_fail: // edx: obj
+ movl %edx, %eax // restore eax
+ jmp .Lretry_unlock
.Lslow_unlock:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
// Outgoing argument set up
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index c865541..9b6b367 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -955,24 +955,33 @@
jz .Lslow_lock
.Lretry_lock:
movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word.
- test LITERAL(0xC0000000), %ecx // Test the 2 high bits.
+ test LITERAL(LOCK_WORD_STATE_MASK), %ecx // Test the 2 high bits.
jne .Lslow_lock // Slow path if either of the two high bits are set.
- movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id
+ movl %ecx, %edx // save lock word in edx to keep the read barrier bits.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
test %ecx, %ecx
jnz .Lalready_thin // Lock word contains a thin lock.
- // unlocked case - %edx holds thread id with count of 0
- xor %eax, %eax // eax == 0 for comparison with lock word in cmpxchg
+ // unlocked case - edx: original lock word, edi: obj.
+ movl %edx, %eax // eax: lock word zero except for read barrier bits.
+ movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id
+ or %eax, %edx // edx: thread id with count of 0 + read barrier bits.
lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
jnz .Lretry_lock // cmpxchg failed retry
ret
-.Lalready_thin:
+.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
+ movl %gs:THREAD_ID_OFFSET, %ecx // ecx := thread id
cmpw %cx, %dx // do we hold the lock already?
jne .Lslow_lock
- addl LITERAL(65536), %ecx // increment recursion count
- test LITERAL(0xC0000000), %ecx // overflowed if either of top two bits are set
+ movl %edx, %ecx // copy the lock word to check count overflow.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits.
+ addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count
+ test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set
jne .Lslow_lock // count overflowed so go slow
- // update lockword, cmpxchg not necessary as we hold lock
- movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+ movl %edx, %eax // copy the lock word as the old val for cmpxchg.
+ addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real.
+ // update lockword, cmpxchg necessary for read barrier bits.
+ lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, edx: new val.
+ jnz .Lretry_lock // cmpxchg failed retry
ret
.Lslow_lock:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -985,19 +994,37 @@
DEFINE_FUNCTION art_quick_unlock_object
testl %edi, %edi // null check object/edi
jz .Lslow_unlock
+.Lretry_unlock:
movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word
movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id
- test LITERAL(0xC0000000), %ecx
+ test LITERAL(LOCK_WORD_STATE_MASK), %ecx
jnz .Lslow_unlock // lock word contains a monitor
cmpw %cx, %dx // does the thread id match?
jne .Lslow_unlock
- cmpl LITERAL(65536), %ecx
+ movl %ecx, %edx // copy the lock word to detect new count of 0.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits.
+ cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
jae .Lrecursive_thin_unlock
- movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+ // update lockword, cmpxchg necessary for read barrier bits.
+ movl %ecx, %eax // eax: old lock word.
+ andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits.
+#ifndef USE_READ_BARRIER
+ movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+#else
+ lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val.
+ jnz .Lretry_unlock // cmpxchg failed retry
+#endif
ret
-.Lrecursive_thin_unlock:
- subl LITERAL(65536), %ecx
+.Lrecursive_thin_unlock: // ecx: original lock word, edi: obj
+ // update lockword, cmpxchg necessary for read barrier bits.
+ movl %ecx, %eax // eax: old lock word.
+ subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
+#ifndef USE_READ_BARRIER
mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+#else
+ lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val.
+ jnz .Lretry_unlock // cmpxchg failed retry
+#endif
ret
.Lslow_unlock:
SETUP_REFS_ONLY_CALLEE_SAVE_FRAME