Add forwarding address checks for x86, ARM, ARM64

Added to READ_BARRIER_MARK_REG.

Bug: 30162165

Test: test-art-host, test-art-target

Change-Id: I15cf0d51ed3d22fa401e80ffac3877d61593527c
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bf70c55..0135260 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1999,11 +1999,17 @@
     // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
     ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
     tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
-    beq .Lslow_rb_\name
+    beq .Lnot_marked_rb_\name
     // Already marked, return right away.
 .Lret_rb_\name:
     bx lr
 
+.Lnot_marked_rb_\name:
+    // Check that both forwarding address state bits are set (lock word holds a forwarding address).
+    mvn ip, ip
+    tst ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    beq .Lret_forwarding_address\name
+
 .Lslow_rb_\name:
     // Save IP: the kSaveEverything entrypoint art_quick_resolve_string makes a tail call here.
     push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
@@ -2064,6 +2070,12 @@
     .cfi_restore ip
     .cfi_restore lr
     bx lr
+.Lret_forwarding_address\name:
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    mvn ip, ip
+    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    bx lr
 END \name
 .endm
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 483cee3..d806715 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2539,10 +2539,17 @@
      */
     // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
     ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_rb_\name
+    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
 .Lret_rb_\name:
     ret
+.Lnot_marked_rb_\name:
+    // Check if the top two bits are set; if so, the lock word holds a forwarding address.
+    mvn wIP0, wIP0
+    cmp wzr, wIP0, lsr #30
+    beq .Lret_forwarding_address\name
 .Lslow_rb_\name:
+    // We must not clobber IP0 since art_quick_resolve_string makes a tail call here and relies on
+    // IP0 being restored.
     // Save all potentially live caller-save core registers.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 368
     SAVE_TWO_REGS  x2,  x3, 16
@@ -2608,6 +2615,12 @@
     RESTORE_REG xLR, 360
     DECREASE_FRAME 368
     ret
+.Lret_forwarding_address\name:
+    mvn wIP0, wIP0
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    lsl \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    ret
 END \name
 .endm
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index f4f9a68..98739d3 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2155,8 +2155,15 @@
     jz .Lslow_rb_\name
     ret
 .Lslow_rb_\name:
-    // Save all potentially live caller-save core registers.
     PUSH eax
+    mov MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
+    add LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+    // Jump if the addl caused eax to unsigned overflow; the only overflow case is the forwarding address one.
+    // Taken ~25% of the time.
+    jnae .Lret_forwarding_address\name
+
+    // Save all potentially live caller-save core registers.
+    mov 0(%esp), %eax
     PUSH ecx
     PUSH edx
     PUSH ebx
@@ -2204,6 +2211,12 @@
     POP_REG_NE eax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
+.Lret_forwarding_address\name:
+    // The overflow cleared the top bits.
+    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+    mov %eax, REG_VAR(reg)
+    POP_REG_NE eax, RAW_VAR(reg)
+    ret
     END_FUNCTION VAR(name)
 END_MACRO
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index fc549ec..185e55e 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2279,9 +2279,10 @@
     PUSH rax
     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
     addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
-    // Jump if overflow, the only case where it overflows should be the forwarding address one.
+    // Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
+    // forwarding address one.
     // Taken ~25% of the time.
-    jnae .Lret_overflow\name
+    jnae .Lret_forwarding_address\name
 
     // Save all potentially live caller-save core registers.
     movq 0(%rsp), %rax
@@ -2349,7 +2350,7 @@
     POP_REG_NE rax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
-.Lret_overflow\name:
+.Lret_forwarding_address\name:
     // The overflow cleared the top bits.
     sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
     movq %rax, REG_VAR(reg)