ARM/AOT: Allow 16-bit LDR for Baker read barrier loads.
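
Use the 16-bit LDR (immediate), encoding T1, for Baker read barrier field
and GC root loads when both the destination and the base register are low
and the offset is below 32, and record the chosen load width in the
encoded patch data so that the introspection thunks can locate the narrow
LDR, decode its offset and jump to the matching narrow entrypoint.

For illustration (a sketch only, not part of this change; the struct and
helper names are hypothetical), the narrow thunks effectively decode the
T1 half-word as follows:

  // LDR (immediate), encoding T1: |15-11: 01101|10-6: imm5|5-3: Rn|2-0: Rt|.
  struct NarrowLdr { uint32_t rt, rn, byte_offset; };
  NarrowLdr DecodeNarrowLdr(uint32_t insn16) {
    return NarrowLdr{insn16 & 0x7u,                   // Rt, bits 2-0.
                     (insn16 >> 3) & 0x7u,            // Rn, bits 5-3.
                     ((insn16 >> 6) & 0x1fu) << 2};   // imm5 scaled by 4.
  }
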
Test: m test-art-target-gtest
Test: testrunner.py --target on Nexus 6P.
Test: testrunner.py --target on Nexus 6P with heap poisoning enabled.
Test: Repeat the above tests with ART_USE_OLD_ARM_BACKEND=true.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: I458f2ec5fe9abead4db06c7595d992945096fb68
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index ced52ff..a98aedf 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -18,6 +18,7 @@
#include "arch/arm/asm_support_arm.h"
#include "art_method.h"
+#include "base/bit_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "lock_word.h"
@@ -112,12 +113,22 @@
// Check that the next instruction matches the expected LDR.
switch (kind) {
case BakerReadBarrierKind::kField: {
- DCHECK_GE(code->size() - literal_offset, 8u);
- uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
- // LDR (immediate) with correct base_reg.
- CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+ // LDR (immediate), encoding T3, with correct base_reg.
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ } else {
+ DCHECK_GE(code->size() - literal_offset, 6u);
+ uint32_t next_insn = GetInsn16(code, literal_offset + 4u);
+ // LDR (immediate), encoding T1, with correct base_reg.
+ CheckValidReg(next_insn & 0x7u); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
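+ // T1 layout: |15-11: 01101|10-6: imm5|5-3: Rn|2-0: Rt|; 0xf838 masks the opcode and Rn.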
+ CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+ }
break;
}
case BakerReadBarrierKind::kArray: {
@@ -131,11 +142,20 @@
break;
}
case BakerReadBarrierKind::kGcRoot: {
- DCHECK_GE(literal_offset, 4u);
- uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
- // LDR (immediate) with correct root_reg.
- const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
+ // LDR (immediate), encoding T3, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ } else {
+ DCHECK_GE(literal_offset, 2u);
+ uint32_t prev_insn = GetInsn16(code, literal_offset - 2u);
+ // LDR (immediate), encoding T1, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+ }
break;
}
default:
@@ -160,7 +180,8 @@
static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler,
vixl::aarch32::Register base_reg,
vixl::aarch32::MemOperand& lock_word,
- vixl::aarch32::Label* slow_path) {
+ vixl::aarch32::Label* slow_path,
+ int32_t raw_ldr_offset) {
using namespace vixl::aarch32; // NOLINT(build/namespaces)
// Load the lock word containing the rb_state.
__ Ldr(ip, lock_word);
@@ -169,14 +190,7 @@
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
__ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
__ B(ne, slow_path, /* is_far_target */ false);
- static_assert(
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
- "Field and array LDR offsets must be the same to reuse the same code.");
- // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ __ Add(lr, lr, raw_ldr_offset);
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
@@ -199,6 +213,7 @@
CheckValidReg(base_reg.GetCode());
Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
CheckValidReg(holder_reg.GetCode());
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
// If base_reg differs from holder_reg, the offset was too large and we must have
@@ -210,16 +225,30 @@
}
vixl::aarch32::Label slow_path;
MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
- EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
- BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET;
- MemOperand ldr_half_address(lr, ldr_offset + 2);
- __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
- __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
- __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ raw_ldr_offset;
+ Register ep_reg(kBakerCcEntrypointRegister);
+ if (width == BakerReadBarrierWidth::kWide) {
+ MemOperand ldr_half_address(lr, ldr_offset + 2);
+ __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
+ __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
+ __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ } else {
+ MemOperand ldr_address(lr, ldr_offset);
+ __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
+ __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
+ ep_reg, // for narrow LDR.
+ Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+ __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
+ __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
+ }
// Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
- __ Bx(Register(kBakerCcEntrypointRegister)); // Jump to the entrypoint.
+ __ Bx(ep_reg); // Jump to the entrypoint.
if (holder_reg.Is(base_reg)) {
// Add null check slow path. The stack map is at the address pointed to by LR.
__ Bind(&throw_npe);
@@ -233,6 +262,7 @@
Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(base_reg.GetCode());
DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data));
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
vixl::aarch32::Label slow_path;
@@ -240,10 +270,11 @@
mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
- EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ raw_ldr_offset;
MemOperand ldr_address(lr, ldr_offset + 2);
__ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
// i.e. Rm+32 because the scale in imm2 is 2.
@@ -261,6 +292,7 @@
Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(root_reg.GetCode());
DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
vixl::aarch32::Label return_label, not_marked, forwarding_address;
@@ -280,7 +312,10 @@
// Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
// to art_quick_read_barrier_mark_introspection_gc_roots.
Register ep_reg(kBakerCcEntrypointRegister);
- __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+ int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+ __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
__ Mov(ip, root_reg);
__ Bx(ep_reg);
__ Bind(&forwarding_address);
@@ -344,7 +379,7 @@
void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
DCHECK_LE(offset + 4u, code->size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
uint8_t* addr = &(*code)[offset];
addr[0] = (value >> 16) & 0xff;
addr[1] = (value >> 24) & 0xff;
@@ -354,7 +389,7 @@
uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) {
DCHECK_LE(offset + 4u, code.size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
const uint8_t* addr = &code[offset];
return
(static_cast<uint32_t>(addr[0]) << 16) +
@@ -369,5 +404,18 @@
return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
}
+uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) {
+ DCHECK_LE(offset + 2u, code.size());
+ DCHECK_ALIGNED(offset, 2u);
+ const uint8_t* addr = &code[offset];
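+ // A 16-bit Thumb2 instruction is stored as a single little-endian half-word.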
+ return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8);
+}
+
+template <typename Vector>
+uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) {
+ static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+ return GetInsn16(ArrayRef<const uint8_t>(*code), offset);
+}
+
} // namespace linker
} // namespace art
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 7fad245..7e787d2 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -35,26 +35,37 @@
public:
static constexpr uint32_t kBakerCcEntrypointRegister = 4u;
- static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+ static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
CheckValidReg(base_reg);
CheckValidReg(holder_reg);
+ DCHECK(!narrow || base_reg < 8u) << base_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
BakerReadBarrierFirstRegField::Encode(base_reg) |
- BakerReadBarrierSecondRegField::Encode(holder_reg);
+ BakerReadBarrierSecondRegField::Encode(holder_reg) |
+ BakerReadBarrierWidthField::Encode(width);
}
static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
CheckValidReg(base_reg);
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
BakerReadBarrierFirstRegField::Encode(base_reg) |
- BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
}
- static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+ static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
CheckValidReg(root_reg);
+ DCHECK(!narrow || root_reg < 8u) << root_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
BakerReadBarrierFirstRegField::Encode(root_reg) |
- BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(width);
}
explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
@@ -86,6 +97,12 @@
kLast
};
+ enum class BakerReadBarrierWidth : uint8_t {
+ kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+ kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+ kLast
+ };
+
static constexpr size_t kBitsForBakerReadBarrierKind =
MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
static constexpr size_t kBitsForRegister = 4u;
@@ -95,9 +112,14 @@
BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>;
using BakerReadBarrierSecondRegField =
BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
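+ // encoded_data layout (from LSB): kind, first register (4 bits), second register (4 bits), width.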
+ static constexpr size_t kBitsForBakerReadBarrierWidth =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+ using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth,
+ kBitsForBakerReadBarrierKind + 2 * kBitsForRegister,
+ kBitsForBakerReadBarrierWidth>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister);
+ DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg;
}
void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data);
@@ -108,6 +130,11 @@
template <typename Vector>
static uint32_t GetInsn32(Vector* code, uint32_t offset);
+ static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset);
+
+ template <typename Vector>
+ static uint32_t GetInsn16(Vector* code, uint32_t offset);
+
friend class Thumb2RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher);
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 2e28349..af5fa40 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -52,6 +52,9 @@
// BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset.
static constexpr uint32_t kBneWPlus0 = 0xf0408000u;
+ // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn.
+ static constexpr uint32_t kLdrInsn = 0x6800u;
+
// LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn.
static constexpr uint32_t kLdrWInsn = 0xf8d00000u;
@@ -223,9 +226,11 @@
void TestStringReference(uint32_t string_offset);
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
- std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
+ std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
- 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow));
ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -237,9 +242,9 @@
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
- std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
+ std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) {
LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
- 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow));
ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -260,7 +265,8 @@
(static_cast<uint32_t>(output_[offset + 1]) << 8);
}
- void TestBakerField(uint32_t offset, uint32_t ref_reg);
+ void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg);
+ void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg);
};
const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -568,7 +574,7 @@
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
-void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
+void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
8, 9, 10, 11, // IP, SP, LR and PC are reserved.
@@ -584,8 +590,8 @@
const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
ArrayRef<const uint8_t> code(raw_code);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ false);
const LinkerPatch patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
};
@@ -608,7 +614,8 @@
ASSERT_TRUE(
CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
- std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg);
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false);
ASSERT_GT(output_.size(), thunk_offset);
ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
@@ -666,15 +673,131 @@
}
}
-#define TEST_BAKER_FIELD(offset, ref_reg) \
- TEST_F(Thumb2RelativePatcherTest, \
- BakerOffset##offset##_##ref_reg) { \
- TestBakerField(offset, ref_reg); \
+void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ DCHECK_ALIGNED(offset, 4u);
+ DCHECK_LT(offset, 32u);
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
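+ // The imm5 field (bits 10-6) holds offset / 4, hence the shift by (6 - 2).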
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ true);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+ ASSERT_TRUE(
+ CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ size_t gray_check_offset = thunk_offset;
+ if (holder_reg == base_reg) {
+ // Verify that the null-check uses the correct register, i.e. holder_reg.
+ if (holder_reg < 8) {
+ ASSERT_GE(output_.size() - gray_check_offset, 2u);
+ ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ gray_check_offset += 2u;
+ } else {
+ ASSERT_GE(output_.size() - gray_check_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ gray_check_offset += 6u;
+ }
+ }
+ // Verify that the lock word for gray bit check is loaded from the holder address.
+ ASSERT_GE(output_.size() - gray_check_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ const uint32_t load_lock_word =
+ kLdrWInsn |
+ (holder_reg << 16) |
+ (/* IP */ 12 << 12) |
+ mirror::Object::MonitorOffset().Uint32Value();
+ ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+ }
+}
+
+#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetWide##offset##_##ref_reg) { \
+ TestBakerFieldWide(offset, ref_reg); \
}
-TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7)
-TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11)
+
+#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetNarrow##offset##_##ref_reg) { \
+ TestBakerFieldNarrow(offset, ref_reg); \
+ }
+
+TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7)
TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
// One thunk in the middle with maximum distance branches to it from both sides.
@@ -682,8 +805,8 @@
constexpr uint32_t kLiteralOffset1 = 6u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -710,7 +833,8 @@
// - thunk size and method 3 pre-header, rounded up (padding in between if needed)
// - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
// - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
size_t filler2_size =
1 * MB - (kLiteralOffset2 + kPcAdjustment)
- RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
@@ -749,8 +873,8 @@
constexpr uint32_t kLiteralOffset1 = 4u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -779,8 +903,8 @@
constexpr uint32_t kLiteralOffset1 = 6u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -809,7 +933,8 @@
// - thunk size and method 3 pre-header, rounded up (padding in between if needed)
// - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
// - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
size_t filler2_size =
1 * MB - (kReachableFromOffset2 + kPcAdjustment)
- RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
@@ -929,7 +1054,7 @@
}
}
-TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
8, 9, 10, 11, // IP, SP, LR and PC are reserved.
@@ -945,7 +1070,8 @@
ArrayRef<const uint8_t> code(raw_code);
const LinkerPatch patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(
- kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)),
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)),
};
AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
}
@@ -962,7 +1088,67 @@
ASSERT_EQ(kMethodCodeSize, expected_code.size());
EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
- std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg);
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
+ if (root_reg < 8) {
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ } else {
+ ASSERT_GE(output_.size() - thunk_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ }
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ // Not appplicable to high registers.
+ };
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 2u;
+ uint32_t method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true);
ASSERT_GT(output_.size(), thunk_offset);
ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
@@ -973,14 +1159,8 @@
}
// Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
- if (root_reg < 8) {
- ASSERT_GE(output_.size() - thunk_offset, 2u);
- ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
- } else {
- ASSERT_GE(output_.size() - thunk_offset, 6u);
- ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
- ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
- }
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
// Do not check the rest of the implementation.
// The next thunk follows on the next aligned offset.
@@ -998,7 +1178,8 @@
patches.reserve(num_patches);
const uint32_t ldr =
kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12);
- uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false);
for (size_t i = 0; i != num_patches; ++i) {
PushBackInsn(&code, ldr);
PushBackInsn(&code, kBneWPlus0);
@@ -1067,7 +1248,7 @@
// this pushes the first GC root thunk's pending MaxNextOffset() before the method call
// thunk's pending MaxNextOffset() which needs to be adjusted.
ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
- CompileBakerGcRootThunk(/* root_reg */ 0).size());
+ CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size());
static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
@@ -1080,9 +1261,9 @@
ldr2, kBneWPlus0, // Second GC root LDR with read barrier.
});
uint32_t encoded_data1 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1);
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false);
uint32_t encoded_data2 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2);
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false);
const LinkerPatch last_method_patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index d1ab410..02a5b1e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -100,7 +100,7 @@
BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < 30u && reg != 16u && reg != 17u);
+ DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg;
}
void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 35dccd6..8650aee 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -90,13 +90,17 @@
}
static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
- DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
__ BindTrackedLabel(bne_label);
Label placeholder_label;
__ b(&placeholder_label, NE); // Placeholder, patched at link-time.
__ Bind(&placeholder_label);
}
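+// A narrow (16-bit, encoding T1) LDR needs low registers for Rt and Rn and a
+// word-scaled imm5 offset (at most 124); we only use it for offsets below 32
+// (see the code size comment at the field load below).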
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+ return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
static constexpr int kRegListThreshold = 4;
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -8049,8 +8053,9 @@
// return_address:
CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8063,16 +8068,18 @@
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, root_reg, obj, offset);
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8172,10 +8179,12 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register ref_reg = ref.AsRegister<Register>();
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = temp.AsRegister<Register>();
@@ -8183,10 +8192,14 @@
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
}
CheckLastTempIsBakerCcEntrypointRegister(instruction);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8199,19 +8212,20 @@
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- Register ref_reg = ref.AsRegister<Register>();
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, ref_reg, base, offset);
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
@@ -8257,7 +8271,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8282,15 +8296,15 @@
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index ed0a64c..54aa03c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -6094,7 +6094,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6189,7 +6189,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 8417f84..b2e0a91 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -124,6 +124,10 @@
__ bind(&placeholder_label);
}
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+ return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
class EmitAdrCode {
public:
EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
@@ -8158,8 +8162,9 @@
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+ root_reg.GetCode(), narrow);
vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8174,15 +8179,16 @@
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8283,10 +8289,12 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
vixl32::Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = RegisterFrom(temp);
@@ -8294,12 +8302,15 @@
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
}
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(),
- obj.GetCode());
+ base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8316,19 +8327,24 @@
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
- __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // Note: We need a Wide NEG for the unpoisoning.
+ // Note: We need a specific width for the unpoisoning NEG.
if (kPoisonHeapReferences) {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
@@ -8374,7 +8390,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8404,9 +8420,7 @@
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
// Note: We need a Wide NEG for the unpoisoning.
@@ -8414,6 +8428,8 @@
__ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 5c36110..2ff9018 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -924,9 +924,11 @@
class ScopedForce32Bit {
public:
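+ // Pass force = false to leave the encoding size unchanged; this allows
+ // forcing 32-bit encodings conditionally (e.g. only for wide Baker loads).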
- explicit ScopedForce32Bit(Thumb2Assembler* assembler)
+ explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true)
: assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
- assembler->Force32Bit();
+ if (force) {
+ assembler->Force32Bit();
+ }
}
~ScopedForce32Bit() {