MIPS32: Improve method invocation
Improvements include:
- CodeGeneratorMIPS::GenerateStaticOrDirectCall() supports:
- MethodLoadKind::kDirectAddressWithFixup (via literals)
- CodePtrLocation::kCallDirectWithFixup (via literals)
- MethodLoadKind::kDexCachePcRelative
- 32-bit literals to support the above. They are not ready for
general-purpose use yet: on MIPS32R2, simulating PC-relative
addressing clobbers RA, which is not saved in leaf methods.
MIPS32R6 is unaffected because it has PC-relative addressing
via the lwpc instruction.
- shorter instruction sequences for recursive static/direct
calls
Tested:
- test-art-host-gtest
- test-art-target-gtest and test-art-target-run-test-optimizing on:
- MIPS32R2 QEMU
- CI20 board
- MIPS32R6 (2nd arch) QEMU
Change-Id: Id5b137ad32d5590487fd154c9a01d3b3e7e044ff
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index ac93083..ebaf1c0 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -39,6 +39,7 @@
for (auto& exception_block : exception_blocks_) {
EmitExceptionPoll(&exception_block);
}
+ EmitLiterals();
PromoteBranches();
}
@@ -444,6 +445,12 @@
EmitI(0x25, rs, rt, imm16);
}
+void MipsAssembler::Lwpc(Register rs, uint32_t imm19) {
+ CHECK(IsR6());
+ CHECK(IsUint<19>(imm19)) << imm19;
+ EmitI21(0x3B, rs, (0x01 << 19) | imm19);
+}
+
void MipsAssembler::Lui(Register rt, uint16_t imm16) {
EmitI(0xf, static_cast<Register>(0), rt, imm16);
}
@@ -532,6 +539,10 @@
EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16);
}
+void MipsAssembler::Bal(uint16_t imm16) {
+ EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16);
+}
+
void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) {
EmitI(0x4, rs, rt, imm16);
}
@@ -624,6 +635,11 @@
EmitI26(0x32, imm26);
}
+void MipsAssembler::Balc(uint32_t imm26) {
+ CHECK(IsR6());
+ EmitI26(0x3A, imm26);
+}
+
void MipsAssembler::Jic(Register rt, uint16_t imm16) {
CHECK(IsR6());
EmitI(0x36, static_cast<Register>(0), rt, imm16);
@@ -1489,30 +1505,47 @@
type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
}
-void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) {
+void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) {
+ CHECK_EQ(is_call && is_literal, false);
OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
if (is_r6) {
// R6
- if (is_call) {
+ if (is_literal) {
+ CHECK(!IsResolved());
+ type_ = kR6Literal;
+ } else if (is_call) {
InitShortOrLong(offset_size, kR6Call, kR6LongCall);
- } else if (condition_ == kUncond) {
- InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
} else {
- if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
- // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
- type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
- } else {
- InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+ switch (condition_) {
+ case kUncond:
+ InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+ break;
+ case kCondEQZ:
+ case kCondNEZ:
+ // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+ type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+ break;
+ default:
+ InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+ break;
}
}
} else {
// R2
- if (is_call) {
+ if (is_literal) {
+ CHECK(!IsResolved());
+ type_ = kLiteral;
+ } else if (is_call) {
InitShortOrLong(offset_size, kCall, kLongCall);
- } else if (condition_ == kUncond) {
- InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
} else {
- InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+ switch (condition_) {
+ case kUncond:
+ InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+ break;
+ default:
+ InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+ break;
+ }
}
}
old_type_ = type_;
@@ -1544,14 +1577,14 @@
}
}
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call)
: old_location_(location),
location_(location),
target_(target),
lhs_reg_(0),
rhs_reg_(0),
condition_(kUncond) {
- InitializeType(false, is_r6);
+ InitializeType(is_call, /* is_literal */ false, is_r6);
}
MipsAssembler::Branch::Branch(bool is_r6,
@@ -1608,19 +1641,23 @@
// Branch condition is always true, make the branch unconditional.
condition_ = kUncond;
}
- InitializeType(false, is_r6);
+ InitializeType(/* is_call */ false, /* is_literal */ false, is_r6);
}
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg)
: old_location_(location),
location_(location),
- target_(target),
- lhs_reg_(indirect_reg),
- rhs_reg_(0),
+ target_(kUnresolved),
+ lhs_reg_(dest_reg),
+ rhs_reg_(base_reg),
condition_(kUncond) {
- CHECK_NE(indirect_reg, ZERO);
- CHECK_NE(indirect_reg, AT);
- InitializeType(true, is_r6);
+ CHECK_NE(dest_reg, ZERO);
+ if (is_r6) {
+ CHECK_EQ(base_reg, ZERO);
+ } else {
+ CHECK_NE(base_reg, ZERO);
+ }
+ InitializeType(/* is_call */ false, /* is_literal */ true, is_r6);
}
MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition(
@@ -1722,19 +1759,27 @@
case kUncondBranch:
case kCondBranch:
case kCall:
+ // R2 near literal.
+ case kLiteral:
// R6 short branches.
case kR6UncondBranch:
case kR6CondBranch:
case kR6Call:
+ // R6 near literal.
+ case kR6Literal:
return false;
// R2 long branches.
case kLongUncondBranch:
case kLongCondBranch:
case kLongCall:
+ // R2 far literal.
+ case kFarLiteral:
// R6 long branches.
case kR6LongUncondBranch:
case kR6LongCondBranch:
case kR6LongCall:
+ // R6 far literal.
+ case kR6FarLiteral:
return true;
}
UNREACHABLE();
@@ -1803,6 +1848,10 @@
case kCall:
type_ = kLongCall;
break;
+ // R2 near literal.
+ case kLiteral:
+ type_ = kFarLiteral;
+ break;
// R6 short branches.
case kR6UncondBranch:
type_ = kR6LongUncondBranch;
@@ -1813,6 +1862,10 @@
case kR6Call:
type_ = kR6LongCall;
break;
+ // R6 near literal.
+ case kR6Literal:
+ type_ = kR6FarLiteral;
+ break;
default:
// Note: 'type_' is already long.
break;
@@ -1820,14 +1873,26 @@
CHECK(IsLong());
}
-uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const {
+ switch (branch->GetType()) {
+ case Branch::kLiteral:
+ case Branch::kFarLiteral:
+ return GetLabelLocation(&pc_rel_base_label_);
+ default:
+ return branch->GetLocation();
+ }
+}
+
+uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) {
+ // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+ // `this->GetLocation()` for everything else.
// If the branch is still unresolved or already long, nothing to do.
if (IsLong() || !IsResolved()) {
return 0;
}
// Promote the short branch to long if the offset size is too small
- // to hold the distance between location_ and target_.
- if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+ // to hold the distance between location and target_.
+ if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) {
PromoteToLong();
uint32_t old_size = GetOldSize();
uint32_t new_size = GetSize();
@@ -1837,7 +1902,7 @@
// The following logic is for debugging/testing purposes.
// Promote some short branches to long when it's not really required.
if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
- int64_t distance = static_cast<int64_t>(target_) - location_;
+ int64_t distance = static_cast<int64_t>(target_) - location;
distance = (distance >= 0) ? distance : -distance;
if (distance >= max_short_distance) {
PromoteToLong();
@@ -1854,12 +1919,26 @@
return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
}
-uint32_t MipsAssembler::Branch::GetOffset() const {
+uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const {
+ switch (branch->GetType()) {
+ case Branch::kLiteral:
+ case Branch::kFarLiteral:
+ return GetLabelLocation(&pc_rel_base_label_);
+ default:
+ return branch->GetOffsetLocation() +
+ Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t);
+ }
+}
+
+uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const {
+ // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+ // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)`
+ // for everything else.
CHECK(IsResolved());
uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
// Calculate the byte distance between instructions and also account for
// different PC-relative origins.
- uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+ uint32_t offset = target_ - location;
// Prepare the offset for encoding into the instruction(s).
offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
return offset;
@@ -1906,7 +1985,7 @@
label->BindTo(bound_pc);
}
-uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const {
+uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const {
CHECK(label->IsBound());
uint32_t target = label->Position();
if (label->prev_branch_id_plus_one_) {
@@ -1941,6 +2020,10 @@
return old_position + last_position_adjustment_;
}
+void MipsAssembler::BindPcRelBaseLabel() {
+ Bind(&pc_rel_base_label_);
+}
+
void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
uint32_t length = branches_.back().GetLength();
if (!label->IsBound()) {
@@ -1962,7 +2045,7 @@
void MipsAssembler::Buncond(MipsLabel* label) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(IsR6(), buffer_.Size(), target);
+ branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false);
FinalizeLabeledBranch(label);
}
@@ -1976,12 +2059,46 @@
FinalizeLabeledBranch(label);
}
-void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) {
+void MipsAssembler::Call(MipsLabel* label) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg);
+ branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true);
FinalizeLabeledBranch(label);
}
+Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) {
+ DCHECK(size == 4u || size == 8u) << size;
+ literals_.emplace_back(size, data);
+ return &literals_.back();
+}
+
+void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) {
+ // Literal loads are treated as pseudo branches since they require very similar handling.
+ DCHECK_EQ(literal->GetSize(), 4u);
+ MipsLabel* label = literal->GetLabel();
+ DCHECK(!label->IsBound());
+ branches_.emplace_back(IsR6(),
+ buffer_.Size(),
+ dest_reg,
+ base_reg);
+ FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::EmitLiterals() {
+ if (!literals_.empty()) {
+ // We don't support byte and half-word literals.
+ // TODO: proper alignment for 64-bit literals when they're implemented.
+ for (Literal& literal : literals_) {
+ MipsLabel* label = literal.GetLabel();
+ Bind(label);
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u);
+ for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+ buffer_.Emit<uint8_t>(literal.GetData()[i]);
+ }
+ }
+ }
+}
+
void MipsAssembler::PromoteBranches() {
// Promote short branches to long as necessary.
bool changed;
@@ -1989,7 +2106,8 @@
changed = false;
for (auto& branch : branches_) {
CHECK(branch.IsResolved());
- uint32_t delta = branch.PromoteIfNeeded();
+ uint32_t base = GetBranchLocationOrPcRelBase(&branch);
+ uint32_t delta = branch.PromoteIfNeeded(base);
// If this branch has been promoted and needs to expand in size,
// relocate all branches by the expansion size.
if (delta) {
@@ -2027,27 +2145,35 @@
// R2 short branches.
{ 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch
{ 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch
- { 5, 2, 0, MipsAssembler::Branch::kOffset16, 0 }, // kCall
+ { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall
+ // R2 near literal.
+ { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLiteral
// R2 long branches.
{ 9, 3, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongUncondBranch
{ 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCondBranch
{ 6, 1, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCall
+ // R2 far literal.
+ { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral
// R6 short branches.
{ 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6UncondBranch
{ 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch
// Exception: kOffset23 for beqzc/bnezc.
- { 2, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Call
+ { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call
+ // R6 near literal.
+ { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Literal
// R6 long branches.
{ 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongUncondBranch
{ 3, 1, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCondBranch
- { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall
+ { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall
+ // R6 far literal.
+ { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral
};
-// Note: make sure branch_info_[] and mitBranch() are kept synchronized.
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
CHECK_EQ(overwriting_, true);
overwrite_location_ = branch->GetLocation();
- uint32_t offset = branch->GetOffset();
+ uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch));
BranchCondition condition = branch->GetCondition();
Register lhs = branch->GetLeftRegister();
Register rhs = branch->GetRightRegister();
@@ -2064,12 +2190,15 @@
Nop(); // TODO: improve by filling the delay slot.
break;
case Branch::kCall:
- Nal();
- Nop(); // TODO: is this NOP really needed here?
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
- Addiu(lhs, RA, offset);
- Jalr(lhs);
- Nop();
+ Bal(offset);
+ Nop(); // TODO: improve by filling the delay slot.
+ break;
+
+ // R2 near literal.
+ case Branch::kLiteral:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Lw(lhs, rhs, offset);
break;
// R2 long branches.
@@ -2123,11 +2252,20 @@
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Lui(AT, High16Bits(offset));
Ori(AT, AT, Low16Bits(offset));
- Addu(lhs, AT, RA);
- Jalr(lhs);
+ Addu(AT, AT, RA);
+ Jalr(AT);
Nop();
break;
+ // R2 far literal.
+ case Branch::kFarLiteral:
+ offset += (offset & 0x8000) << 1; // Account for sign extension in lw.
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Lui(AT, High16Bits(offset));
+ Addu(AT, AT, rhs);
+ Lw(lhs, AT, Low16Bits(offset));
+ break;
+
// R6 short branches.
case Branch::kR6UncondBranch:
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -2140,8 +2278,13 @@
break;
case Branch::kR6Call:
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
- Addiupc(lhs, offset);
- Jialc(lhs, 0);
+ Balc(offset);
+ break;
+
+ // R6 near literal.
+ case Branch::kR6Literal:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Lwpc(lhs, offset);
break;
// R6 long branches.
@@ -2159,11 +2302,18 @@
Jic(AT, Low16Bits(offset));
break;
case Branch::kR6LongCall:
- offset += (offset & 0x8000) << 1; // Account for sign extension in addiu.
+ offset += (offset & 0x8000) << 1; // Account for sign extension in jialc.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
- Auipc(lhs, High16Bits(offset));
- Addiu(lhs, lhs, Low16Bits(offset));
- Jialc(lhs, 0);
+ Auipc(AT, High16Bits(offset));
+ Jialc(AT, Low16Bits(offset));
+ break;
+
+ // R6 far literal.
+ case Branch::kR6FarLiteral:
+ offset += (offset & 0x8000) << 1; // Account for sign extension in lw.
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Auipc(AT, High16Bits(offset));
+ Lw(lhs, AT, Low16Bits(offset));
break;
}
CHECK_EQ(overwrite_location_, branch->GetEndLocation());
@@ -2174,8 +2324,8 @@
Buncond(label);
}
-void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) {
- Call(label, indirect_reg);
+void MipsAssembler::Bal(MipsLabel* label) {
+ Call(label);
}
void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 31b3b31..1f7781f 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -17,10 +17,12 @@
#ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
#define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
+#include <deque>
#include <utility>
#include <vector>
#include "arch/mips/instruction_set_features_mips.h"
+#include "base/arena_containers.h"
#include "base/macros.h"
#include "constants_mips.h"
#include "globals.h"
@@ -79,6 +81,49 @@
DISALLOW_COPY_AND_ASSIGN(MipsLabel);
};
+// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
+class Literal {
+ public:
+ static constexpr size_t kMaxSize = 8;
+
+ Literal(uint32_t size, const uint8_t* data)
+ : label_(), size_(size) {
+ DCHECK_LE(size, Literal::kMaxSize);
+ memcpy(data_, data, size);
+ }
+
+ template <typename T>
+ T GetValue() const {
+ DCHECK_EQ(size_, sizeof(T));
+ T value;
+ memcpy(&value, data_, sizeof(T));
+ return value;
+ }
+
+ uint32_t GetSize() const {
+ return size_;
+ }
+
+ const uint8_t* GetData() const {
+ return data_;
+ }
+
+ MipsLabel* GetLabel() {
+ return &label_;
+ }
+
+ const MipsLabel* GetLabel() const {
+ return &label_;
+ }
+
+ private:
+ MipsLabel label_;
+ const uint32_t size_;
+ uint8_t data_[kMaxSize];
+
+ DISALLOW_COPY_AND_ASSIGN(Literal);
+};
+
// Slowpath entered when Thread::Current()->_exception is non-null.
class MipsExceptionSlowPath {
public:
@@ -107,6 +152,7 @@
: Assembler(arena),
overwriting_(false),
overwrite_location_(0),
+ literals_(arena->Adapter(kArenaAllocAssembler)),
last_position_adjustment_(0),
last_old_position_(0),
last_branch_id_(0),
@@ -182,6 +228,7 @@
void Lwr(Register rt, Register rs, uint16_t imm16);
void Lbu(Register rt, Register rs, uint16_t imm16);
void Lhu(Register rt, Register rs, uint16_t imm16);
+ void Lwpc(Register rs, uint32_t imm19); // R6
void Lui(Register rt, uint16_t imm16);
void Aui(Register rt, Register rs, uint16_t imm16); // R6
void Sync(uint32_t stype);
@@ -205,6 +252,7 @@
void Sltiu(Register rt, Register rs, uint16_t imm16);
void B(uint16_t imm16);
+ void Bal(uint16_t imm16);
void Beq(Register rs, Register rt, uint16_t imm16);
void Bne(Register rs, Register rt, uint16_t imm16);
void Beqz(Register rt, uint16_t imm16);
@@ -226,6 +274,7 @@
void Auipc(Register rs, uint16_t imm16); // R6
void Addiupc(Register rs, uint32_t imm19); // R6
void Bc(uint32_t imm26); // R6
+ void Balc(uint32_t imm26); // R6
void Jic(Register rt, uint16_t imm16); // R6
void Jialc(Register rt, uint16_t imm16); // R6
void Bltc(Register rs, Register rt, uint16_t imm16); // R6
@@ -365,7 +414,7 @@
// These will generate R2 branches or R6 branches as appropriate.
void Bind(MipsLabel* label);
void B(MipsLabel* label);
- void Jalr(MipsLabel* label, Register indirect_reg);
+ void Bal(MipsLabel* label);
void Beq(Register rs, Register rt, MipsLabel* label);
void Bne(Register rs, Register rt, MipsLabel* label);
void Beqz(Register rt, MipsLabel* label);
@@ -412,6 +461,21 @@
UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS";
}
+ // Create a new literal with a given value.
+ // NOTE: Force the template parameter to be explicitly specified.
+ template <typename T>
+ Literal* NewLiteral(typename Identity<T>::type value) {
+ static_assert(std::is_integral<T>::value, "T must be an integral type.");
+ return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
+ }
+
+ // Create a new literal with the given data.
+ Literal* NewLiteral(size_t size, const uint8_t* data);
+
+ // Load literal using the base register (for R2 only) or using PC-relative loads
+ // (for R6 only; base_reg must be ZERO).
+ void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal);
+
//
// Overridden common assembler high-level functionality.
//
@@ -569,12 +633,22 @@
// Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS,
// must be used instead of MipsLabel::GetPosition()).
- uint32_t GetLabelLocation(MipsLabel* label) const;
+ uint32_t GetLabelLocation(const MipsLabel* label) const;
// Get the final position of a label after local fixup based on the old position
// recorded before FinalizeCode().
uint32_t GetAdjustedPosition(uint32_t old_position);
+ // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by
+ // reading the PC value into a general-purpose register with the NAL instruction and then loading
+ // literals through this base register. The code generator calls this method (at most once per
+ // method being compiled) to bind a label to the location for which the PC value is acquired.
+ // The assembler then computes literal offsets relative to this label.
+ void BindPcRelBaseLabel();
+
+ // Note that PC-relative literal loads are handled as pseudo branches because they need very
+ // similar relocation and may similarly expand in size to accommodate larger offsets relative
+ // to PC.
enum BranchCondition {
kCondLT,
kCondGE,
@@ -604,18 +678,26 @@
kUncondBranch,
kCondBranch,
kCall,
+ // R2 near literal.
+ kLiteral,
// R2 long branches.
kLongUncondBranch,
kLongCondBranch,
kLongCall,
+ // R2 far literal.
+ kFarLiteral,
// R6 short branches.
kR6UncondBranch,
kR6CondBranch,
kR6Call,
+ // R6 near literal.
+ kR6Literal,
// R6 long branches.
kR6LongUncondBranch,
kR6LongCondBranch,
kR6LongCall,
+ // R6 far literal.
+ kR6FarLiteral,
};
// Bit sizes of offsets defined as enums to minimize chance of typos.
enum OffsetBits {
@@ -650,17 +732,17 @@
};
static const BranchInfo branch_info_[/* Type */];
- // Unconditional branch.
- Branch(bool is_r6, uint32_t location, uint32_t target);
+ // Unconditional branch or call.
+ Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call);
// Conditional branch.
Branch(bool is_r6,
uint32_t location,
uint32_t target,
BranchCondition condition,
Register lhs_reg,
- Register rhs_reg = ZERO);
- // Call (branch and link) that stores the target address in a given register (i.e. T9).
- Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg);
+ Register rhs_reg);
+ // Literal.
+ Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg);
// Some conditional branches with lhs = rhs are effectively NOPs, while some
// others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
@@ -736,17 +818,18 @@
// that is allowed for short branches. This is for debugging/testing purposes.
// max_short_distance = 0 forces all short branches to become long.
// Use the implicit default argument when not debugging/testing.
- uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+ uint32_t PromoteIfNeeded(uint32_t location,
+ uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
// Returns the location of the instruction(s) containing the offset.
uint32_t GetOffsetLocation() const;
// Calculates and returns the offset ready for encoding in the branch instruction(s).
- uint32_t GetOffset() const;
+ uint32_t GetOffset(uint32_t location) const;
private:
// Completes branch construction by determining and recording its type.
- void InitializeType(bool is_call, bool is_r6);
+ void InitializeType(bool is_call, bool is_literal, bool is_r6);
// Helper for the above.
void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
@@ -776,12 +859,15 @@
void Buncond(MipsLabel* label);
void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
- void Call(MipsLabel* label, Register indirect_reg);
+ void Call(MipsLabel* label);
void FinalizeLabeledBranch(MipsLabel* label);
Branch* GetBranch(uint32_t branch_id);
const Branch* GetBranch(uint32_t branch_id) const;
+ uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
+ uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const;
+ void EmitLiterals();
void PromoteBranches();
void EmitBranch(Branch* branch);
void EmitBranches();
@@ -816,6 +902,15 @@
// The current overwrite location.
uint32_t overwrite_location_;
+ // Use std::deque<> for literals to allow insertions at the end
+ // without invalidating pointers and references to existing elements.
+ ArenaDeque<Literal> literals_;
+
+ // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC
+ // we get PC using the NAL instruction. This label marks the position within the assembler buffer
+ // that PC (from NAL) points to.
+ MipsLabel pc_rel_base_label_;
+
// Data for AdjustedPosition(), see the description there.
uint32_t last_position_adjustment_;
uint32_t last_old_position_;
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index ce92d60..49ef272 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -48,8 +48,30 @@
return "mips";
}
+ std::string GetAssemblerCmdName() OVERRIDE {
+ // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details.
+ return "gcc";
+ }
+
std::string GetAssemblerParameters() OVERRIDE {
- return " --no-warn -32 -march=mips32r6";
+ // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6
+ // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative
+ // branches in the .text section and so they require a relocation pass (there's a relocation
+ // section, .rela.text, that has the needed info to fix up the branches).
+ // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers.
+ // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the
+ // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily).
+ return " -march=mips32r6 -modd-spreg -Wa,--no-warn"
+ " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib";
+ }
+
+ void Pad(std::vector<uint8_t>& data) OVERRIDE {
+ // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+ // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+ // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+ // NOP is encoded as four zero bytes on MIPS.
+ size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+ data.insert(data.end(), pad_size, 0);
}
std::string GetDisassembleParameters() OVERRIDE {
@@ -272,6 +294,21 @@
DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui");
}
+TEST_F(AssemblerMIPS32r6Test, Auipc) {
+ DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Lwpc) {
+ // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset,
+ // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`.
+ // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right
+ // by 2 positions when encoding, hence `<< 2` to compensate for that shift.
+ // We capture the value of the immediate with `.set imm, {imm}` because the value is needed
+ // twice for the sign extension, but `{imm}` is substituted only once.
+ const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)";
+ DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc");
+}
+
TEST_F(AssemblerMIPS32r6Test, Bitswap) {
DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap");
}
@@ -598,12 +635,45 @@
DriverStr(expected, "StoreDToOffset");
}
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips::V0, mips::ZERO, literal);
+ constexpr size_t kAdduCount = 0x3FFDE;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+
+ std::string expected =
+ "lwpc $v0, 1f\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips::V0, mips::ZERO, literal);
+ constexpr size_t kAdduCount = 0x3FFDF;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(2f - 1b)\n"
+ "lw $v0, %lo(2f - 1b)($at)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadNearestFarLiteral");
+}
+
//////////////
// BRANCHES //
//////////////
-// TODO: MipsAssembler::Auipc
-// MipsAssembler::Addiupc
+// TODO: MipsAssembler::Addiupc
// MipsAssembler::Bc
// MipsAssembler::Jic
// MipsAssembler::Jialc
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index a1d6ad6..50a8dc2 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2293,6 +2293,44 @@
DriverStr(expected, "LoadConst32");
}
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ BindPcRelBaseLabel();
+ __ LoadLiteral(mips::V0, mips::V1, literal);
+ constexpr size_t kAddiuCount = 0x1FDE;
+ for (size_t i = 0; i != kAddiuCount; ++i) {
+ __ Addiu(mips::A0, mips::A1, 0);
+ }
+
+ std::string expected =
+ "1:\n"
+ "lw $v0, %lo(2f - 1b)($v1)\n" +
+ RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ BindPcRelBaseLabel();
+ __ LoadLiteral(mips::V0, mips::V1, literal);
+ constexpr size_t kAdduCount = 0x1FDF;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+
+ std::string expected =
+ "1:\n"
+ "lui $at, %hi(2f - 1b)\n"
+ "addu $at, $at, $v1\n"
+ "lw $v0, %lo(2f - 1b)($at)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadNearestFarLiteral");
+}
+
#undef __
} // namespace art