AArch64: Add tbz/tbnz and tst.
Since the branch offset supported by tbz/tbnz is quite small (-32KB to
+32KB), the instruction is replaced by tst and b.eq/b.ne in the fix-up
stage if the branch offset turns out to be too large.
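
For example (a sketch; register, bit number and label are illustrative):

    tbz w0, #3, label      // branch to label if bit 3 of w0 is zero

becomes, when label lies outside the tbz/tbnz range:

    tst w0, #0x8           // test bit 3 (1 << 3); sets the condition codes
    b.eq label             // branch if the tested bit was zero
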
Change-Id: I4cace06bec6425e0f2e1f5f7c471eec08d06bca6
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 15c89f2..5115246 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -89,6 +89,7 @@
* M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
 * B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
* H -> operand shift
+ * h -> 6-bit shift immediate
* T -> register shift (either ", lsl #0" or ", lsl #12")
* e -> register extend (e.g. uxtb #1)
* o -> register shift (e.g. lsl #1) for Word registers
@@ -614,10 +615,24 @@
kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
"subs", "!0r, !1R, #!2d", kFixupNone),
- ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a000000),
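+ // tst is ANDS with the zero register as destination (Rd = 0b11111),
+ // which is why both opcode bases below end in 0x1f.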
+ ENCODING_MAP(WIDE(kA64Tst2rl), SF_VARIANTS(0x7200001f),
+ kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+ "tst", "!0r, !1l", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
- kFmtUnused, -1, -1, IS_QUAD_OP | REG_USE01 | SETS_CCODES,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
"tst", "!0r, !1r!2o", kFixupNone),
+ // NOTE: tbz/tbnz do not actually set the condition codes, but they may be replaced in the
+ // fix-up stage by other LIRs (tst and b.eq/b.ne) that do, so SETS_CCODES is set here.
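+ // The wide and narrow variants share one opcode (hence the identical CUSTOM_VARIANTS
+ // values): the register width is implied by bit 31 of the encoding, which kFmtImm6Shift
+ // fills with the high bit of the bit number.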
+ ENCODING_MAP(WIDE(kA64Tbnz3rht), CUSTOM_VARIANTS(0x37000000, 0x37000000),
+ kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+ IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+ "tbnz", "!0r, #!1h, !2t", kFixupTBxZ),
+ ENCODING_MAP(WIDE(kA64Tbz3rht), CUSTOM_VARIANTS(0x36000000, 0x36000000),
+ kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+ IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+ "tbz", "!0r, #!1h, !2t", kFixupTBxZ),
ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
@@ -787,6 +802,11 @@
value |= ((operand & 0x1ffffc) >> 2) << 5;
bits |= value;
break;
+ case kFmtImm6Shift:
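+ // The 6-bit bit number is split across the A64 test-and-branch encoding:
+ // bits<4:0> go to the b40 field (bits 23:19), bit<5> to the b5 field (bit 31).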
+ value = (operand & 0x1f) << 19;
+ value |= ((operand & 0x20) >> 5) << 31;
+ bits |= value;
+ break;
default:
LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
<< " (" << kind << ")";
@@ -827,11 +847,6 @@
*/
int generation = 0;
while (true) {
- // TODO(Arm64): check whether passes and offset adjustments are really necessary.
- // Currently they aren't, as - in the fixups below - LIR are never inserted.
- // Things can be different if jump ranges above 1 MB need to be supported.
- // If they are not, then we can get rid of the assembler retry logic.
-
offset_adjustment = 0;
AssemblerStatus res = kSuccess; // Assume success
generation ^= 1;
@@ -839,13 +854,9 @@
lir = first_fixup_;
prev_lir = NULL;
while (lir != NULL) {
- /*
- * NOTE: the lir being considered here will be encoded following the switch (so long as
- * we're not in a retry situation). However, any new non-pc_rel instructions inserted
- * due to retry must be explicitly encoded at the time of insertion. Note that
- * inserted instructions don't need use/def flags, but do need size and pc-rel status
- * properly updated.
- */
+ // NOTE: Any new non-pc_rel instructions inserted due to retry must be explicitly encoded at
+ // the time of insertion. Note that inserted instructions don't need use/def flags, but do
+ // need size and pc-rel status properly updated.
lir->offset += offset_adjustment;
// During pass, allows us to tell whether a node has been updated with offset_adjustment yet.
lir->flags.generation = generation;
@@ -861,7 +872,8 @@
CodeOffset target = target_lir->offset +
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
- if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ DCHECK_EQ(delta & 0x3, 0);
+ if (!IS_SIGNED_IMM19(delta >> 2)) {
LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
}
lir->operands[0] = delta >> 2;
@@ -876,12 +888,75 @@
CodeOffset target = target_lir->offset +
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
- if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ DCHECK_EQ(delta & 0x3, 0);
+ if (!IS_SIGNED_IMM19(delta >> 2)) {
LOG(FATAL) << "Invalid jump range in kFixupLoad";
}
lir->operands[1] = delta >> 2;
break;
}
+ case kFixupTBxZ: {
+ int16_t opcode = lir->opcode;
+ RegStorage reg(lir->operands[0] | RegStorage::kValid);
+ int32_t imm = lir->operands[1];
+ DCHECK_EQ(IS_WIDE(opcode), reg.Is64Bit());
+ DCHECK_LT(imm, 64);
+ if (imm >= 32) {
+ DCHECK(IS_WIDE(opcode));
+ } else if (kIsDebugBuild && IS_WIDE(opcode)) {
+ // "tbz/tbnz x0, #imm(<32)" is the same with "tbz/tbnz w0, #imm(<32)", but GCC/oatdump
+ // will disassemble it as "tbz/tbnz w0, #imm(<32)". So unwide the LIR to make the
+ // compiler log behave the same with those disassembler in debug build.
+ // This will also affect tst instruction if it need to be replaced, but there is no
+ // performance difference between "tst Xt" and "tst Wt".
+ lir->opcode = UNWIDE(opcode);
+ lir->operands[0] = As32BitReg(reg).GetReg();
+ }
+
+ // Fix-up branch offset.
+ LIR *target_lir = lir->target;
+ DCHECK(target_lir);
+ CodeOffset pc = lir->offset;
+ CodeOffset target = target_lir->offset +
+ ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+ int32_t delta = target - pc;
+ DCHECK_EQ(delta & 0x3, 0);
+ // Check if branch offset can be encoded in tbz/tbnz.
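+ // tbz/tbnz take a signed 14-bit word offset, i.e. a range of roughly +/-32KB.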
+ if (!IS_SIGNED_IMM14(delta >> 2)) {
+ DexOffset dalvik_offset = lir->dalvik_offset;
+ int16_t opcode = lir->opcode;
+ LIR* target = lir->target;
+ // "tbz/tbnz Rt, #imm, label" -> "tst Rt, #(1<<imm)".
+ offset_adjustment -= lir->flags.size;
+ int32_t imm = EncodeLogicalImmediate(IS_WIDE(opcode), UINT64_C(1) << lir->operands[1]);
+ DCHECK_NE(imm, -1);
+ lir->opcode = IS_WIDE(opcode) ? WIDE(kA64Tst2rl) : kA64Tst2rl;
+ lir->operands[1] = imm;
+ lir->target = nullptr;
+ lir->flags.fixup = EncodingMap[kA64Tst2rl].fixup;
+ lir->flags.size = EncodingMap[kA64Tst2rl].size;
+ offset_adjustment += lir->flags.size;
+ // Insert "b.eq/b.ne label".
+ opcode = UNWIDE(opcode);
+ DCHECK(opcode == kA64Tbz3rht || opcode == kA64Tbnz3rht);
+ LIR* new_lir = RawLIR(dalvik_offset, kA64B2ct,
+ opcode == kA64Tbz3rht ? kArmCondEq : kArmCondNe, 0, 0, 0, 0, target);
+ InsertLIRAfter(lir, new_lir);
+ new_lir->offset = lir->offset + lir->flags.size;
+ new_lir->flags.generation = generation;
+ new_lir->flags.fixup = EncodingMap[kA64B2ct].fixup;
+ new_lir->flags.size = EncodingMap[kA64B2ct].size;
+ offset_adjustment += new_lir->flags.size;
+ // lir no longer pcrel, unlink and link in new_lir.
+ ReplaceFixup(prev_lir, lir, new_lir);
+ prev_lir = new_lir; // Continue with the new instruction.
+ lir = new_lir->u.a.pcrel_next;
+ res = kRetryAll;
+ continue;
+ }
+ lir->operands[2] = delta >> 2;
+ break;
+ }
case kFixupAdr: {
LIR* target_lir = lir->target;
int32_t delta;
@@ -910,6 +985,7 @@
}
if (res == kSuccess) {
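+ // A successful pass inserted no instructions, so there is no net size adjustment.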
+ DCHECK_EQ(offset_adjustment, 0);
break;
} else {
assembler_retries++;