Rewrite use/def masks to support 128 bits.
Reduce LIR memory usage by holding masks in the LIR by
pointer rather than by value, using pre-defined const masks
for the common cases, and allocating very few on the arena.
Change-Id: I0f6d27ef6867acd157184c8c74f9612cebfe6c16
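
For review context, here is a minimal sketch of the 128-bit mask type this
change is built around. Only the names that appear in the diff (ResourceMask,
ResourceMask::Bit, SetBit, HasBit, Equals, kEncodeNone, kEncodeAll, and the
ResourceMask::k* resource bits) come from the patch itself; the bit layout,
the RawMask/SetBits/Intersects helpers, and the constexpr details below are
illustrative assumptions, not the verbatim ART implementation.

#include <cstddef>
#include <cstdint>

class ResourceMask {
 public:
  // Non-register resource bits; the exact positions are assumptions.
  enum ResourceBit : size_t {
    kMustNotAlias = 123,
    kHeapRef = 124,
    kLiteral = 125,
    kDalvikReg = 126,
    kCCode = 127,
  };

  // A mask with a single bit set, addressing either 64-bit half.
  static constexpr ResourceMask Bit(size_t bit) {
    return ResourceMask(bit < 64u ? UINT64_C(1) << bit : 0u,
                        bit < 64u ? 0u : UINT64_C(1) << (bit - 64u));
  }

  static constexpr ResourceMask RawMask(uint64_t mask1, uint64_t mask2) {
    return ResourceMask(mask1, mask2);
  }

  void SetBit(size_t bit) {
    masks_[bit / 64u] |= UINT64_C(1) << (bit % 64u);
  }

  void SetBits(const ResourceMask& other) {
    masks_[0] |= other.masks_[0];
    masks_[1] |= other.masks_[1];
  }

  constexpr bool HasBit(size_t bit) const {
    return (masks_[bit / 64u] & (UINT64_C(1) << (bit % 64u))) != 0u;
  }

  constexpr bool Equals(const ResourceMask& other) const {
    return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1];
  }

  constexpr bool Intersects(const ResourceMask& other) const {
    return (masks_[0] & other.masks_[0]) != 0u ||
           (masks_[1] & other.masks_[1]) != 0u;
  }

 private:
  constexpr ResourceMask(uint64_t mask1, uint64_t mask2)
      : masks_{ mask1, mask2 } {}

  uint64_t masks_[2];  // 128 bits as two 64-bit words.
};

// Shared constants: since LIRs now hold `const ResourceMask*`, the common
// all/none cases point here and never touch the arena.
constexpr ResourceMask kEncodeNone = ResourceMask::RawMask(0u, 0u);
constexpr ResourceMask kEncodeAll = ResourceMask::RawMask(~UINT64_C(0), ~UINT64_C(0));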
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index ec165af..d1ba239 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -206,77 +206,70 @@
/*
* Decode the register id.
*/
-uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
- uint64_t seed;
- int shift;
- int reg_id;
-
- reg_id = reg.GetRegNum();
- /* Double registers in x86 are just a single FP register */
- seed = 1;
- /* FP register starts at bit position 16 */
- shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0;
- /* Expand the double register id into single offset */
- shift += reg_id;
- return (seed << shift);
+ResourceMask X86Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
+ /* Double registers in x86 are just a single FP register, so this is always a single bit. */
+ return ResourceMask::Bit(
+ /* FP register starts at bit position 16 */
+ ((reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0) + reg.GetRegNum());
}
-uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
+ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const {
/*
* FIXME: might make sense to use a virtual resource encoding bit for pc. Might be
* able to clean up some of the x86/Arm_Mips differences
*/
LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
- return 0ULL;
+ return kEncodeNone;
}
-void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
+void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
+ ResourceMask* use_mask, ResourceMask* def_mask) {
DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
DCHECK(!lir->flags.use_def_invalid);
// X86-specific resource map setup here.
if (flags & REG_USE_SP) {
- lir->u.m.use_mask |= ENCODE_X86_REG_SP;
+ use_mask->SetBit(kX86RegSP);
}
if (flags & REG_DEF_SP) {
- lir->u.m.def_mask |= ENCODE_X86_REG_SP;
+ def_mask->SetBit(kX86RegSP);
}
if (flags & REG_DEFA) {
- SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
+ SetupRegMask(def_mask, rs_rAX.GetReg());
}
if (flags & REG_DEFD) {
- SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
+ SetupRegMask(def_mask, rs_rDX.GetReg());
}
if (flags & REG_USEA) {
- SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
+ SetupRegMask(use_mask, rs_rAX.GetReg());
}
if (flags & REG_USEC) {
- SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
+ SetupRegMask(use_mask, rs_rCX.GetReg());
}
if (flags & REG_USED) {
- SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
+ SetupRegMask(use_mask, rs_rDX.GetReg());
}
if (flags & REG_USEB) {
- SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
+ SetupRegMask(use_mask, rs_rBX.GetReg());
}
// Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
if (lir->opcode == kX86RepneScasw) {
- SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
- SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
- SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
- SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
+ SetupRegMask(use_mask, rs_rAX.GetReg());
+ SetupRegMask(use_mask, rs_rCX.GetReg());
+ SetupRegMask(use_mask, rs_rDI.GetReg());
+ SetupRegMask(def_mask, rs_rDI.GetReg());
}
if (flags & USE_FP_STACK) {
- lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
- lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
+ use_mask->SetBit(kX86FPStack);
+ def_mask->SetBit(kX86FPStack);
}
}
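
One note for reviewers between hunks: with the masks now passed in as
ResourceMask*, the repeated SetupRegMask(use_mask, ...) calls above
presumably just fold the single-register encoding from GetRegMaskCommon()
into the pointed-to mask. An illustrative reconstruction of that shared
helper, using the sketch above (the DCHECK and the RegStorage conversion
are assumptions):

void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) {
  DCHECK(mask != nullptr);
  // GetRegMaskCommon() now returns a ResourceMask by value; OR it in.
  mask->SetBits(GetRegMaskCommon(RegStorage::Solo32(reg)));
}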
@@ -368,40 +361,40 @@
return buf;
}
-void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
+void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, const ResourceMask& mask, const char *prefix) {
char buf[256];
buf[0] = 0;
- if (mask == ENCODE_ALL) {
+ if (mask.Equals(kEncodeAll)) {
strcpy(buf, "all");
} else {
char num[8];
int i;
for (i = 0; i < kX86RegEnd; i++) {
- if (mask & (1ULL << i)) {
+ if (mask.HasBit(i)) {
snprintf(num, arraysize(num), "%d ", i);
strcat(buf, num);
}
}
- if (mask & ENCODE_CCODE) {
+ if (mask.HasBit(ResourceMask::kCCode)) {
strcat(buf, "cc ");
}
/* Memory bits */
- if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
+ if (x86LIR && (mask.HasBit(ResourceMask::kDalvikReg))) {
snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
(DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
}
- if (mask & ENCODE_LITERAL) {
+ if (mask.HasBit(ResourceMask::kLiteral)) {
strcat(buf, "lit ");
}
- if (mask & ENCODE_HEAP_REF) {
+ if (mask.HasBit(ResourceMask::kHeapRef)) {
strcat(buf, "heap ");
}
- if (mask & ENCODE_MUST_NOT_ALIAS) {
+ if (mask.HasBit(ResourceMask::kMustNotAlias)) {
strcat(buf, "noalias ");
}
}
@@ -551,7 +544,7 @@
} else {
// Mark as a scheduling barrier.
DCHECK(!mem_barrier->flags.use_def_invalid);
- mem_barrier->u.m.def_mask = ENCODE_ALL;
+ mem_barrier->u.m.def_mask = &kEncodeAll;
}
return ret;
#else
@@ -822,6 +815,7 @@
int r_base = TargetReg(kSp).GetReg();
int displacement = SRegOffset(rl_dest.s_reg_low);
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
false /* is_load */, true /* is64bit */);
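
The ScopedMemRefType guard introduced here (and in the hunks below) replaces
the old after-the-fact SetMemRefType() fixups: it switches the generator's
current memory reference type for the duration of a scope, so memory
operations are annotated as they are emitted. A sketch of the RAII shape this
relies on, assuming a Mir2Lir::mem_ref_type_ field (member names are
illustrative):

class ScopedMemRefType {
 public:
  ScopedMemRefType(Mir2Lir* m2l, ResourceMask::ResourceBit new_mem_ref_type)
      : m2l_(m2l), old_mem_ref_type_(m2l->mem_ref_type_) {
    // Loads/stores emitted inside the scope pick up this type.
    m2l_->mem_ref_type_ = new_mem_ref_type;
  }

  ~ScopedMemRefType() {
    m2l_->mem_ref_type_ = old_mem_ref_type_;  // Restore the previous type.
  }

 private:
  Mir2Lir* const m2l_;
  const ResourceMask::ResourceBit old_mem_ref_type_;
};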
@@ -1109,7 +1103,10 @@
} else {
// Load the start index from stack, remembering that we pushed EDI.
int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
- Load32Disp(rs_rX86_SP, displacement, rs_rBX);
+ {
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+ Load32Disp(rs_rX86_SP, displacement, rs_rBX);
+ }
OpRegReg(kOpXor, rs_rDI, rs_rDI);
OpRegReg(kOpCmp, rs_rBX, rs_rDI);
OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
@@ -1413,10 +1410,10 @@
// We don't know the proper offset for the value, so pick one that will force
// 4 byte offset. We will fix this up in the assembler later to have the right
// value.
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */);
load->flags.fixup = kFixupLoad;
load->target = data_target;
- SetMemRefType(load, true, kLiteral);
}
void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
@@ -1856,6 +1853,7 @@
* end up half-promoted. In those cases, we must flush the promoted
* half to memory as well.
*/
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
for (int i = 0; i < cu_->num_ins; i++) {
PromotionMap* v_map = &promotion_map_[start_vreg + i];
RegStorage reg = RegStorage::InvalidReg();
@@ -1986,12 +1984,14 @@
if (loc.wide) {
loc = UpdateLocWide(loc);
if (loc.location == kLocPhysReg) {
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
}
next_arg += 2;
} else {
loc = UpdateLoc(loc);
if (loc.location == kLocPhysReg) {
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32);
}
next_arg++;
@@ -2008,6 +2008,8 @@
int current_src_offset = start_offset;
int current_dest_offset = outs_offset;
+ // Only Dalvik regs are accessed in this loop; no next_call_insn() calls.
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
while (regs_left_to_pass_via_stack > 0) {
// This is based on the knowledge that the stack itself is 16-byte aligned.
bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
@@ -2045,6 +2047,7 @@
bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
if (src_is_16b_aligned) {
ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
} else if (src_is_8b_aligned) {
@@ -2074,8 +2077,7 @@
AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
} else {
// Set barrier for 128-bit load.
- SetMemRefType(ld1, true /* is_load */, kDalvikReg);
- ld1->u.m.def_mask = ENCODE_ALL;
+ ld1->u.m.def_mask = &kEncodeAll;
}
}
if (st1 != nullptr) {
@@ -2085,8 +2087,7 @@
AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
} else {
// Set barrier for 128-bit store.
- SetMemRefType(st1, false /* is_load */, kDalvikReg);
- st1->u.m.def_mask = ENCODE_ALL;
+ st1->u.m.def_mask = &kEncodeAll;
}
}
@@ -2123,20 +2124,23 @@
if (!reg.Valid()) {
int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
- if (rl_arg.wide) {
- if (rl_arg.location == kLocPhysReg) {
- StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+ {
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+ if (rl_arg.wide) {
+ if (rl_arg.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+ } else {
+ LoadValueDirectWideFixed(rl_arg, regWide);
+ StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
+ }
+ i++;
} else {
- LoadValueDirectWideFixed(rl_arg, regWide);
- StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
- }
- i++;
- } else {
- if (rl_arg.location == kLocPhysReg) {
- StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
- } else {
- LoadValueDirectFixed(rl_arg, regSingle);
- StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+ if (rl_arg.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
+ } else {
+ LoadValueDirectFixed(rl_arg, regSingle);
+ StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+ }
}
}
call_state = next_call_insn(cu_, info, call_state, target_method,
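
A closing note on the memory claim in the commit message: because each LIR
now stores const ResourceMask* for its use/def masks, the common masks
resolve to the shared constants and only unusual combinations need an arena
copy. A plausible caching helper, sketched with a hypothetical name and an
assumed arena allocation kind:

const ResourceMask* Mir2Lir::CacheMask(const ResourceMask& mask) {
  // Hypothetical helper; illustrates "allocating very few on the arena".
  if (mask.Equals(kEncodeAll)) {
    return &kEncodeAll;
  }
  if (mask.Equals(kEncodeNone)) {
    return &kEncodeNone;
  }
  // Uncommon mask: place a copy in the arena for the LIR to point at.
  void* mem = arena_->Alloc(sizeof(ResourceMask), kArenaAllocLIRResourceMask);
  return new (mem) ResourceMask(mask);
}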