Rework type & size inference, literal usage
Fixes a bug in the old type inference mechanism (wasn't properly
propagating type info across Phi & move nodes). Combined type and
size inference passes.
Fixed long-standing bug in the code to load a special double-precision
immediate (would have been extremely difficult to hit this in the field).
Improved loading of floating-point immediates.
Change-Id: I1ec72edc3b25525f14d965089f8952d4f0294942
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
index 62706a7..d22c267 100644
--- a/src/compiler/codegen/arm/Assemble.cc
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -932,7 +932,7 @@
kFmtUnused, -1, -1,
IS_UNARY_OP | REG_USE0 | IS_BRANCH,
"add", "rPC, !0C", 1),
- ENCODING_MAP(kThumb2AdrST, 0xf20f0000,
+ ENCODING_MAP(kThumb2Adr, 0xf20f0000,
kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
IS_TERTIARY_OP | REG_DEF0,/* Note: doesn't affect flags */
@@ -1085,6 +1085,7 @@
if (lir->opcode == kThumbLdrPcRel ||
lir->opcode == kThumb2LdrPcRel12 ||
lir->opcode == kThumbAddPcRel ||
+ ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
/*
* PC-relative loads are mostly used to load immediates
@@ -1106,57 +1107,34 @@
LOG(FATAL) << "Unexpected pc-rel offset " << delta;
}
// Now, check for the two difficult cases
- if (1 || ((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
- ((lir->opcode == kThumb2Vldrs) && (delta > 1020))) {
- /*
- * OK - the load doesn't work. We'll just materialize
- * the immediate directly using mov16l and mov16h.
- * It's a little ugly for float immediates as we don't have
- * float ops like the core mov imm16H/L. In this case
- * we'll materialize in a core register (rLR) and then copy.
- * NOTE/WARNING: This is a *very* fragile workaround that will
- * be addressed in a later release when we have a late spill
- * capability. We can get away with it for now because rLR
- * is currently only used during call setups, and our convention
- * requires all arguments to be passed in core register & the
- * frame (and thus, we won't see any vlrds in the sequence).
- * The normal resource mask mechanism will prevent any damaging
- * code motion.
- */
- int tgtReg = (lir->opcode == kThumb2Vldrs) ? rLR :
- lir->operands[0];
- int immVal = lirTarget->operands[0];
- // The standard utilities won't work here - build manually
- ArmLIR *newMov16L =
+ if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+ ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
+ ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
+ int baseReg = (lir->opcode == kThumb2LdrPcRel12) ?
+ lir->operands[0] : rLR;
+
+ // Add new Adr to generate the address
+ ArmLIR *newAdr =
(ArmLIR *)oatNew(sizeof(ArmLIR), true);
- newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
- newMov16L->opcode = kThumb2MovImm16;
- newMov16L->operands[0] = tgtReg;
- newMov16L->operands[1] = immVal & 0xffff;
- oatSetupResourceMasks(newMov16L);
- oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L);
- ArmLIR *newMov16H =
- (ArmLIR *)oatNew(sizeof(ArmLIR), true);
- newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
- newMov16H->opcode = kThumb2MovImm16H;
- newMov16H->operands[0] = tgtReg;
- newMov16H->operands[1] = (immVal >> 16) & 0xffff;
- oatSetupResourceMasks(newMov16H);
- oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H);
- if (lir->opcode == kThumb2Vldrs) {
- // Convert the vldrs to a kThumb2Fmsr
- lir->opcode = kThumb2Fmsr;
- lir->operands[1] = rLR;
- lir->generic.target = NULL;
- lir->operands[2] = 0;
- oatSetupResourceMasks(lir);
- } else {
- // Nullify the original load
- lir->flags.isNop = true;
+ newAdr->generic.dalvikOffset = lir->generic.dalvikOffset;
+ newAdr->generic.target = lir->generic.target;
+ newAdr->opcode = kThumb2Adr;
+ newAdr->operands[0] = baseReg;
+ oatSetupResourceMasks(newAdr);
+ oatInsertLIRBefore((LIR*)lir, (LIR*)newAdr);
+
+ // Convert to normal load
+ if (lir->opcode == kThumb2LdrPcRel12) {
+ lir->opcode = kThumb2LdrRRI12;
}
+ // Change the load to be relative to the new Adr base
+ lir->operands[1] = baseReg;
+ lir->operands[2] = 0;
+ oatSetupResourceMasks(lir);
res = kRetryAll;
} else {
- if (lir->opcode == kThumb2Vldrs) {
+ if ((lir->opcode == kThumb2Vldrs) ||
+ (lir->opcode == kThumb2Vldrd)) {
lir->operands[2] = delta >> 2;
} else {
lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
@@ -1259,16 +1237,19 @@
lir->operands[0] = (delta >> 12) & 0x7ff;
NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
- } else if (lir->opcode == kThumb2AdrST) {
+ } else if (lir->opcode == kThumb2Adr) {
SwitchTable *tabRec = (SwitchTable*)lir->operands[2];
- int disp = tabRec->offset - ((lir->generic.offset + 4) & ~3);
+ ArmLIR* target = (ArmLIR*)lir->generic.target;
+ int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
+ int disp = targetDisp - ((lir->generic.offset + 4) & ~3);
if (disp < 4096) {
lir->operands[1] = disp;
} else {
- // convert to ldimm16l, ldimm16h, add tgt, pc, r12
+ // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
ArmLIR *newMov16L =
(ArmLIR *)oatNew(sizeof(ArmLIR), true);
newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
+ newMov16L->generic.target = lir->generic.target;
newMov16L->opcode = kThumb2MovImm16LST;
newMov16L->operands[0] = lir->operands[0];
newMov16L->operands[2] = (intptr_t)lir;
@@ -1278,6 +1259,7 @@
ArmLIR *newMov16H =
(ArmLIR *)oatNew(sizeof(ArmLIR), true);
newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
+ newMov16H->generic.target = lir->generic.target;
newMov16H->opcode = kThumb2MovImm16HST;
newMov16H->operands[0] = lir->operands[0];
newMov16H->operands[2] = (intptr_t)lir;
@@ -1294,13 +1276,19 @@
// operands[1] should hold disp, [2] has add, [3] has tabRec
ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
- lir->operands[1] = (tabRec->offset -
+ // If tabRec is null, this is a literal load - use generic.target
+ ArmLIR* target = (ArmLIR*)lir->generic.target;
+ int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
+ lir->operands[1] = (targetDisp -
(addPCInst->generic.offset + 4)) & 0xffff;
} else if (lir->opcode == kThumb2MovImm16HST) {
// operands[1] should hold disp, [2] has add, [3] has tabRec
ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
- lir->operands[1] = ((tabRec->offset -
+ // If tabRec is null, this is a literal load - use generic.target
+ ArmLIR* target = (ArmLIR*)lir->generic.target;
+ int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
+ lir->operands[1] = ((targetDisp -
(addPCInst->generic.offset + 4)) >> 16) & 0xffff;
}
ArmEncodingMap *encoder = &EncodingMap[lir->opcode];