Implement all vector instructions for X86
Add X86 code generation for the vector operations, and add X86
disassembler support for the new instructions.
Change-Id: I72b48f5efa3a516a16bb1dd4bdb5c9270a8db53a
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e7a629a..889ea8b 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -81,6 +81,16 @@
#endif
};
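+// Temp pools for the 128-bit XMM views of the FP registers. With REX
+// support on x86-64, the pool extends to xmm8-xmm15.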
+static const RegStorage xp_temps_arr_32[] = {
+ rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+};
+static const RegStorage xp_temps_arr_64[] = {
+ rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+#ifdef TARGET_REX_SUPPORT
+ rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+#endif
+};
+
static const std::vector<RegStorage> empty_pool;
static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
@@ -111,6 +121,11 @@
static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
+static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32,
+ xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0]));
+static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64,
+ xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0]));
+
RegStorage rs_rX86_SP;
X86NativeRegisterPool rX86_ARG0;
@@ -209,7 +224,7 @@
/* Double registers in x86 are just a single FP register */
seed = 1;
/* FP register starts at bit position 16 */
- shift = reg.IsFloat() ? kX86FPReg0 : 0;
+ shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0;
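+ /* 128-bit vector registers overlap the FP registers, so they share the FP mask bits */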
/* Expand the double register id into single offset */
shift += reg_id;
return (seed << shift);
@@ -542,17 +557,31 @@
// Target-specific adjustments.
+ // Add in XMM registers.
+ const std::vector<RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32;
+ for (RegStorage reg : *xp_temps) {
+ RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
+ reginfo_map_.Put(reg.GetReg(), info);
+ info->SetIsTemp(true);
+ }
+
// Alias single precision xmm to double xmms.
// TODO: as needed, add larger vector sizes - alias all to the largest.
GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
int sp_reg_num = info->GetReg().GetRegNum();
+ RegStorage xp_reg = RegStorage::Solo128(sp_reg_num);
+ RegisterInfo* xp_reg_info = GetRegInfo(xp_reg);
+ // 128-bit xmm vector register's master storage should refer to itself.
+ DCHECK_EQ(xp_reg_info, xp_reg_info->Master());
+
+ // Redirect 32-bit vector's master storage to 128-bit vector.
+ info->SetMaster(xp_reg_info);
+
RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
- // 64-bit xmm vector register's master storage should refer to itself.
- DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
- // Redirect 32-bit vector's master storage to 64-bit vector.
- info->SetMaster(dp_reg_info);
+ // Redirect 64-bit vector's master storage to 128-bit vector.
+ dp_reg_info->SetMaster(xp_reg_info);
}
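+ // After this loop, sN and dN both name the 128-bit xN register as their
+ // master, so clobbering any view of an xmm register invalidates all views.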
// Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
@@ -1240,6 +1269,45 @@
case kMirOpConstVector:
GenConst128(bb, mir);
break;
+ case kMirOpMoveVector:
+ GenMoveVector(bb, mir);
+ break;
+ case kMirOpPackedMultiply:
+ GenMultiplyVector(bb, mir);
+ break;
+ case kMirOpPackedAddition:
+ GenAddVector(bb, mir);
+ break;
+ case kMirOpPackedSubtract:
+ GenSubtractVector(bb, mir);
+ break;
+ case kMirOpPackedShiftLeft:
+ GenShiftLeftVector(bb, mir);
+ break;
+ case kMirOpPackedSignedShiftRight:
+ GenSignedShiftRightVector(bb, mir);
+ break;
+ case kMirOpPackedUnsignedShiftRight:
+ GenUnsignedShiftRightVector(bb, mir);
+ break;
+ case kMirOpPackedAnd:
+ GenAndVector(bb, mir);
+ break;
+ case kMirOpPackedOr:
+ GenOrVector(bb, mir);
+ break;
+ case kMirOpPackedXor:
+ GenXorVector(bb, mir);
+ break;
+ case kMirOpPackedAddReduce:
+ GenAddReduceVector(bb, mir);
+ break;
+ case kMirOpPackedReduce:
+ GenReduceVector(bb, mir);
+ break;
+ case kMirOpPackedSet:
+ GenSetVector(bb, mir);
+ break;
default:
break;
}
@@ -1249,9 +1317,9 @@
int type_size = mir->dalvikInsn.vA;
// We support 128 bit vectors.
DCHECK_EQ(type_size & 0xFFFF, 128);
- int reg = mir->dalvikInsn.vB;
- DCHECK_LT(reg, 8);
+ RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
uint32_t *args = mir->dalvikInsn.arg;
+ int reg = rs_dest.GetReg();
// Check for all 0 case.
if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
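+ // xorps reg, reg zeroes all 128 bits without touching the constant pool.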
NewLIR2(kX86XorpsRR, reg, reg);
@@ -1277,6 +1345,287 @@
SetMemRefType(load, true, kLiteral);
}
+void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
+ // We only support 128 bit registers.
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vC);
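+ // A single aligned 128-bit move covers every element type.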
+ NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg());
+}
+
+void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
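+ // vA packs the element size into its upper 16 bits and the vector width
+ // (always 128 here) into its lower 16 bits.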
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ int opcode = 0;
+ switch (opsize) {
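+ // Note: pmulld (k32) requires SSE4.1; the remaining forms are SSE2 or earlier.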
+ case k32:
+ opcode = kX86PmulldRR;
+ break;
+ case kSignedHalf:
+ opcode = kX86PmullwRR;
+ break;
+ case kSingle:
+ opcode = kX86MulpsRR;
+ break;
+ case kDouble:
+ opcode = kX86MulpdRR;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported vector multiply " << opsize;
+ break;
+ }
+ NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ int opcode = 0;
+ switch (opsize) {
+ case k32:
+ opcode = kX86PadddRR;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ opcode = kX86PaddwRR;
+ break;
+ case kUnsignedByte:
+ case kSignedByte:
+ opcode = kX86PaddbRR;
+ break;
+ case kSingle:
+ opcode = kX86AddpsRR;
+ break;
+ case kDouble:
+ opcode = kX86AddpdRR;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported vector addition " << opsize;
+ break;
+ }
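+ // The packed integer adds wrap on overflow; no saturating forms are used.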
+ NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ int opcode = 0;
+ switch (opsize) {
+ case k32:
+ opcode = kX86PsubdRR;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ opcode = kX86PsubwRR;
+ break;
+ case kUnsignedByte:
+ case kSignedByte:
+ opcode = kX86PsubbRR;
+ break;
+ case kSingle:
+ opcode = kX86SubpsRR;
+ break;
+ case kDouble:
+ opcode = kX86SubpdRR;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported vector subtraction " << opsize;
+ break;
+ }
+ NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ int imm = mir->dalvikInsn.vC;
+ int opcode = 0;
+ switch (opsize) {
+ case k32:
+ opcode = kX86PslldRI;
+ break;
+ case k64:
+ opcode = kX86PsllqRI;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ opcode = kX86PsllwRI;
+ break;
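+ // SSE provides no byte-granularity shifts, so byte vectors are unsupported.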
+ default:
+ LOG(FATAL) << "Unsupported vector shift left " << opsize;
+ break;
+ }
+ NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+}
+
+void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ int imm = mir->dalvikInsn.vC;
+ int opcode = 0;
+ switch (opsize) {
+ case k32:
+ opcode = kX86PsradRI;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ opcode = kX86PsrawRI;
+ break;
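+ // SSE has no packed arithmetic right shift for 64-bit or byte lanes, so
+ // those sizes fall through to the fatal default.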
+ default:
+ LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
+ break;
+ }
+ NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+}
+
+void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ int imm = mir->dalvikInsn.vC;
+ int opcode = 0;
+ switch (opsize) {
+ case k32:
+ opcode = kX86PsrldRI;
+ break;
+ case k64:
+ opcode = kX86PsrlqRI;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ opcode = kX86PsrlwRI;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
+ break;
+ }
+ NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+}
+
+void X86Mir2Lir::GenAndVector(BasicBlock *bb, MIR *mir) {
+ // We only support 128 bit registers.
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenOrVector(BasicBlock *bb, MIR *mir) {
+ // We only support 128 bit registers.
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenXorVector(BasicBlock *bb, MIR *mir) {
+ // We only support 128 bit registers.
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenAddReduceVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ int opcode = 0;
+ switch (opsize) {
+ case k32:
+ opcode = kX86PhadddRR;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ opcode = kX86PhaddwRR;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported vector add reduce " << opsize;
+ break;
+ }
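+ // phaddw/phaddd (SSSE3) sum horizontally adjacent lane pairs, halving the
+ // lane count on each pass.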
+ NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
+void X86Mir2Lir::GenReduceVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
+ int index = mir->dalvikInsn.arg[0];
+ int opcode = 0;
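+ // Note: pextrw is SSE2, but pextrb and pextrd require SSE4.1.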
+ switch (opsize) {
+ case k32:
+ opcode = kX86PextrdRRI;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ opcode = kX86PextrwRRI;
+ break;
+ case kUnsignedByte:
+ case kSignedByte:
+ opcode = kX86PextrbRRI;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported vector reduce " << opsize;
+ break;
+ }
+ // We need to extract to a GPR.
+ RegStorage temp = AllocTemp();
+ NewLIR3(opcode, temp.GetReg(), rs_src.GetReg(), index);
+
+ // Assume that the destination VR is in the def for the mir.
+ RegLocation rl_dest = mir_graph_->GetDest(mir);
+ RegLocation rl_temp =
+ {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, temp, INVALID_SREG, INVALID_SREG};
+ StoreValue(rl_dest, rl_temp);
+}
+
+void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
+ DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
+ RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
+ int op_low = 0, op_high = 0;
+ switch (opsize) {
+ case k32:
+ op_low = kX86PshufdRRI;
+ break;
+ case kSignedHalf:
+ case kUnsignedHalf:
+ // Handles low quadword.
+ op_low = kX86PshuflwRRI;
+ // Handles upper quadword.
+ op_high = kX86PshufdRRI;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported vector set " << opsize;
+ break;
+ }
+
+ // Load the value from the VR into a GPR.
+ RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+ rl_src = LoadValue(rl_src, kCoreReg);
+
+ // Load the value into the XMM register.
+ NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg());
+
+ // Now shuffle the value across the destination.
+ NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0);
+
+ // And then repeat as needed.
+ if (op_high != 0) {
+ NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), 0);
+ }
+}
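+
+// For reference, the sequence GenSetVector emits for a halfword (16-bit) set
+// is roughly (register names illustrative):
+//   movd    xmm1, eax       // kX86MovdxrRR: copy the GPR value into lane 0
+//   pshuflw xmm1, xmm1, 0   // broadcast word 0 across the low quadword
+//   pshufd  xmm1, xmm1, 0   // broadcast dword 0 across the full register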
+
LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
for (LIR *p = const_vectors_; p != nullptr; p = p->next) {