blob: 2684f92b3a9318192860d36142cef1e697b68d6f [file] [log] [blame]
Eugene Zelenko5c883452017-09-27 23:26:01 +00001//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
// intrinsics
Ayman Musaeadb58f2017-05-15 11:30:54 +00003//
4// The LLVM Compiler Infrastructure
5//
6// This file is distributed under the University of Illinois Open Source
7// License. See LICENSE.TXT for details.
8//
9//===----------------------------------------------------------------------===//
10//
11// This pass replaces masked memory intrinsics - when unsupported by the target
12// - with a chain of basic blocks, that deal with the elements one-by-one if the
13// appropriate mask bit is set.
14//
15//===----------------------------------------------------------------------===//
16
Eugene Zelenko5c883452017-09-27 23:26:01 +000017#include "llvm/ADT/Twine.h"
Ayman Musaeadb58f2017-05-15 11:30:54 +000018#include "llvm/Analysis/TargetTransformInfo.h"
David Blaikiee3a9b4c2017-11-17 01:07:10 +000019#include "llvm/CodeGen/TargetSubtargetInfo.h"
Eugene Zelenko5c883452017-09-27 23:26:01 +000020#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/Constant.h"
22#include "llvm/IR/Constants.h"
23#include "llvm/IR/DerivedTypes.h"
24#include "llvm/IR/Function.h"
Ayman Musaeadb58f2017-05-15 11:30:54 +000025#include "llvm/IR/IRBuilder.h"
Eugene Zelenko5c883452017-09-27 23:26:01 +000026#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/Instructions.h"
Reid Kleckner66191212017-09-07 23:27:44 +000029#include "llvm/IR/IntrinsicInst.h"
Eugene Zelenko5c883452017-09-27 23:26:01 +000030#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/Type.h"
32#include "llvm/IR/Value.h"
33#include "llvm/Pass.h"
34#include "llvm/Support/Casting.h"
Eugene Zelenko5c883452017-09-27 23:26:01 +000035#include <algorithm>
36#include <cassert>
Ayman Musaeadb58f2017-05-15 11:30:54 +000037
38using namespace llvm;
39
40#define DEBUG_TYPE "scalarize-masked-mem-intrin"
41
42namespace {
43
44class ScalarizeMaskedMemIntrin : public FunctionPass {
Eugene Zelenko5c883452017-09-27 23:26:01 +000045 const TargetTransformInfo *TTI = nullptr;
Ayman Musaeadb58f2017-05-15 11:30:54 +000046
47public:
48 static char ID; // Pass identification, replacement for typeid
Eugene Zelenko5c883452017-09-27 23:26:01 +000049
50 explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
Ayman Musaeadb58f2017-05-15 11:30:54 +000051 initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
52 }
Eugene Zelenko5c883452017-09-27 23:26:01 +000053
Ayman Musaeadb58f2017-05-15 11:30:54 +000054 bool runOnFunction(Function &F) override;
55
56 StringRef getPassName() const override {
57 return "Scalarize Masked Memory Intrinsics";
58 }
59
60 void getAnalysisUsage(AnalysisUsage &AU) const override {
61 AU.addRequired<TargetTransformInfoWrapperPass>();
62 }
63
64private:
65 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
66 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
67};
Eugene Zelenko5c883452017-09-27 23:26:01 +000068
69} // end anonymous namespace
Ayman Musaeadb58f2017-05-15 11:30:54 +000070
71char ScalarizeMaskedMemIntrin::ID = 0;
Eugene Zelenko5c883452017-09-27 23:26:01 +000072
Matthias Braun94c49042017-05-25 21:26:32 +000073INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
74 "Scalarize unsupported masked memory intrinsics", false, false)
Ayman Musaeadb58f2017-05-15 11:30:54 +000075
76FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
77 return new ScalarizeMaskedMemIntrin();
78}
79
Craig Topperf947b532018-09-27 22:31:42 +000080static bool isConstantIntVector(Value *Mask) {
81 Constant *C = dyn_cast<Constant>(Mask);
82 if (!C)
83 return false;
84
85 unsigned NumElts = Mask->getType()->getVectorNumElements();
86 for (unsigned i = 0; i != NumElts; ++i) {
87 Constant *CElt = C->getAggregateElement(i);
88 if (!CElt || !isa<ConstantInt>(CElt))
89 return false;
90 }
91
92 return true;
93}
94
Ayman Musaeadb58f2017-05-15 11:30:54 +000095// Translate a masked load intrinsic like
96// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
97// <16 x i1> %mask, <16 x i32> %passthru)
98// to a chain of basic blocks, with loading element one-by-one if
99// the appropriate mask bit is set
100//
101// %1 = bitcast i8* %addr to i32*
102// %2 = extractelement <16 x i1> %mask, i32 0
Craig Topper3844ee82018-09-27 21:28:39 +0000103// br i1 %2, label %cond.load, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000104//
105// cond.load: ; preds = %0
Craig Topper3844ee82018-09-27 21:28:39 +0000106// %3 = getelementptr i32* %1, i32 0
107// %4 = load i32* %3
Craig Topper4f954002018-09-27 21:28:52 +0000108// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
Ayman Musaeadb58f2017-05-15 11:30:54 +0000109// br label %else
110//
111// else: ; preds = %0, %cond.load
Craig Topper3844ee82018-09-27 21:28:39 +0000112// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
113// %6 = extractelement <16 x i1> %mask, i32 1
114// br i1 %6, label %cond.load1, label %else2
Ayman Musaeadb58f2017-05-15 11:30:54 +0000115//
116// cond.load1: ; preds = %else
Craig Topper3844ee82018-09-27 21:28:39 +0000117// %7 = getelementptr i32* %1, i32 1
118// %8 = load i32* %7
119// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
Ayman Musaeadb58f2017-05-15 11:30:54 +0000120// br label %else2
121//
122// else2: ; preds = %else, %cond.load1
Craig Topper3844ee82018-09-27 21:28:39 +0000123// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
124// %10 = extractelement <16 x i1> %mask, i32 2
125// br i1 %10, label %cond.load4, label %else5
Ayman Musaeadb58f2017-05-15 11:30:54 +0000126//
127static void scalarizeMaskedLoad(CallInst *CI) {
128 Value *Ptr = CI->getArgOperand(0);
129 Value *Alignment = CI->getArgOperand(1);
130 Value *Mask = CI->getArgOperand(2);
131 Value *Src0 = CI->getArgOperand(3);
132
133 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Craig Topper5c98f232018-09-27 22:31:40 +0000134 VectorType *VecType = cast<VectorType>(CI->getType());
Ayman Musaeadb58f2017-05-15 11:30:54 +0000135
Craig Topper5c98f232018-09-27 22:31:40 +0000136 Type *EltTy = VecType->getElementType();
Ayman Musaeadb58f2017-05-15 11:30:54 +0000137
138 IRBuilder<> Builder(CI->getContext());
139 Instruction *InsertPt = CI;
140 BasicBlock *IfBlock = CI->getParent();
Ayman Musaeadb58f2017-05-15 11:30:54 +0000141
142 Builder.SetInsertPoint(InsertPt);
143 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
144
145 // Short-cut if the mask is all-true.
Craig Topper1a8d3252018-09-27 21:28:41 +0000146 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000147 Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
148 CI->replaceAllUsesWith(NewI);
149 CI->eraseFromParent();
150 return;
151 }
152
153 // Adjust alignment for the scalar instruction.
Craig Topper67529042018-09-28 03:35:37 +0000154 AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000155 // Bitcast %addr fron i8* to EltTy*
156 Type *NewPtrType =
157 EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
158 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
159 unsigned VectorWidth = VecType->getNumElements();
160
Ayman Musaeadb58f2017-05-15 11:30:54 +0000161 // The result vector
Craig Topper4f954002018-09-27 21:28:52 +0000162 Value *VResult = Src0;
Ayman Musaeadb58f2017-05-15 11:30:54 +0000163
Craig Topperf947b532018-09-27 22:31:42 +0000164 if (isConstantIntVector(Mask)) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000165 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topper4b75b6e2018-09-27 21:28:46 +0000166 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musaeadb58f2017-05-15 11:30:54 +0000167 continue;
168 Value *Gep =
169 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
170 LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
171 VResult =
172 Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
173 }
Craig Topper4f954002018-09-27 21:28:52 +0000174 CI->replaceAllUsesWith(VResult);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000175 CI->eraseFromParent();
176 return;
177 }
178
Ayman Musaeadb58f2017-05-15 11:30:54 +0000179 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000180 // Fill the "else" block, created in the previous iteration
181 //
182 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
183 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
Craig Topperb41c4e12018-09-27 18:01:48 +0000184 // br i1 %mask_1, label %cond.load, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000185 //
Ayman Musaeadb58f2017-05-15 11:30:54 +0000186
187 Value *Predicate =
188 Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
Ayman Musaeadb58f2017-05-15 11:30:54 +0000189
190 // Create "cond" block
191 //
192 // %EltAddr = getelementptr i32* %1, i32 0
193 // %Elt = load i32* %EltAddr
194 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
195 //
Craig Toppera0a61c42018-10-30 20:33:58 +0000196 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
197 "cond.load");
Ayman Musaeadb58f2017-05-15 11:30:54 +0000198 Builder.SetInsertPoint(InsertPt);
199
200 Value *Gep =
201 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
202 LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
Craig Topper4f954002018-09-27 21:28:52 +0000203 Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
204 Builder.getInt32(Idx));
Ayman Musaeadb58f2017-05-15 11:30:54 +0000205
206 // Create "else" block, fill it in the next iteration
207 BasicBlock *NewIfBlock =
208 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
209 Builder.SetInsertPoint(InsertPt);
210 Instruction *OldBr = IfBlock->getTerminator();
Craig Topperb41c4e12018-09-27 18:01:48 +0000211 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000212 OldBr->eraseFromParent();
Craig Toppera0a61c42018-10-30 20:33:58 +0000213 BasicBlock *PrevIfBlock = IfBlock;
Ayman Musaeadb58f2017-05-15 11:30:54 +0000214 IfBlock = NewIfBlock;
Craig Topper4f954002018-09-27 21:28:52 +0000215
216 // Create the phi to join the new and previous value.
217 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
218 Phi->addIncoming(NewVResult, CondBlock);
219 Phi->addIncoming(VResult, PrevIfBlock);
220 VResult = Phi;
Ayman Musaeadb58f2017-05-15 11:30:54 +0000221 }
222
Craig Topper4f954002018-09-27 21:28:52 +0000223 CI->replaceAllUsesWith(VResult);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000224 CI->eraseFromParent();
225}
226
227// Translate a masked store intrinsic, like
228// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
229// <16 x i1> %mask)
230// to a chain of basic blocks, that stores element one-by-one if
231// the appropriate mask bit is set
232//
233// %1 = bitcast i8* %addr to i32*
234// %2 = extractelement <16 x i1> %mask, i32 0
Craig Topper3844ee82018-09-27 21:28:39 +0000235// br i1 %2, label %cond.store, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000236//
237// cond.store: ; preds = %0
Craig Topper3844ee82018-09-27 21:28:39 +0000238// %3 = extractelement <16 x i32> %val, i32 0
239// %4 = getelementptr i32* %1, i32 0
240// store i32 %3, i32* %4
Ayman Musaeadb58f2017-05-15 11:30:54 +0000241// br label %else
242//
243// else: ; preds = %0, %cond.store
Craig Topper3844ee82018-09-27 21:28:39 +0000244// %5 = extractelement <16 x i1> %mask, i32 1
245// br i1 %5, label %cond.store1, label %else2
Ayman Musaeadb58f2017-05-15 11:30:54 +0000246//
247// cond.store1: ; preds = %else
Craig Topper3844ee82018-09-27 21:28:39 +0000248// %6 = extractelement <16 x i32> %val, i32 1
249// %7 = getelementptr i32* %1, i32 1
250// store i32 %6, i32* %7
Ayman Musaeadb58f2017-05-15 11:30:54 +0000251// br label %else2
252// . . .
253static void scalarizeMaskedStore(CallInst *CI) {
254 Value *Src = CI->getArgOperand(0);
255 Value *Ptr = CI->getArgOperand(1);
256 Value *Alignment = CI->getArgOperand(2);
257 Value *Mask = CI->getArgOperand(3);
258
259 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Craig Topper5c98f232018-09-27 22:31:40 +0000260 VectorType *VecType = cast<VectorType>(Src->getType());
Ayman Musaeadb58f2017-05-15 11:30:54 +0000261
262 Type *EltTy = VecType->getElementType();
263
264 IRBuilder<> Builder(CI->getContext());
265 Instruction *InsertPt = CI;
266 BasicBlock *IfBlock = CI->getParent();
267 Builder.SetInsertPoint(InsertPt);
268 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
269
270 // Short-cut if the mask is all-true.
Craig Topper1a8d3252018-09-27 21:28:41 +0000271 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000272 Builder.CreateAlignedStore(Src, Ptr, AlignVal);
273 CI->eraseFromParent();
274 return;
275 }
276
277 // Adjust alignment for the scalar instruction.
Craig Topper67529042018-09-28 03:35:37 +0000278 AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000279 // Bitcast %addr fron i8* to EltTy*
280 Type *NewPtrType =
281 EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
282 Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
283 unsigned VectorWidth = VecType->getNumElements();
284
Craig Topperf947b532018-09-27 22:31:42 +0000285 if (isConstantIntVector(Mask)) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000286 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topper4b75b6e2018-09-27 21:28:46 +0000287 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musaeadb58f2017-05-15 11:30:54 +0000288 continue;
289 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
290 Value *Gep =
291 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
292 Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
293 }
294 CI->eraseFromParent();
295 return;
296 }
297
298 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000299 // Fill the "else" block, created in the previous iteration
300 //
301 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
Craig Topperb41c4e12018-09-27 18:01:48 +0000302 // br i1 %mask_1, label %cond.store, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000303 //
304 Value *Predicate =
305 Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
Ayman Musaeadb58f2017-05-15 11:30:54 +0000306
307 // Create "cond" block
308 //
309 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
310 // %EltAddr = getelementptr i32* %1, i32 0
311 // %store i32 %OneElt, i32* %EltAddr
312 //
313 BasicBlock *CondBlock =
314 IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
315 Builder.SetInsertPoint(InsertPt);
316
317 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
318 Value *Gep =
319 Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
320 Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
321
322 // Create "else" block, fill it in the next iteration
323 BasicBlock *NewIfBlock =
324 CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
325 Builder.SetInsertPoint(InsertPt);
326 Instruction *OldBr = IfBlock->getTerminator();
Craig Topperb41c4e12018-09-27 18:01:48 +0000327 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000328 OldBr->eraseFromParent();
329 IfBlock = NewIfBlock;
330 }
331 CI->eraseFromParent();
332}
333
334// Translate a masked gather intrinsic like
335// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
336// <16 x i1> %Mask, <16 x i32> %Src)
337// to a chain of basic blocks, with loading element one-by-one if
338// the appropriate mask bit is set
339//
Craig Topper3844ee82018-09-27 21:28:39 +0000340// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
341// %Mask0 = extractelement <16 x i1> %Mask, i32 0
342// br i1 %Mask0, label %cond.load, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000343//
344// cond.load:
Craig Topper3844ee82018-09-27 21:28:39 +0000345// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
346// %Load0 = load i32, i32* %Ptr0, align 4
347// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
Ayman Musaeadb58f2017-05-15 11:30:54 +0000348// br label %else
349//
350// else:
Craig Topper3844ee82018-09-27 21:28:39 +0000351// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
352// %Mask1 = extractelement <16 x i1> %Mask, i32 1
353// br i1 %Mask1, label %cond.load1, label %else2
Ayman Musaeadb58f2017-05-15 11:30:54 +0000354//
355// cond.load1:
Craig Topper3844ee82018-09-27 21:28:39 +0000356// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
357// %Load1 = load i32, i32* %Ptr1, align 4
358// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
Ayman Musaeadb58f2017-05-15 11:30:54 +0000359// br label %else2
360// . . .
Craig Topper3844ee82018-09-27 21:28:39 +0000361// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
Ayman Musaeadb58f2017-05-15 11:30:54 +0000362// ret <16 x i32> %Result
363static void scalarizeMaskedGather(CallInst *CI) {
364 Value *Ptrs = CI->getArgOperand(0);
365 Value *Alignment = CI->getArgOperand(1);
366 Value *Mask = CI->getArgOperand(2);
367 Value *Src0 = CI->getArgOperand(3);
368
Craig Topper5c98f232018-09-27 22:31:40 +0000369 VectorType *VecType = cast<VectorType>(CI->getType());
Ayman Musaeadb58f2017-05-15 11:30:54 +0000370
371 IRBuilder<> Builder(CI->getContext());
372 Instruction *InsertPt = CI;
373 BasicBlock *IfBlock = CI->getParent();
Ayman Musaeadb58f2017-05-15 11:30:54 +0000374 Builder.SetInsertPoint(InsertPt);
375 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
376
377 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
378
Ayman Musaeadb58f2017-05-15 11:30:54 +0000379 // The result vector
Craig Topper2122d182018-09-27 21:28:59 +0000380 Value *VResult = Src0;
Ayman Musaeadb58f2017-05-15 11:30:54 +0000381 unsigned VectorWidth = VecType->getNumElements();
382
383 // Shorten the way if the mask is a vector of constants.
Craig Topperf947b532018-09-27 22:31:42 +0000384 if (isConstantIntVector(Mask)) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000385 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topper4b75b6e2018-09-27 21:28:46 +0000386 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musaeadb58f2017-05-15 11:30:54 +0000387 continue;
388 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
389 "Ptr" + Twine(Idx));
390 LoadInst *Load =
391 Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
392 VResult = Builder.CreateInsertElement(
393 VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
394 }
Craig Topper2122d182018-09-27 21:28:59 +0000395 CI->replaceAllUsesWith(VResult);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000396 CI->eraseFromParent();
397 return;
398 }
399
Ayman Musaeadb58f2017-05-15 11:30:54 +0000400 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000401 // Fill the "else" block, created in the previous iteration
402 //
403 // %Mask1 = extractelement <16 x i1> %Mask, i32 1
Craig Topperb41c4e12018-09-27 18:01:48 +0000404 // br i1 %Mask1, label %cond.load, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000405 //
Ayman Musaeadb58f2017-05-15 11:30:54 +0000406
407 Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
408 "Mask" + Twine(Idx));
Ayman Musaeadb58f2017-05-15 11:30:54 +0000409
410 // Create "cond" block
411 //
412 // %EltAddr = getelementptr i32* %1, i32 0
413 // %Elt = load i32* %EltAddr
414 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
415 //
Craig Toppera0a61c42018-10-30 20:33:58 +0000416 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Ayman Musaeadb58f2017-05-15 11:30:54 +0000417 Builder.SetInsertPoint(InsertPt);
418
419 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
420 "Ptr" + Twine(Idx));
421 LoadInst *Load =
422 Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
Craig Topper2122d182018-09-27 21:28:59 +0000423 Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
424 Builder.getInt32(Idx),
425 "Res" + Twine(Idx));
Ayman Musaeadb58f2017-05-15 11:30:54 +0000426
427 // Create "else" block, fill it in the next iteration
428 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
429 Builder.SetInsertPoint(InsertPt);
430 Instruction *OldBr = IfBlock->getTerminator();
Craig Topperb41c4e12018-09-27 18:01:48 +0000431 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000432 OldBr->eraseFromParent();
Craig Toppera0a61c42018-10-30 20:33:58 +0000433 BasicBlock *PrevIfBlock = IfBlock;
Ayman Musaeadb58f2017-05-15 11:30:54 +0000434 IfBlock = NewIfBlock;
Craig Topper2122d182018-09-27 21:28:59 +0000435
436 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
437 Phi->addIncoming(NewVResult, CondBlock);
438 Phi->addIncoming(VResult, PrevIfBlock);
439 VResult = Phi;
Ayman Musaeadb58f2017-05-15 11:30:54 +0000440 }
441
Craig Topper2122d182018-09-27 21:28:59 +0000442 CI->replaceAllUsesWith(VResult);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000443 CI->eraseFromParent();
444}
445
446// Translate a masked scatter intrinsic, like
447// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
448// <16 x i1> %Mask)
449// to a chain of basic blocks, that stores element one-by-one if
450// the appropriate mask bit is set.
451//
Craig Topper3844ee82018-09-27 21:28:39 +0000452// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
453// %Mask0 = extractelement <16 x i1> %Mask, i32 0
454// br i1 %Mask0, label %cond.store, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000455//
456// cond.store:
Craig Topper3844ee82018-09-27 21:28:39 +0000457// %Elt0 = extractelement <16 x i32> %Src, i32 0
458// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
459// store i32 %Elt0, i32* %Ptr0, align 4
Ayman Musaeadb58f2017-05-15 11:30:54 +0000460// br label %else
461//
462// else:
Craig Topper3844ee82018-09-27 21:28:39 +0000463// %Mask1 = extractelement <16 x i1> %Mask, i32 1
464// br i1 %Mask1, label %cond.store1, label %else2
Ayman Musaeadb58f2017-05-15 11:30:54 +0000465//
466// cond.store1:
Craig Topper3844ee82018-09-27 21:28:39 +0000467// %Elt1 = extractelement <16 x i32> %Src, i32 1
468// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
469// store i32 %Elt1, i32* %Ptr1, align 4
Ayman Musaeadb58f2017-05-15 11:30:54 +0000470// br label %else2
471// . . .
472static void scalarizeMaskedScatter(CallInst *CI) {
473 Value *Src = CI->getArgOperand(0);
474 Value *Ptrs = CI->getArgOperand(1);
475 Value *Alignment = CI->getArgOperand(2);
476 Value *Mask = CI->getArgOperand(3);
477
478 assert(isa<VectorType>(Src->getType()) &&
479 "Unexpected data type in masked scatter intrinsic");
480 assert(isa<VectorType>(Ptrs->getType()) &&
481 isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
482 "Vector of pointers is expected in masked scatter intrinsic");
483
484 IRBuilder<> Builder(CI->getContext());
485 Instruction *InsertPt = CI;
486 BasicBlock *IfBlock = CI->getParent();
487 Builder.SetInsertPoint(InsertPt);
488 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
489
490 unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
491 unsigned VectorWidth = Src->getType()->getVectorNumElements();
492
493 // Shorten the way if the mask is a vector of constants.
Craig Topperf947b532018-09-27 22:31:42 +0000494 if (isConstantIntVector(Mask)) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000495 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Craig Topper4b75b6e2018-09-27 21:28:46 +0000496 if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
Ayman Musaeadb58f2017-05-15 11:30:54 +0000497 continue;
498 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
499 "Elt" + Twine(Idx));
500 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
501 "Ptr" + Twine(Idx));
502 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
503 }
504 CI->eraseFromParent();
505 return;
506 }
Craig Topper1a8d3252018-09-27 21:28:41 +0000507
Ayman Musaeadb58f2017-05-15 11:30:54 +0000508 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
509 // Fill the "else" block, created in the previous iteration
510 //
Craig Topperb41c4e12018-09-27 18:01:48 +0000511 // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
512 // br i1 %Mask1, label %cond.store, label %else
Ayman Musaeadb58f2017-05-15 11:30:54 +0000513 //
514 Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
515 "Mask" + Twine(Idx));
Ayman Musaeadb58f2017-05-15 11:30:54 +0000516
517 // Create "cond" block
518 //
Craig Topper3844ee82018-09-27 21:28:39 +0000519 // %Elt1 = extractelement <16 x i32> %Src, i32 1
520 // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
521 // %store i32 %Elt1, i32* %Ptr1
Ayman Musaeadb58f2017-05-15 11:30:54 +0000522 //
523 BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
524 Builder.SetInsertPoint(InsertPt);
525
526 Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
527 "Elt" + Twine(Idx));
528 Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
529 "Ptr" + Twine(Idx));
530 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
531
532 // Create "else" block, fill it in the next iteration
533 BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
534 Builder.SetInsertPoint(InsertPt);
535 Instruction *OldBr = IfBlock->getTerminator();
Craig Topperb41c4e12018-09-27 18:01:48 +0000536 BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
Ayman Musaeadb58f2017-05-15 11:30:54 +0000537 OldBr->eraseFromParent();
538 IfBlock = NewIfBlock;
539 }
540 CI->eraseFromParent();
541}
542
543bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000544 bool EverMadeChange = false;
545
546 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
547
548 bool MadeChange = true;
549 while (MadeChange) {
550 MadeChange = false;
551 for (Function::iterator I = F.begin(); I != F.end();) {
552 BasicBlock *BB = &*I++;
553 bool ModifiedDTOnIteration = false;
554 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
555
556 // Restart BB iteration if the dominator tree of the Function was changed
557 if (ModifiedDTOnIteration)
558 break;
559 }
560
561 EverMadeChange |= MadeChange;
562 }
563
564 return EverMadeChange;
565}
566
567bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
568 bool MadeChange = false;
569
570 BasicBlock::iterator CurInstIterator = BB.begin();
571 while (CurInstIterator != BB.end()) {
572 if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
573 MadeChange |= optimizeCallInst(CI, ModifiedDT);
574 if (ModifiedDT)
575 return true;
576 }
577
578 return MadeChange;
579}
580
581bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
582 bool &ModifiedDT) {
Ayman Musaeadb58f2017-05-15 11:30:54 +0000583 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
584 if (II) {
585 switch (II->getIntrinsicID()) {
586 default:
587 break;
Eugene Zelenko5c883452017-09-27 23:26:01 +0000588 case Intrinsic::masked_load:
Ayman Musaeadb58f2017-05-15 11:30:54 +0000589 // Scalarize unsupported vector masked load
590 if (!TTI->isLegalMaskedLoad(CI->getType())) {
591 scalarizeMaskedLoad(CI);
592 ModifiedDT = true;
593 return true;
594 }
595 return false;
Eugene Zelenko5c883452017-09-27 23:26:01 +0000596 case Intrinsic::masked_store:
Ayman Musaeadb58f2017-05-15 11:30:54 +0000597 if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
598 scalarizeMaskedStore(CI);
599 ModifiedDT = true;
600 return true;
601 }
602 return false;
Eugene Zelenko5c883452017-09-27 23:26:01 +0000603 case Intrinsic::masked_gather:
Ayman Musaeadb58f2017-05-15 11:30:54 +0000604 if (!TTI->isLegalMaskedGather(CI->getType())) {
605 scalarizeMaskedGather(CI);
606 ModifiedDT = true;
607 return true;
608 }
609 return false;
Eugene Zelenko5c883452017-09-27 23:26:01 +0000610 case Intrinsic::masked_scatter:
Ayman Musaeadb58f2017-05-15 11:30:54 +0000611 if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
612 scalarizeMaskedScatter(CI);
613 ModifiedDT = true;
614 return true;
615 }
616 return false;
617 }
Ayman Musaeadb58f2017-05-15 11:30:54 +0000618 }
619
620 return false;
621}