//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a general divergence analysis for loop vectorization
// and GPU programs. It determines which branches and values in a loop or GPU
// program are divergent. It can help branch optimizations such as jump
// threading and loop unswitching to make better decisions.
//
// GPU programs typically use the SIMD execution model, where multiple threads
// in the same execution group have to execute in lock-step. Therefore, if the
// code contains divergent branches (i.e., threads in a group do not agree on
// which path of the branch to take), the group of threads has to execute all
// the paths from that branch with different subsets of threads enabled until
// they re-converge.
//
// Due to this execution model, some optimizations such as jump
// threading and loop unswitching can interfere with thread re-convergence.
// Therefore, an analysis that computes which branches in a GPU program are
// divergent can help the compiler to selectively run these optimizations.
//
// This implementation is derived from the Vectorization Analysis of the
// Region Vectorizer (RV). That implementation in turn is based on the approach
// described in
//
//   Improving Performance of OpenCL on CPUs
//   Ralf Karrenberg and Sebastian Hack
//   CC '12
//
// This DivergenceAnalysis implementation is generic in the sense that it does
// not itself identify the original sources of divergence. Instead, specialized
// adapter classes (LoopDivergenceAnalysis for loops and GPUDivergenceAnalysis
// for GPU programs) identify the sources of divergence (e.g., special
// variables that hold the thread ID or the loop iteration variable).
//
// The generic implementation propagates divergence to variables that are data
// or sync dependent on a source of divergence.
//
// While data dependency is a well-known concept, the notion of sync dependency
// deserves more explanation. Sync dependence characterizes the control flow
// aspect of the propagation of branch divergence. For example,
//
//   %cond = icmp slt i32 %tid, 10
//   br i1 %cond, label %then, label %else
// then:
//   br label %merge
// else:
//   br label %merge
// merge:
//   %a = phi i32 [ 0, %then ], [ 1, %else ]
//
// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
// because %tid is not on its use-def chains, %a is sync dependent on %tid
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
// The sync dependence detection (which branch induces divergence in which join
// points) is implemented in the SyncDependenceAnalysis.
//
// The current DivergenceAnalysis implementation has the following limitations:
// 1. intra-procedural. It conservatively considers the arguments of a
//    non-kernel-entry function and the return value of a function call as
//    divergent.
// 2. memory as black box. It conservatively considers values loaded from
//    generic or local address spaces as divergent. This can be improved by
//    leveraging pointer analysis and/or by modelling non-escaping memory
//    objects in SSA as done in RV.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "divergence-analysis"

// class DivergenceAnalysis
DivergenceAnalysis::DivergenceAnalysis(
    const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
    const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
    : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
      IsLCSSAForm(IsLCSSAForm) {}

void DivergenceAnalysis::markDivergent(const Value &DivVal) {
  assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
  assert(!isAlwaysUniform(DivVal) &&
         "cannot mark an always-uniform value as divergent");
  DivergentValues.insert(&DivVal);
}

void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
  UniformOverrides.insert(&UniVal);
}

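// returns whether the multi-way terminator Term is divergent, i.e., whether
// its branch or switch condition is divergent; invokes and single-successor
// terminators never diverge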
bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
  if (Term.getNumSuccessors() <= 1)
    return false;
  if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
    assert(BranchTerm->isConditional());
    return isDivergent(*BranchTerm->getCondition());
  }
  if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
    return isDivergent(*SwitchTerm->getCondition());
  }
  if (isa<InvokeInst>(Term)) {
    return false; // ignore abnormal executions through landingpad
  }

  llvm_unreachable("unexpected terminator");
}

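// a non-phi instruction becomes divergent if any of its operands is divergent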
bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
  // TODO: function calls with side effects, etc.
  for (const auto &Op : I.operands()) {
    if (isDivergent(*Op))
      return true;
  }
  return false;
}

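// returns whether Val is observed as divergent from ObservingBlock: a value
// that is uniform within its defining loop may still be observed as divergent
// outside of it if threads leave the loop through divergent exits in
// different iterations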
bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
                                             const Value &Val) const {
  const auto *Inst = dyn_cast<const Instruction>(&Val);
  if (!Inst)
    return false;
  // check whether any divergent loop carrying Val terminates before control
  // proceeds to ObservingBlock
  for (const auto *Loop = LI.getLoopFor(Inst->getParent());
       Loop != RegionLoop && !Loop->contains(&ObservingBlock);
       Loop = Loop->getParentLoop()) {
    if (DivergentLoops.find(Loop) != DivergentLoops.end())
      return true;
  }

  return false;
}

bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
  // joining of divergent disjoint paths in the parent block of Phi
  if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
    return true;
  }

  // An incoming value could be divergent by itself.
  // Otherwise, an incoming value could be uniform within the loop
  // that carries its definition but it may appear divergent
  // from outside the loop. This happens when divergent loop exits
  // drop definitions of that uniform value in different iterations.
  //
  //   for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
  //     if (i % thread_id == 0) break; // divergent loop exit
  //   }
  //   int divI = i; // divI is divergent
  for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
    const auto *InVal = Phi.getIncomingValue(i);
    if (isDivergent(*InVal) ||
        isTemporalDivergent(*Phi.getParent(), *InVal)) {
      return true;
    }
  }
  return false;
}

bool DivergenceAnalysis::inRegion(const Instruction &I) const {
  return I.getParent() && inRegion(*I.getParent());
}

bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
  // avoid dereferencing a null RegionLoop for whole-function analyses
  return RegionLoop ? RegionLoop->contains(&BB) : (BB.getParent() == &F);
}

// marks all users of loop-carried values of the loop headed by LoopHeader as
// divergent
void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
  auto *DivLoop = LI.getLoopFor(&LoopHeader);
  assert(DivLoop && "loopHeader is not actually part of a loop");

  SmallVector<BasicBlock *, 8> TaintStack;
  DivLoop->getExitBlocks(TaintStack);

  // Without LCSSA form, potential users of loop-carried values could be
  // anywhere in the dominance region of DivLoop (including its fringes for
  // phi nodes)
  DenseSet<const BasicBlock *> Visited;
  for (auto *Block : TaintStack) {
    Visited.insert(Block);
  }
  Visited.insert(&LoopHeader);

  while (!TaintStack.empty()) {
    auto *UserBlock = TaintStack.back();
    TaintStack.pop_back();

    // don't spread divergence beyond the region
    if (!inRegion(*UserBlock))
      continue;

    assert(!DivLoop->contains(UserBlock) &&
           "irreducible control flow detected");

    // phi nodes at the fringes of the dominance region
    if (!DT.dominates(&LoopHeader, UserBlock)) {
      // all PHI nodes of UserBlock become divergent
      for (auto &Phi : UserBlock->phis()) {
        Worklist.push_back(&Phi);
      }
      continue;
    }

    // taint outside users of values carried by DivLoop
    for (auto &I : *UserBlock) {
      if (isAlwaysUniform(I))
        continue;
      if (isDivergent(I))
        continue;

      for (auto &Op : I.operands()) {
        auto *OpInst = dyn_cast<Instruction>(&Op);
        if (!OpInst)
          continue;
        if (DivLoop->contains(OpInst->getParent())) {
          markDivergent(I);
          pushUsers(I);
          break;
        }
      }
    }

    // visit all blocks in the dominance region
    for (auto *SuccBlock : successors(UserBlock)) {
      if (!Visited.insert(SuccBlock).second) {
        continue;
      }
      TaintStack.push_back(SuccBlock);
    }
  }
}

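// pushes all phi nodes of Block that are not yet divergent onto the worklist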
void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
  for (const auto &Phi : Block.phis()) {
    if (isDivergent(Phi))
      continue;
    Worklist.push_back(&Phi);
  }
}

void DivergenceAnalysis::pushUsers(const Value &V) {
  for (const auto *User : V.users()) {
    const auto *UserInst = dyn_cast<const Instruction>(User);
    if (!UserInst)
      continue;

    if (isDivergent(*UserInst))
      continue;

    // only propagate divergence inside the region
    if (!inRegion(*UserInst))
      continue;
    Worklist.push_back(UserInst);
  }
}

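// pushes the non-divergent phi nodes of JoinBlock onto the worklist; returns
// true iff JoinBlock lies outside of BranchLoop and is thus a divergent loop
// exit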
bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
                                                 const Loop *BranchLoop) {
  LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");

  // ignore divergence outside the region
  if (!inRegion(JoinBlock)) {
    return false;
  }

  // push non-divergent phi nodes in JoinBlock to the worklist
  pushPHINodes(JoinBlock);

  // JoinBlock is a divergent loop exit
  if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
    return true;
  }

  // disjoint-paths divergent at JoinBlock
  markBlockJoinDivergent(JoinBlock);
  return false;
}

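// marks Term as divergent and propagates the resulting control divergence:
// phi nodes at the join points computed by the SyncDependenceAnalysis become
// divergence candidates, and loops that Term exits divergently are marked as
// divergent loops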
void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
  LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");

  markDivergent(Term);

  const auto *BranchLoop = LI.getLoopFor(Term.getParent());

  // whether there is a divergent loop exit from BranchLoop (if any)
  bool IsBranchLoopDivergent = false;

  // iterate over all blocks reachable by disjoint paths from Term within the
  // loop; this also covers loop exits that become divergent due to Term.
  for (const auto *JoinBlock : SDA.join_blocks(Term)) {
    IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
  }

  // BranchLoop is a divergent loop due to the divergent branch Term
  if (IsBranchLoopDivergent) {
    assert(BranchLoop);
    if (!DivergentLoops.insert(BranchLoop).second) {
      return;
    }
    propagateLoopDivergence(*BranchLoop);
  }
}

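// propagates the divergence caused by the divergent exits of ExitingLoop:
// loop live-outs and join points outside of ExitingLoop may turn divergent,
// and parent loops that are in turn exited divergently are handled
// recursively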
void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
  LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");

  // don't propagate beyond the region
  if (!inRegion(*ExitingLoop.getHeader()))
    return;

  const auto *BranchLoop = ExitingLoop.getParentLoop();

  // Uses of loop-carried values could occur anywhere
  // within the dominance region of the definition. All loop-carried
  // definitions are dominated by the loop header (reducible control).
  // Thus all users have to be in the dominance region of the loop header,
  // except PHI nodes that can also live at the fringes of the dom region
  // (incoming defining value).
  if (!IsLCSSAForm)
    taintLoopLiveOuts(*ExitingLoop.getHeader());

  // whether there is a divergent loop exit from BranchLoop (if any)
  bool IsBranchLoopDivergent = false;

  // iterate over all blocks reachable by disjoint paths from exits of
  // ExitingLoop; this also covers loop exits (of BranchLoop) that in turn
  // become divergent.
  for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
    IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
  }

  // BranchLoop is divergent due to a divergent loop exit of ExitingLoop
  if (IsBranchLoopDivergent) {
    assert(BranchLoop);
    if (!DivergentLoops.insert(BranchLoop).second) {
      return;
    }
    propagateLoopDivergence(*BranchLoop);
  }
}

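// propagates divergence to a fixed point: the worklist is seeded with the
// users of all known divergent values; popped instructions are re-evaluated
// and their users pushed whenever a value newly turns divergent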
void DivergenceAnalysis::compute() {
  for (auto *DivVal : DivergentValues) {
    pushUsers(*DivVal);
  }

  // propagate divergence
  while (!Worklist.empty()) {
    const Instruction &I = *Worklist.back();
    Worklist.pop_back();

    // maintain uniformity of overrides
    if (isAlwaysUniform(I))
      continue;

    // already divergent; nothing to re-compute
    if (isDivergent(I))
      continue;

    // propagate divergence caused by terminator
    if (I.isTerminator()) {
      if (updateTerminator(I)) {
        // propagate control divergence to affected instructions
        propagateBranchDivergence(I);
        continue;
      }
    }

    // update divergence of I due to divergent operands
    bool DivergentUpd = false;
    if (const auto *Phi = dyn_cast<const PHINode>(&I)) {
      DivergentUpd = updatePHINode(*Phi);
    } else {
      DivergentUpd = updateNormalInstruction(I);
    }

    // propagate value divergence to users
    if (DivergentUpd) {
      markDivergent(I);
      pushUsers(I);
    }
  }
}

bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
  return UniformOverrides.find(&V) != UniformOverrides.end();
}

bool DivergenceAnalysis::isDivergent(const Value &V) const {
  return DivergentValues.find(&V) != DivergentValues.end();
}

void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
  if (DivergentValues.empty())
    return;
  // iterate instructions using instructions() to ensure a deterministic order
  for (auto &I : instructions(F)) {
    if (isDivergent(I))
      OS << "DIVERGENT:" << I << '\n';
  }
}

// class GPUDivergenceAnalysis
GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
                                             const DominatorTree &DT,
                                             const PostDominatorTree &PDT,
                                             const LoopInfo &LI,
                                             const TargetTransformInfo &TTI)
    : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
  for (auto &I : instructions(F)) {
    if (TTI.isSourceOfDivergence(&I)) {
      DA.markDivergent(I);
    } else if (TTI.isAlwaysUniform(&I)) {
      DA.addUniformOverride(I);
    }
  }
  for (auto &Arg : F.args()) {
    if (TTI.isSourceOfDivergence(&Arg)) {
      DA.markDivergent(Arg);
    }
  }

  DA.compute();
}

bool GPUDivergenceAnalysis::isDivergent(const Value &Val) const {
  return DA.isDivergent(Val);
}

void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *Mod) const {
  OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
  DA.print(OS, Mod);
  OS << "}\n";
}
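
// A minimal usage sketch (hypothetical driver code; F and the required
// analyses DT, PDT, LI and TTI are assumed to be provided by the surrounding
// pass):
//
//   GPUDivergenceAnalysis GDA(F, DT, PDT, LI, TTI);
//   for (const Instruction &I : instructions(F))
//     if (GDA.isDivergent(I))
//       dbgs() << "DIVERGENT: " << I << '\n';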