Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 1 | //===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis |
| 2 | //Implementation -==// |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 3 | // |
| 4 | // The LLVM Compiler Infrastructure |
| 5 | // |
| 6 | // This file is distributed under the University of Illinois Open Source |
| 7 | // License. See LICENSE.TXT for details. |
| 8 | // |
| 9 | //===----------------------------------------------------------------------===// |
| 10 | // |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 11 | // This file implements divergence analysis which determines whether a branch |
| 12 | // in a GPU program is divergent.It can help branch optimizations such as jump |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 13 | // threading and loop unswitching to make better decisions. |
| 14 | // |
| 15 | // GPU programs typically use the SIMD execution model, where multiple threads |
| 16 | // in the same execution group have to execute in lock-step. Therefore, if the |
| 17 | // code contains divergent branches (i.e., threads in a group do not agree on |
| 18 | // which path of the branch to take), the group of threads has to execute all |
| 19 | // the paths from that branch with different subsets of threads enabled until |
| 20 | // they converge at the immediately post-dominating BB of the paths. |
| 21 | // |
| 22 | // Due to this execution model, some optimizations such as jump |
| 23 | // threading and loop unswitching can be unfortunately harmful when performed on |
| 24 | // divergent branches. Therefore, an analysis that computes which branches in a |
| 25 | // GPU program are divergent can help the compiler to selectively run these |
| 26 | // optimizations. |
| 27 | // |
| 28 | // This file defines divergence analysis which computes a conservative but |
| 29 | // non-trivial approximation of all divergent branches in a GPU program. It |
| 30 | // partially implements the approach described in |
| 31 | // |
| 32 | // Divergence Analysis |
| 33 | // Sampaio, Souza, Collange, Pereira |
| 34 | // TOPLAS '13 |
| 35 | // |
| 36 | // The divergence analysis identifies the sources of divergence (e.g., special |
| 37 | // variables that hold the thread ID), and recursively marks variables that are |
| 38 | // data or sync dependent on a source of divergence as divergent. |
| 39 | // |
| 40 | // While data dependency is a well-known concept, the notion of sync dependency |
| 41 | // is worth more explanation. Sync dependence characterizes the control flow |
| 42 | // aspect of the propagation of branch divergence. For example, |
| 43 | // |
| 44 | // %cond = icmp slt i32 %tid, 10 |
| 45 | // br i1 %cond, label %then, label %else |
| 46 | // then: |
| 47 | // br label %merge |
| 48 | // else: |
| 49 | // br label %merge |
| 50 | // merge: |
| 51 | // %a = phi i32 [ 0, %then ], [ 1, %else ] |
| 52 | // |
| 53 | // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid |
| 54 | // because %tid is not on its use-def chains, %a is sync dependent on %tid |
| 55 | // because the branch "br i1 %cond" depends on %tid and affects which value %a |
| 56 | // is assigned to. |
| 57 | // |
| 58 | // The current implementation has the following limitations: |
| 59 | // 1. intra-procedural. It conservatively considers the arguments of a |
| 60 | // non-kernel-entry function and the return value of a function call as |
| 61 | // divergent. |
| 62 | // 2. memory as black box. It conservatively considers values loaded from |
| 63 | // generic or local address as divergent. This can be improved by leveraging |
| 64 | // pointer analysis. |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 65 | // |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 66 | //===----------------------------------------------------------------------===// |
| 67 | |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 68 | #include "llvm/ADT/PostOrderIterator.h" |
| 69 | #include "llvm/Analysis/CFG.h" |
| 70 | #include "llvm/Analysis/DivergenceAnalysis.h" |
Nicolai Haehnle | b33a403 | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 71 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 72 | #include "llvm/Analysis/Passes.h" |
| 73 | #include "llvm/Analysis/PostDominators.h" |
| 74 | #include "llvm/Analysis/TargetTransformInfo.h" |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 75 | #include "llvm/IR/Dominators.h" |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 76 | #include "llvm/IR/InstIterator.h" |
| 77 | #include "llvm/IR/Instructions.h" |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 78 | #include "llvm/IR/Value.h" |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 79 | #include "llvm/Support/Debug.h" |
| 80 | #include "llvm/Support/raw_ostream.h" |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 81 | #include <vector> |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 82 | using namespace llvm; |
| 83 | |
Tim Renouf | 6069d4b | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 84 | #define DEBUG_TYPE "divergence" |
| 85 | |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 86 | // transparently use the GPUDivergenceAnalysis |
| 87 | static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false), |
| 88 | cl::Hidden, |
| 89 | cl::desc("turn the LegacyDivergenceAnalysis into " |
| 90 | "a wrapper for GPUDivergenceAnalysis")); |
| 91 | |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 92 | namespace { |
| 93 | |
| 94 | class DivergencePropagator { |
| 95 | public: |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 96 | DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, |
| 97 | PostDominatorTree &PDT, DenseSet<const Value *> &DV) |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 98 | : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} |
| 99 | void populateWithSourcesOfDivergence(); |
| 100 | void propagate(); |
| 101 | |
| 102 | private: |
| 103 | // A helper function that explores data dependents of V. |
| 104 | void exploreDataDependency(Value *V); |
| 105 | // A helper function that explores sync dependents of TI. |
Chandler Carruth | 5b21ab8 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 106 | void exploreSyncDependency(Instruction *TI); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 107 | // Computes the influence region from Start to End. This region includes all |
Jingyue Wu | 509e2e9 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 108 | // basic blocks on any simple path from Start to End. |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 109 | void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, |
| 110 | DenseSet<BasicBlock *> &InfluenceRegion); |
| 111 | // Finds all users of I that are outside the influence region, and add these |
| 112 | // users to Worklist. |
| 113 | void findUsersOutsideInfluenceRegion( |
| 114 | Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion); |
| 115 | |
| 116 | Function &F; |
| 117 | TargetTransformInfo &TTI; |
| 118 | DominatorTree &DT; |
| 119 | PostDominatorTree &PDT; |
| 120 | std::vector<Value *> Worklist; // Stack for DFS. |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 121 | DenseSet<const Value *> &DV; // Stores all divergent values. |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 122 | }; |
| 123 | |
| 124 | void DivergencePropagator::populateWithSourcesOfDivergence() { |
| 125 | Worklist.clear(); |
| 126 | DV.clear(); |
Nico Rieck | 3dd7bf5 | 2015-08-06 19:10:45 +0000 | [diff] [blame] | 127 | for (auto &I : instructions(F)) { |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 128 | if (TTI.isSourceOfDivergence(&I)) { |
| 129 | Worklist.push_back(&I); |
| 130 | DV.insert(&I); |
| 131 | } |
| 132 | } |
| 133 | for (auto &Arg : F.args()) { |
| 134 | if (TTI.isSourceOfDivergence(&Arg)) { |
| 135 | Worklist.push_back(&Arg); |
| 136 | DV.insert(&Arg); |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | |
Chandler Carruth | 5b21ab8 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 141 | void DivergencePropagator::exploreSyncDependency(Instruction *TI) { |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 142 | // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's |
| 143 | // immediate post dominator are divergent. This rule handles if-then-else |
| 144 | // patterns. For example, |
| 145 | // |
| 146 | // if (tid < 5) |
| 147 | // a1 = 1; |
| 148 | // else |
| 149 | // a2 = 2; |
| 150 | // a = phi(a1, a2); // sync dependent on (tid < 5) |
| 151 | BasicBlock *ThisBB = TI->getParent(); |
Matt Arsenault | e8448ab | 2016-04-29 06:17:47 +0000 | [diff] [blame] | 152 | |
| 153 | // Unreachable blocks may not be in the dominator tree. |
| 154 | if (!DT.isReachableFromEntry(ThisBB)) |
| 155 | return; |
| 156 | |
Matt Arsenault | 51485ff | 2016-05-09 16:57:08 +0000 | [diff] [blame] | 157 | // If the function has no exit blocks or doesn't reach any exit blocks, the |
| 158 | // post dominator may be null. |
| 159 | DomTreeNode *ThisNode = PDT.getNode(ThisBB); |
| 160 | if (!ThisNode) |
| 161 | return; |
| 162 | |
| 163 | BasicBlock *IPostDom = ThisNode->getIDom()->getBlock(); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 164 | if (IPostDom == nullptr) |
| 165 | return; |
| 166 | |
| 167 | for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) { |
| 168 | // A PHINode is uniform if it returns the same value no matter which path is |
| 169 | // taken. |
Nicolai Haehnle | a16ecd43 | 2016-04-14 17:42:47 +0000 | [diff] [blame] | 170 | if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second) |
Duncan P. N. Exon Smith | d3a5adc | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 171 | Worklist.push_back(&*I); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 172 | } |
| 173 | |
| 174 | // Propagation rule 2: if a value defined in a loop is used outside, the user |
| 175 | // is sync dependent on the condition of the loop exits that dominate the |
| 176 | // user. For example, |
| 177 | // |
| 178 | // int i = 0; |
| 179 | // do { |
| 180 | // i++; |
| 181 | // if (foo(i)) ... // uniform |
| 182 | // } while (i < tid); |
| 183 | // if (bar(i)) ... // divergent |
| 184 | // |
| 185 | // A program may contain unstructured loops. Therefore, we cannot leverage |
| 186 | // LoopInfo, which only recognizes natural loops. |
| 187 | // |
| 188 | // The algorithm used here handles both natural and unstructured loops. Given |
| 189 | // a branch TI, we first compute its influence region, the union of all simple |
| 190 | // paths from TI to its immediate post dominator (IPostDom). Then, we search |
| 191 | // for all the values defined in the influence region but used outside. All |
| 192 | // these users are sync dependent on TI. |
| 193 | DenseSet<BasicBlock *> InfluenceRegion; |
| 194 | computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion); |
| 195 | // An insight that can speed up the search process is that all the in-region |
| 196 | // values that are used outside must dominate TI. Therefore, instead of |
| 197 | // searching every basic blocks in the influence region, we search all the |
| 198 | // dominators of TI until it is outside the influence region. |
| 199 | BasicBlock *InfluencedBB = ThisBB; |
| 200 | while (InfluenceRegion.count(InfluencedBB)) { |
| 201 | for (auto &I : *InfluencedBB) |
| 202 | findUsersOutsideInfluenceRegion(I, InfluenceRegion); |
| 203 | DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); |
| 204 | if (IDomNode == nullptr) |
| 205 | break; |
| 206 | InfluencedBB = IDomNode->getBlock(); |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | void DivergencePropagator::findUsersOutsideInfluenceRegion( |
| 211 | Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { |
| 212 | for (User *U : I.users()) { |
| 213 | Instruction *UserInst = cast<Instruction>(U); |
| 214 | if (!InfluenceRegion.count(UserInst->getParent())) { |
| 215 | if (DV.insert(UserInst).second) |
| 216 | Worklist.push_back(UserInst); |
| 217 | } |
| 218 | } |
| 219 | } |
| 220 | |
Jingyue Wu | 509e2e9 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 221 | // A helper function for computeInfluenceRegion that adds successors of "ThisBB" |
| 222 | // to the influence region. |
| 223 | static void |
| 224 | addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End, |
| 225 | DenseSet<BasicBlock *> &InfluenceRegion, |
| 226 | std::vector<BasicBlock *> &InfluenceStack) { |
| 227 | for (BasicBlock *Succ : successors(ThisBB)) { |
| 228 | if (Succ != End && InfluenceRegion.insert(Succ).second) |
| 229 | InfluenceStack.push_back(Succ); |
| 230 | } |
| 231 | } |
| 232 | |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 233 | void DivergencePropagator::computeInfluenceRegion( |
| 234 | BasicBlock *Start, BasicBlock *End, |
| 235 | DenseSet<BasicBlock *> &InfluenceRegion) { |
| 236 | assert(PDT.properlyDominates(End, Start) && |
| 237 | "End does not properly dominate Start"); |
Jingyue Wu | 509e2e9 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 238 | |
| 239 | // The influence region starts from the end of "Start" to the beginning of |
| 240 | // "End". Therefore, "Start" should not be in the region unless "Start" is in |
| 241 | // a loop that doesn't contain "End". |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 242 | std::vector<BasicBlock *> InfluenceStack; |
Jingyue Wu | 509e2e9 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 243 | addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 244 | while (!InfluenceStack.empty()) { |
| 245 | BasicBlock *BB = InfluenceStack.back(); |
| 246 | InfluenceStack.pop_back(); |
Jingyue Wu | 509e2e9 | 2015-12-18 21:44:26 +0000 | [diff] [blame] | 247 | addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 248 | } |
| 249 | } |
| 250 | |
| 251 | void DivergencePropagator::exploreDataDependency(Value *V) { |
| 252 | // Follow def-use chains of V. |
| 253 | for (User *U : V->users()) { |
| 254 | Instruction *UserInst = cast<Instruction>(U); |
Alexander Timofeev | 7807f69 | 2017-06-15 19:33:10 +0000 | [diff] [blame] | 255 | if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 256 | Worklist.push_back(UserInst); |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | void DivergencePropagator::propagate() { |
| 261 | // Traverse the dependency graph using DFS. |
| 262 | while (!Worklist.empty()) { |
| 263 | Value *V = Worklist.back(); |
| 264 | Worklist.pop_back(); |
Chandler Carruth | 5b21ab8 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 265 | if (Instruction *I = dyn_cast<Instruction>(V)) { |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 266 | // Terminators with less than two successors won't introduce sync |
| 267 | // dependency. Ignore them. |
Chandler Carruth | 5b21ab8 | 2018-10-18 00:36:15 +0000 | [diff] [blame] | 268 | if (I->isTerminator() && I->getNumSuccessors() > 1) |
| 269 | exploreSyncDependency(I); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 270 | } |
| 271 | exploreDataDependency(V); |
| 272 | } |
| 273 | } |
| 274 | |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 275 | } // namespace |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 276 | |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 277 | // Register this pass. |
Nicolai Haehnle | b33a403 | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 278 | char LegacyDivergenceAnalysis::ID = 0; |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 279 | INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", |
| 280 | "Legacy Divergence Analysis", false, true) |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 281 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
Hongbin Zheng | 5d7472e | 2016-02-25 17:54:07 +0000 | [diff] [blame] | 282 | INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 283 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 284 | INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence", |
| 285 | "Legacy Divergence Analysis", false, true) |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 286 | |
Nicolai Haehnle | b33a403 | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 287 | FunctionPass *llvm::createLegacyDivergenceAnalysisPass() { |
| 288 | return new LegacyDivergenceAnalysis(); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 289 | } |
| 290 | |
Nicolai Haehnle | b33a403 | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 291 | void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 292 | AU.addRequired<DominatorTreeWrapperPass>(); |
Hongbin Zheng | 5d7472e | 2016-02-25 17:54:07 +0000 | [diff] [blame] | 293 | AU.addRequired<PostDominatorTreeWrapperPass>(); |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 294 | if (UseGPUDA) |
| 295 | AU.addRequired<LoopInfoWrapperPass>(); |
Marcello Maggioni | eaf5bf9 | 2015-09-21 17:58:14 +0000 | [diff] [blame] | 296 | AU.setPreservesAll(); |
| 297 | } |
| 298 | |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 299 | bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis( |
| 300 | const Function &F) const { |
| 301 | if (!UseGPUDA) |
| 302 | return false; |
| 303 | |
| 304 | // GPUDivergenceAnalysis requires a reducible CFG. |
| 305 | auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 306 | using RPOTraversal = ReversePostOrderTraversal<const Function *>; |
| 307 | RPOTraversal FuncRPOT(&F); |
| 308 | return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, |
| 309 | const LoopInfo>(FuncRPOT, LI); |
| 310 | } |
| 311 | |
Nicolai Haehnle | b33a403 | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 312 | bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 313 | auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); |
| 314 | if (TTIWP == nullptr) |
| 315 | return false; |
| 316 | |
| 317 | TargetTransformInfo &TTI = TTIWP->getTTI(F); |
| 318 | // Fast path: if the target does not have branch divergence, we do not mark |
| 319 | // any branch as divergent. |
| 320 | if (!TTI.hasBranchDivergence()) |
| 321 | return false; |
| 322 | |
| 323 | DivergentValues.clear(); |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 324 | gpuDA = nullptr; |
| 325 | |
| 326 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
Hongbin Zheng | 5d7472e | 2016-02-25 17:54:07 +0000 | [diff] [blame] | 327 | auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 328 | |
| 329 | if (shouldUseGPUDivergenceAnalysis(F)) { |
| 330 | // run the new GPU divergence analysis |
| 331 | auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 332 | gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); |
| 333 | |
| 334 | } else { |
| 335 | // run LLVM's existing DivergenceAnalysis |
| 336 | DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues); |
| 337 | DP.populateWithSourcesOfDivergence(); |
| 338 | DP.propagate(); |
| 339 | } |
| 340 | |
| 341 | LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName() |
| 342 | << ":\n"; |
| 343 | print(dbgs(), F.getParent())); |
| 344 | |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 345 | return false; |
| 346 | } |
| 347 | |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 348 | bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { |
| 349 | if (gpuDA) { |
| 350 | return gpuDA->isDivergent(*V); |
| 351 | } |
| 352 | return DivergentValues.count(V); |
| 353 | } |
| 354 | |
Nicolai Haehnle | b33a403 | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 355 | void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 356 | if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 357 | return; |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 358 | |
Nicolai Haehnle | cd3135e | 2018-11-30 23:07:49 +0000 | [diff] [blame] | 359 | const Function *F = nullptr; |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 360 | if (!DivergentValues.empty()) { |
| 361 | const Value *FirstDivergentValue = *DivergentValues.begin(); |
| 362 | if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) { |
| 363 | F = Arg->getParent(); |
| 364 | } else if (const Instruction *I = |
| 365 | dyn_cast<Instruction>(FirstDivergentValue)) { |
| 366 | F = I->getParent()->getParent(); |
| 367 | } else { |
| 368 | llvm_unreachable("Only arguments and instructions can be divergent"); |
| 369 | } |
| 370 | } else if (gpuDA) { |
| 371 | F = &gpuDA->getFunction(); |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 372 | } |
Nicolai Haehnle | cd3135e | 2018-11-30 23:07:49 +0000 | [diff] [blame] | 373 | if (!F) |
| 374 | return; |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 375 | |
| 376 | // Dumps all divergent values in F, arguments and then instructions. |
| 377 | for (auto &Arg : F->args()) { |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 378 | OS << (isDivergent(&Arg) ? "DIVERGENT: " : " "); |
Tim Renouf | 6069d4b | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 379 | OS << Arg << "\n"; |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 380 | } |
Nico Rieck | 3dd7bf5 | 2015-08-06 19:10:45 +0000 | [diff] [blame] | 381 | // Iterate instructions using instructions() to ensure a deterministic order. |
Tim Renouf | 6069d4b | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 382 | for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) { |
| 383 | auto &BB = *BI; |
| 384 | OS << "\n " << BB.getName() << ":\n"; |
| 385 | for (auto &I : BB.instructionsWithoutDebug()) { |
Nicolai Haehnle | 39a2bbd | 2018-11-30 22:55:20 +0000 | [diff] [blame] | 386 | OS << (isDivergent(&I) ? "DIVERGENT: " : " "); |
Tim Renouf | 6069d4b | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 387 | OS << I << "\n"; |
| 388 | } |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 389 | } |
Tim Renouf | 6069d4b | 2018-07-13 13:13:30 +0000 | [diff] [blame] | 390 | OS << "\n"; |
Jingyue Wu | 5733100 | 2015-04-10 05:03:50 +0000 | [diff] [blame] | 391 | } |