Dean Michael Berris | 9969df3 | 2018-08-30 01:43:22 +0000 | [diff] [blame] | 1 | //===- Profile.cpp - XRay Profile Abstraction -----------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // Defines the XRay Profile class representing the latency profile generated by |
| 11 | // XRay's profiling mode. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | #include "llvm/XRay/Profile.h" |
| 15 | |
| 16 | #include "llvm/Support/DataExtractor.h" |
| 17 | #include "llvm/Support/Error.h" |
| 18 | #include "llvm/Support/FileSystem.h" |
| 19 | #include "llvm/XRay/Trace.h" |
| 20 | #include <deque> |
| 21 | #include <memory> |
| 22 | |
| 23 | namespace llvm { |
| 24 | namespace xray { |
| 25 | |
| 26 | Profile::Profile(const Profile &O) { |
| 27 | // We need to re-create all the tries from the original (O), into the current |
| 28 | // Profile being initialized, through the Block instances we see. |
| 29 | for (const auto &Block : O) { |
| 30 | Blocks.push_back({Block.Thread, {}}); |
| 31 | auto &B = Blocks.back(); |
| 32 | for (const auto &PathData : Block.PathData) |
| 33 | B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))), |
| 34 | PathData.second}); |
| 35 | } |
| 36 | } |
| 37 | |
| 38 | Profile &Profile::operator=(const Profile &O) { |
| 39 | Profile P = O; |
| 40 | *this = std::move(P); |
| 41 | return *this; |
| 42 | } |
| 43 | |
| 44 | namespace { |
| 45 | |
| 46 | struct BlockHeader { |
| 47 | uint32_t Size; |
| 48 | uint32_t Number; |
| 49 | uint64_t Thread; |
| 50 | }; |
| 51 | |
| 52 | static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor, |
| 53 | uint32_t &Offset) { |
| 54 | BlockHeader H; |
| 55 | uint32_t CurrentOffset = Offset; |
| 56 | H.Size = Extractor.getU32(&Offset); |
| 57 | if (Offset == CurrentOffset) |
| 58 | return make_error<StringError>( |
| 59 | Twine("Error parsing block header size at offset '") + |
| 60 | Twine(CurrentOffset) + "'", |
| 61 | std::make_error_code(std::errc::invalid_argument)); |
| 62 | CurrentOffset = Offset; |
| 63 | H.Number = Extractor.getU32(&Offset); |
| 64 | if (Offset == CurrentOffset) |
| 65 | return make_error<StringError>( |
| 66 | Twine("Error parsing block header number at offset '") + |
| 67 | Twine(CurrentOffset) + "'", |
| 68 | std::make_error_code(std::errc::invalid_argument)); |
| 69 | CurrentOffset = Offset; |
| 70 | H.Thread = Extractor.getU64(&Offset); |
| 71 | if (Offset == CurrentOffset) |
| 72 | return make_error<StringError>( |
| 73 | Twine("Error parsing block header thread id at offset '") + |
| 74 | Twine(CurrentOffset) + "'", |
| 75 | std::make_error_code(std::errc::invalid_argument)); |
| 76 | return H; |
| 77 | } |
| 78 | |
| 79 | static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor, |
| 80 | uint32_t &Offset) { |
| 81 | // We're reading a sequence of int32_t's until we find a 0. |
| 82 | std::vector<Profile::FuncID> Path; |
| 83 | auto CurrentOffset = Offset; |
| 84 | int32_t FuncId; |
| 85 | do { |
| 86 | FuncId = Extractor.getSigned(&Offset, 4); |
| 87 | if (CurrentOffset == Offset) |
| 88 | return make_error<StringError>( |
| 89 | Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'", |
| 90 | std::make_error_code(std::errc::invalid_argument)); |
| 91 | CurrentOffset = Offset; |
| 92 | Path.push_back(FuncId); |
| 93 | } while (FuncId != 0); |
| 94 | return std::move(Path); |
| 95 | } |
| 96 | |
| 97 | static Expected<Profile::Data> readData(DataExtractor &Extractor, |
| 98 | uint32_t &Offset) { |
| 99 | // We expect a certain number of elements for Data: |
| 100 | // - A 64-bit CallCount |
| 101 | // - A 64-bit CumulativeLocalTime counter |
| 102 | Profile::Data D; |
| 103 | auto CurrentOffset = Offset; |
| 104 | D.CallCount = Extractor.getU64(&Offset); |
| 105 | if (CurrentOffset == Offset) |
| 106 | return make_error<StringError>( |
| 107 | Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) + |
| 108 | "'", |
| 109 | std::make_error_code(std::errc::invalid_argument)); |
| 110 | CurrentOffset = Offset; |
| 111 | D.CumulativeLocalTime = Extractor.getU64(&Offset); |
| 112 | if (CurrentOffset == Offset) |
| 113 | return make_error<StringError>( |
| 114 | Twine("Error parsing cumulative local time at offset '") + |
| 115 | Twine(CurrentOffset) + "'", |
| 116 | std::make_error_code(std::errc::invalid_argument)); |
| 117 | return D; |
| 118 | } |
| 119 | |
| 120 | } // namespace |
| 121 | |
| 122 | Error Profile::addBlock(Block &&B) { |
| 123 | if (B.PathData.empty()) |
| 124 | return make_error<StringError>( |
| 125 | "Block may not have empty path data.", |
| 126 | std::make_error_code(std::errc::invalid_argument)); |
| 127 | |
| 128 | Blocks.emplace_back(std::move(B)); |
| 129 | return Error::success(); |
| 130 | } |
| 131 | |
| 132 | Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const { |
| 133 | auto It = PathIDMap.find(P); |
| 134 | if (It == PathIDMap.end()) |
| 135 | return make_error<StringError>( |
| 136 | Twine("PathID not found: ") + Twine(P), |
| 137 | std::make_error_code(std::errc::invalid_argument)); |
| 138 | std::vector<Profile::FuncID> Path; |
| 139 | for (auto Node = It->second; Node; Node = Node->Caller) |
| 140 | Path.push_back(Node->Func); |
| 141 | return std::move(Path); |
| 142 | } |
| 143 | |
| 144 | Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { |
| 145 | if (P.empty()) |
| 146 | return 0; |
| 147 | |
| 148 | auto RootToLeafPath = reverse(P); |
| 149 | |
| 150 | // Find the root. |
| 151 | auto It = RootToLeafPath.begin(); |
| 152 | auto PathRoot = *It++; |
| 153 | auto RootIt = |
| 154 | find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); |
| 155 | |
| 156 | // If we've not seen this root before, remember it. |
| 157 | TrieNode *Node = nullptr; |
| 158 | if (RootIt == Roots.end()) { |
| 159 | NodeStorage.emplace_back(); |
| 160 | Node = &NodeStorage.back(); |
| 161 | Node->Func = PathRoot; |
| 162 | Roots.push_back(Node); |
| 163 | } else { |
| 164 | Node = *RootIt; |
| 165 | } |
| 166 | |
| 167 | // Now traverse the path, re-creating if necessary. |
| 168 | while (It != RootToLeafPath.end()) { |
| 169 | auto NodeFuncID = *It++; |
| 170 | auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) { |
| 171 | return N->Func == NodeFuncID; |
| 172 | }); |
| 173 | if (CalleeIt == Node->Callees.end()) { |
| 174 | NodeStorage.emplace_back(); |
| 175 | auto NewNode = &NodeStorage.back(); |
| 176 | NewNode->Func = NodeFuncID; |
| 177 | NewNode->Caller = Node; |
| 178 | Node->Callees.push_back(NewNode); |
| 179 | Node = NewNode; |
| 180 | } else { |
| 181 | Node = *CalleeIt; |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | // At this point, Node *must* be pointing at the leaf. |
| 186 | assert(Node->Func == P.front()); |
| 187 | if (Node->ID == 0) { |
| 188 | Node->ID = NextID++; |
| 189 | PathIDMap.insert({Node->ID, Node}); |
| 190 | } |
| 191 | return Node->ID; |
| 192 | } |
| 193 | |
| 194 | Profile mergeProfilesByThread(const Profile &L, const Profile &R) { |
| 195 | Profile Merged; |
| 196 | using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; |
| 197 | using PathDataMapPtr = std::unique_ptr<PathDataMap>; |
| 198 | using PathDataVector = decltype(Profile::Block::PathData); |
| 199 | using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>; |
| 200 | ThreadProfileIndexMap ThreadProfileIndex; |
| 201 | |
| 202 | for (const auto &P : {std::ref(L), std::ref(R)}) |
| 203 | for (const auto &Block : P.get()) { |
| 204 | ThreadProfileIndexMap::iterator It; |
| 205 | std::tie(It, std::ignore) = ThreadProfileIndex.insert( |
| 206 | {Block.Thread, PathDataMapPtr{new PathDataMap()}}); |
| 207 | for (const auto &PathAndData : Block.PathData) { |
| 208 | auto &PathID = PathAndData.first; |
| 209 | auto &Data = PathAndData.second; |
| 210 | auto NewPathID = |
| 211 | Merged.internPath(cantFail(P.get().expandPath(PathID))); |
| 212 | PathDataMap::iterator PathDataIt; |
| 213 | bool Inserted; |
| 214 | std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data}); |
| 215 | if (!Inserted) { |
| 216 | auto &ExistingData = PathDataIt->second; |
| 217 | ExistingData.CallCount += Data.CallCount; |
| 218 | ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; |
| 219 | } |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | for (const auto &IndexedThreadBlock : ThreadProfileIndex) { |
| 224 | PathDataVector PathAndData; |
| 225 | PathAndData.reserve(IndexedThreadBlock.second->size()); |
| 226 | copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData)); |
| 227 | cantFail( |
| 228 | Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)})); |
| 229 | } |
| 230 | return Merged; |
| 231 | } |
| 232 | |
| 233 | Profile mergeProfilesByStack(const Profile &L, const Profile &R) { |
| 234 | Profile Merged; |
| 235 | using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; |
| 236 | PathDataMap PathData; |
| 237 | using PathDataVector = decltype(Profile::Block::PathData); |
| 238 | for (const auto &P : {std::ref(L), std::ref(R)}) |
| 239 | for (const auto &Block : P.get()) |
| 240 | for (const auto &PathAndData : Block.PathData) { |
| 241 | auto &PathId = PathAndData.first; |
| 242 | auto &Data = PathAndData.second; |
| 243 | auto NewPathID = |
| 244 | Merged.internPath(cantFail(P.get().expandPath(PathId))); |
| 245 | PathDataMap::iterator PathDataIt; |
| 246 | bool Inserted; |
| 247 | std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data}); |
| 248 | if (!Inserted) { |
| 249 | auto &ExistingData = PathDataIt->second; |
| 250 | ExistingData.CallCount += Data.CallCount; |
| 251 | ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | // In the end there's a single Block, for thread 0. |
| 256 | PathDataVector Block; |
| 257 | Block.reserve(PathData.size()); |
| 258 | copy(PathData, std::back_inserter(Block)); |
| 259 | cantFail(Merged.addBlock({0, std::move(Block)})); |
| 260 | return Merged; |
| 261 | } |
| 262 | |
| 263 | Expected<Profile> loadProfile(StringRef Filename) { |
| 264 | int Fd; |
| 265 | if (auto EC = sys::fs::openFileForRead(Filename, Fd)) |
| 266 | return make_error<StringError>( |
| 267 | Twine("Cannot read profile from '") + Filename + "'", EC); |
| 268 | |
| 269 | uint64_t FileSize; |
| 270 | if (auto EC = sys::fs::file_size(Filename, FileSize)) |
| 271 | return make_error<StringError>( |
| 272 | Twine("Cannot get filesize of '") + Filename + "'", EC); |
| 273 | |
| 274 | std::error_code EC; |
| 275 | sys::fs::mapped_file_region MappedFile( |
| 276 | Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); |
| 277 | if (EC) |
| 278 | return make_error<StringError>( |
| 279 | Twine("Cannot mmap profile '") + Filename + "'", EC); |
| 280 | StringRef Data(MappedFile.data(), MappedFile.size()); |
| 281 | |
| 282 | Profile P; |
| 283 | uint32_t Offset = 0; |
| 284 | DataExtractor Extractor(Data, true, 8); |
| 285 | |
| 286 | // For each block we get from the file: |
| 287 | while (Offset != MappedFile.size()) { |
| 288 | auto HeaderOrError = readBlockHeader(Extractor, Offset); |
| 289 | if (!HeaderOrError) |
| 290 | return HeaderOrError.takeError(); |
| 291 | |
| 292 | // TODO: Maybe store this header information for each block, even just for |
| 293 | // debugging? |
| 294 | const auto &Header = HeaderOrError.get(); |
| 295 | |
| 296 | // Read in the path data. |
| 297 | auto PathOrError = readPath(Extractor, Offset); |
| 298 | if (!PathOrError) |
| 299 | return PathOrError.takeError(); |
| 300 | const auto &Path = PathOrError.get(); |
| 301 | |
| 302 | // For each path we encounter, we should intern it to get a PathID. |
| 303 | auto DataOrError = readData(Extractor, Offset); |
| 304 | if (!DataOrError) |
| 305 | return DataOrError.takeError(); |
| 306 | auto &Data = DataOrError.get(); |
| 307 | |
| 308 | if (auto E = |
| 309 | P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread}, |
| 310 | {{P.internPath(Path), std::move(Data)}}})) |
| 311 | return std::move(E); |
| 312 | } |
| 313 | |
| 314 | return P; |
| 315 | } |
| 316 | |
| 317 | namespace { |
| 318 | |
| 319 | struct StackEntry { |
| 320 | uint64_t Timestamp; |
| 321 | Profile::FuncID FuncId; |
| 322 | }; |
| 323 | |
| 324 | } // namespace |
| 325 | |
| 326 | Expected<Profile> profileFromTrace(const Trace &T) { |
| 327 | Profile P; |
| 328 | |
| 329 | // The implementation of the algorithm re-creates the execution of |
| 330 | // the functions based on the trace data. To do this, we set up a number of |
| 331 | // data structures to track the execution context of every thread in the |
| 332 | // Trace. |
| 333 | DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks; |
| 334 | DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>> |
| 335 | ThreadPathData; |
| 336 | |
| 337 | // We then do a pass through the Trace to account data on a per-thread-basis. |
| 338 | for (const auto &E : T) { |
| 339 | auto &TSD = ThreadStacks[E.TId]; |
| 340 | switch (E.Type) { |
| 341 | case RecordTypes::ENTER: |
| 342 | case RecordTypes::ENTER_ARG: |
| 343 | |
| 344 | // Push entries into the function call stack. |
| 345 | TSD.push_back({E.TSC, E.FuncId}); |
| 346 | break; |
| 347 | |
| 348 | case RecordTypes::EXIT: |
| 349 | case RecordTypes::TAIL_EXIT: |
| 350 | |
| 351 | // Exits cause some accounting to happen, based on the state of the stack. |
| 352 | // For each function we pop off the stack, we take note of the path and |
| 353 | // record the cumulative state for this path. As we're doing this, we |
| 354 | // intern the path into the Profile. |
| 355 | while (!TSD.empty()) { |
| 356 | auto Top = TSD.back(); |
| 357 | auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC); |
| 358 | SmallVector<Profile::FuncID, 16> Path; |
| 359 | transform(reverse(TSD), std::back_inserter(Path), |
| 360 | std::mem_fn(&StackEntry::FuncId)); |
| 361 | auto InternedPath = P.internPath(Path); |
| 362 | auto &TPD = ThreadPathData[E.TId][InternedPath]; |
| 363 | ++TPD.CallCount; |
| 364 | TPD.CumulativeLocalTime += FunctionLocalTime; |
| 365 | TSD.pop_back(); |
| 366 | |
| 367 | // If we've matched the corresponding entry event for this function, |
| 368 | // then we exit the loop. |
| 369 | if (Top.FuncId == E.FuncId) |
| 370 | break; |
| 371 | |
| 372 | // FIXME: Consider the intermediate times and the cumulative tree time |
| 373 | // as well. |
| 374 | } |
| 375 | |
| 376 | break; |
Dean Michael Berris | b92b0b8 | 2018-11-06 08:51:37 +0000 | [diff] [blame] | 377 | |
| 378 | case RecordTypes::CUSTOM_EVENT: |
| 379 | case RecordTypes::TYPED_EVENT: |
| 380 | // TODO: Support an extension point to allow handling of custom and typed |
| 381 | // events in profiles. |
| 382 | break; |
Dean Michael Berris | 9969df3 | 2018-08-30 01:43:22 +0000 | [diff] [blame] | 383 | } |
| 384 | } |
| 385 | |
| 386 | // Once we've gone through the Trace, we now create one Block per thread in |
| 387 | // the Profile. |
| 388 | for (const auto &ThreadPaths : ThreadPathData) { |
| 389 | const auto &TID = ThreadPaths.first; |
| 390 | const auto &PathsData = ThreadPaths.second; |
| 391 | if (auto E = P.addBlock({ |
| 392 | TID, |
| 393 | std::vector<std::pair<Profile::PathID, Profile::Data>>( |
| 394 | PathsData.begin(), PathsData.end()), |
| 395 | })) |
| 396 | return std::move(E); |
| 397 | } |
| 398 | |
| 399 | return P; |
| 400 | } |
| 401 | |
| 402 | } // namespace xray |
| 403 | } // namespace llvm |