Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 1 | #include "llvm/ADT/DenseMap.h" |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 2 | #include "llvm/ADT/StringExtras.h" |
| 3 | #include "llvm/ADT/StringSet.h" |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 4 | #include "llvm/DebugInfo/DIContext.h" |
| 5 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| 6 | #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" |
| 7 | #include "llvm/Object/ObjectFile.h" |
| 8 | |
| 9 | #define DEBUG_TYPE "dwarfdump" |
| 10 | using namespace llvm; |
| 11 | using namespace object; |
| 12 | |
| 13 | /// Holds statistics for one function (or other entity that has a PC range and |
| 14 | /// contains variables, such as a compile unit). |
| 15 | struct PerFunctionStats { |
| 16 | /// Number of inlined instances of this function. |
| 17 | unsigned NumFnInlined = 0; |
| 18 | /// Number of variables with location across all inlined instances. |
| 19 | unsigned TotalVarWithLoc = 0; |
| 20 | /// Number of constants with location across all inlined instances. |
| 21 | unsigned ConstantMembers = 0; |
| 22 | /// List of all Variables in this function. |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 23 | StringSet<> VarsInFunction; |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 24 | /// Compile units also cover a PC range, but have this flag set to false. |
| 25 | bool IsFunction = false; |
| 26 | }; |
| 27 | |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 28 | /// Holds accumulated global statistics about DIEs. |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 29 | struct GlobalStats { |
| 30 | /// Total number of PC range bytes covered by DW_AT_locations. |
| 31 | unsigned ScopeBytesCovered = 0; |
| 32 | /// Total number of PC range bytes in each variable's enclosing scope, |
| 33 | /// starting from the first definition of the variable. |
| 34 | unsigned ScopeBytesFromFirstDefinition = 0; |
Vedant Kumar | 78b8f9d | 2018-10-05 20:37:17 +0000 | [diff] [blame] | 35 | /// Total number of call site entries (DW_TAG_call_site). |
| 36 | unsigned CallSiteEntries = 0; |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 37 | /// Total byte size of concrete functions. This byte size includes |
| 38 | /// inline functions contained in the concrete functions. |
| 39 | uint64_t FunctionSize = 0; |
| 40 | /// Total byte size of inlined functions. This is the total number of bytes |
| 41 | /// for the top inline functions within concrete functions. This can help |
| 42 | /// tune the inline settings when compiling to match user expectations. |
| 43 | uint64_t InlineFunctionSize = 0; |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 44 | }; |
| 45 | |
| 46 | /// Extract the low pc from a Die. |
| 47 | static uint64_t getLowPC(DWARFDie Die) { |
Wolfgang Pieb | 5a4174c | 2018-06-20 22:56:37 +0000 | [diff] [blame] | 48 | auto RangesOrError = Die.getAddressRanges(); |
| 49 | DWARFAddressRangesVector Ranges; |
| 50 | if (RangesOrError) |
| 51 | Ranges = RangesOrError.get(); |
| 52 | else |
| 53 | llvm::consumeError(RangesOrError.takeError()); |
| 54 | if (Ranges.size()) |
| 55 | return Ranges[0].LowPC; |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 56 | return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0); |
| 57 | } |
| 58 | |
| 59 | /// Collect debug info quality metrics for one DIE. |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 60 | static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, |
| 61 | std::string VarPrefix, uint64_t ScopeLowPC, |
| 62 | uint64_t BytesInScope, |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 63 | uint32_t InlineDepth, |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 64 | StringMap<PerFunctionStats> &FnStatMap, |
| 65 | GlobalStats &GlobalStats) { |
| 66 | bool HasLoc = false; |
| 67 | uint64_t BytesCovered = 0; |
| 68 | uint64_t OffsetToFirstDefinition = 0; |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 69 | |
Vedant Kumar | 78b8f9d | 2018-10-05 20:37:17 +0000 | [diff] [blame] | 70 | if (Die.getTag() == dwarf::DW_TAG_call_site) { |
| 71 | GlobalStats.CallSiteEntries++; |
| 72 | return; |
| 73 | } |
| 74 | |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 75 | if (Die.getTag() != dwarf::DW_TAG_formal_parameter && |
| 76 | Die.getTag() != dwarf::DW_TAG_variable && |
| 77 | Die.getTag() != dwarf::DW_TAG_member) { |
| 78 | // Not a variable or constant member. |
| 79 | return; |
| 80 | } |
| 81 | |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 82 | if (Die.find(dwarf::DW_AT_const_value)) { |
| 83 | // This catches constant members *and* variables. |
| 84 | HasLoc = true; |
| 85 | BytesCovered = BytesInScope; |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 86 | } else { |
| 87 | if (Die.getTag() == dwarf::DW_TAG_member) { |
| 88 | // Non-const member. |
| 89 | return; |
| 90 | } |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 91 | // Handle variables and function arguments. |
| 92 | auto FormValue = Die.find(dwarf::DW_AT_location); |
| 93 | HasLoc = FormValue.hasValue(); |
| 94 | if (HasLoc) { |
| 95 | // Get PC coverage. |
| 96 | if (auto DebugLocOffset = FormValue->getAsSectionOffset()) { |
| 97 | auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc(); |
| 98 | if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) { |
| 99 | for (auto Entry : List->Entries) |
| 100 | BytesCovered += Entry.End - Entry.Begin; |
| 101 | if (List->Entries.size()) { |
| 102 | uint64_t FirstDef = List->Entries[0].Begin; |
| 103 | uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE()); |
| 104 | // Ranges sometimes start before the lexical scope. |
| 105 | if (UnitOfs + FirstDef >= ScopeLowPC) |
| 106 | OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC; |
| 107 | // Or even after it. Count that as a failure. |
| 108 | if (OffsetToFirstDefinition > BytesInScope) |
| 109 | OffsetToFirstDefinition = 0; |
| 110 | } |
| 111 | } |
| 112 | assert(BytesInScope); |
| 113 | } else { |
| 114 | // Assume the entire range is covered by a single location. |
| 115 | BytesCovered = BytesInScope; |
| 116 | } |
| 117 | } |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 118 | } |
| 119 | |
| 120 | // Collect PC range coverage data. |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 121 | auto &FnStats = FnStatMap[FnPrefix]; |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 122 | if (DWARFDie D = |
| 123 | Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) |
| 124 | Die = D; |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 125 | // By using the variable name + the path through the lexical block tree, the |
| 126 | // keys are consistent across duplicate abstract origins in different CUs. |
| 127 | std::string VarName = StringRef(Die.getName(DINameKind::ShortName)); |
| 128 | FnStats.VarsInFunction.insert(VarPrefix+VarName); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 129 | if (BytesInScope) { |
| 130 | FnStats.TotalVarWithLoc += (unsigned)HasLoc; |
| 131 | // Adjust for the fact the variables often start their lifetime in the |
| 132 | // middle of the scope. |
| 133 | BytesInScope -= OffsetToFirstDefinition; |
| 134 | // Turns out we have a lot of ranges that extend past the lexical scope. |
| 135 | GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered); |
| 136 | GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope; |
| 137 | assert(GlobalStats.ScopeBytesCovered <= |
| 138 | GlobalStats.ScopeBytesFromFirstDefinition); |
| 139 | } else { |
| 140 | FnStats.ConstantMembers++; |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | /// Recursively collect debug info quality metrics. |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 145 | static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, |
| 146 | std::string VarPrefix, uint64_t ScopeLowPC, |
| 147 | uint64_t BytesInScope, |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 148 | uint32_t InlineDepth, |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 149 | StringMap<PerFunctionStats> &FnStatMap, |
| 150 | GlobalStats &GlobalStats) { |
| 151 | // Handle any kind of lexical scope. |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 152 | const dwarf::Tag Tag = Die.getTag(); |
| 153 | const bool IsFunction = Tag == dwarf::DW_TAG_subprogram; |
| 154 | const bool IsBlock = Tag == dwarf::DW_TAG_lexical_block; |
| 155 | const bool IsInlinedFunction = Tag == dwarf::DW_TAG_inlined_subroutine; |
| 156 | if (IsFunction || IsInlinedFunction || IsBlock) { |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 157 | |
| 158 | // Reset VarPrefix when entering a new function. |
| 159 | if (Die.getTag() == dwarf::DW_TAG_subprogram || |
| 160 | Die.getTag() == dwarf::DW_TAG_inlined_subroutine) |
| 161 | VarPrefix = "v"; |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 162 | |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 163 | // Ignore forward declarations. |
| 164 | if (Die.find(dwarf::DW_AT_declaration)) |
| 165 | return; |
| 166 | |
| 167 | // Count the function. |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 168 | if (!IsBlock) { |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 169 | StringRef Name = Die.getName(DINameKind::LinkageName); |
| 170 | if (Name.empty()) |
| 171 | Name = Die.getName(DINameKind::ShortName); |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 172 | FnPrefix = Name; |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 173 | // Skip over abstract origins. |
| 174 | if (Die.find(dwarf::DW_AT_inline)) |
| 175 | return; |
| 176 | // We've seen an (inlined) instance of this function. |
| 177 | auto &FnStats = FnStatMap[Name]; |
| 178 | FnStats.NumFnInlined++; |
| 179 | FnStats.IsFunction = true; |
| 180 | } |
| 181 | |
| 182 | // PC Ranges. |
Wolfgang Pieb | 5a4174c | 2018-06-20 22:56:37 +0000 | [diff] [blame] | 183 | auto RangesOrError = Die.getAddressRanges(); |
| 184 | if (!RangesOrError) { |
| 185 | llvm::consumeError(RangesOrError.takeError()); |
| 186 | return; |
| 187 | } |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 188 | |
Wolfgang Pieb | 5a4174c | 2018-06-20 22:56:37 +0000 | [diff] [blame] | 189 | auto Ranges = RangesOrError.get(); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 190 | uint64_t BytesInThisScope = 0; |
| 191 | for (auto Range : Ranges) |
| 192 | BytesInThisScope += Range.HighPC - Range.LowPC; |
| 193 | ScopeLowPC = getLowPC(Die); |
| 194 | |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 195 | if (BytesInThisScope) { |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 196 | BytesInScope = BytesInThisScope; |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 197 | if (IsFunction) |
| 198 | GlobalStats.FunctionSize += BytesInThisScope; |
| 199 | else if (IsInlinedFunction && InlineDepth == 0) |
| 200 | GlobalStats.InlineFunctionSize += BytesInThisScope; |
| 201 | } |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 202 | } else { |
| 203 | // Not a scope, visit the Die itself. It could be a variable. |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 204 | collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope, |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 205 | InlineDepth, FnStatMap, GlobalStats); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 206 | } |
| 207 | |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 208 | // Set InlineDepth correctly for child recursion |
| 209 | if (IsFunction) |
| 210 | InlineDepth = 0; |
| 211 | else if (IsInlinedFunction) |
| 212 | ++InlineDepth; |
| 213 | |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 214 | // Traverse children. |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 215 | unsigned LexicalBlockIndex = 0; |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 216 | DWARFDie Child = Die.getFirstChild(); |
| 217 | while (Child) { |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 218 | std::string ChildVarPrefix = VarPrefix; |
| 219 | if (Child.getTag() == dwarf::DW_TAG_lexical_block) |
| 220 | ChildVarPrefix += toHex(LexicalBlockIndex++) + '.'; |
| 221 | |
| 222 | collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC, |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 223 | BytesInScope, InlineDepth, FnStatMap, GlobalStats); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 224 | Child = Child.getSibling(); |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | /// Print machine-readable output. |
| 229 | /// The machine-readable format is single-line JSON output. |
| 230 | /// \{ |
| 231 | static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) { |
| 232 | OS << ",\"" << Key << "\":\"" << Value << '"'; |
Nicola Zaghen | 0818e78 | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 233 | LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 234 | } |
| 235 | static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) { |
| 236 | OS << ",\"" << Key << "\":" << Value; |
Nicola Zaghen | 0818e78 | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 237 | LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 238 | } |
| 239 | /// \} |
| 240 | |
| 241 | /// Collect debug info quality metrics for an entire DIContext. |
| 242 | /// |
| 243 | /// Do the impossible and reduce the quality of the debug info down to a few |
| 244 | /// numbers. The idea is to condense the data into numbers that can be tracked |
| 245 | /// over time to identify trends in newer compiler versions and gauge the effect |
| 246 | /// of particular optimizations. The raw numbers themselves are not particularly |
| 247 | /// useful, only the delta between compiling the same program with different |
| 248 | /// compilers is. |
| 249 | bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, |
| 250 | Twine Filename, raw_ostream &OS) { |
| 251 | StringRef FormatName = Obj.getFileFormatName(); |
| 252 | GlobalStats GlobalStats; |
| 253 | StringMap<PerFunctionStats> Statistics; |
| 254 | for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units()) |
| 255 | if (DWARFDie CUDie = CU->getUnitDIE(false)) |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 256 | collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 257 | |
| 258 | /// The version number should be increased every time the algorithm is changed |
| 259 | /// (including bug fixes). New metrics may be added without increasing the |
| 260 | /// version. |
| 261 | unsigned Version = 1; |
| 262 | unsigned VarTotal = 0; |
| 263 | unsigned VarUnique = 0; |
| 264 | unsigned VarWithLoc = 0; |
| 265 | unsigned NumFunctions = 0; |
| 266 | unsigned NumInlinedFunctions = 0; |
| 267 | for (auto &Entry : Statistics) { |
| 268 | PerFunctionStats &Stats = Entry.getValue(); |
| 269 | unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined; |
| 270 | unsigned Constants = Stats.ConstantMembers; |
| 271 | VarWithLoc += Stats.TotalVarWithLoc + Constants; |
| 272 | VarTotal += TotalVars + Constants; |
| 273 | VarUnique += Stats.VarsInFunction.size(); |
Adrian Prantl | bb1f2f9d | 2018-09-21 21:59:34 +0000 | [diff] [blame] | 274 | LLVM_DEBUG(for (auto &V : Stats.VarsInFunction) llvm::dbgs() |
| 275 | << Entry.getKey() << ": " << V.getKey() << "\n"); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 276 | NumFunctions += Stats.IsFunction; |
| 277 | NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined; |
| 278 | } |
| 279 | |
| 280 | // Print summary. |
| 281 | OS.SetBufferSize(1024); |
Adrian Prantl | 24a3723 | 2018-10-05 17:41:30 +0000 | [diff] [blame] | 282 | OS << "{\"version\":" << Version; |
Nicola Zaghen | 0818e78 | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 283 | LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n"; |
| 284 | llvm::dbgs() << "---------------------------------\n"); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 285 | printDatum(OS, "file", Filename.str()); |
| 286 | printDatum(OS, "format", FormatName); |
| 287 | printDatum(OS, "source functions", NumFunctions); |
| 288 | printDatum(OS, "inlined functions", NumInlinedFunctions); |
| 289 | printDatum(OS, "unique source variables", VarUnique); |
| 290 | printDatum(OS, "source variables", VarTotal); |
| 291 | printDatum(OS, "variables with location", VarWithLoc); |
Vedant Kumar | 78b8f9d | 2018-10-05 20:37:17 +0000 | [diff] [blame] | 292 | printDatum(OS, "call site entries", GlobalStats.CallSiteEntries); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 293 | printDatum(OS, "scope bytes total", |
| 294 | GlobalStats.ScopeBytesFromFirstDefinition); |
| 295 | printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered); |
Greg Clayton | 6517c35 | 2018-11-09 18:10:02 +0000 | [diff] [blame] | 296 | printDatum(OS, "total function size", GlobalStats.FunctionSize); |
| 297 | printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize); |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 298 | OS << "}\n"; |
Nicola Zaghen | 0818e78 | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 299 | LLVM_DEBUG( |
Adrian Prantl | eadc313 | 2017-10-06 20:24:34 +0000 | [diff] [blame] | 300 | llvm::dbgs() << "Total Availability: " |
| 301 | << (int)std::round((VarWithLoc * 100.0) / VarTotal) << "%\n"; |
| 302 | llvm::dbgs() << "PC Ranges covered: " |
| 303 | << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) / |
| 304 | GlobalStats.ScopeBytesFromFirstDefinition) |
| 305 | << "%\n"); |
| 306 | return true; |
| 307 | } |