Brian Osman | 3c1c4c0 | 2021-07-19 09:41:47 -0400 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Brian Osman | 060dd70 | 2021-07-15 13:14:51 -0400 | [diff] [blame] | 2 | |
| 3 | # Copyright 2021 Google LLC |
| 4 | # |
| 5 | # Use of this source code is governed by a BSD-style license that can be |
| 6 | # found in the LICENSE file. |
| 7 | |
| 8 | |
| 9 | # This script is written to process the output from bloaty, read via stdin |
| 10 | # The easiest way to use the script: |
| 11 | # |
| 12 | # bloaty <path_to_binary> -d compileunits,symbols -n 0 --tsv | bloaty_treemap.py > bloaty.html |
| 13 | # |
| 14 | # Open the resulting .html file in your browser. |
| 15 | |
| 16 | # TODO: Deal with symbols vs. fullsymbols, even both? |
| 17 | # TODO: Support aggregation by scope, rather than file (split C++ identifiers on '::') |
| 18 | # TODO: Deal with duplicate symbols better. These are actually good targets for optimization. |
| 19 | # They are sometimes static functions in headers (so they appear in multiple .o files), |
| 20 | # There are also symbols that appear multiple times due to inlining (eg, kNoCropRect). |
| 21 | # TODO: Figure out why some symbols are misattributed. Eg, Swizzle::Convert and ::Make are tied |
| 22 | # to the header by nm, and then to one caller (at random) by bloaty. They're not inlined, |
| 23 | # though. Unless LTO is doing something wacky here? Scope-aggregation may be the answer? |
| 24 | # Ultimately, this seems like an issue with bloaty and/or debug information itself. |
| 25 | |
| 26 | import os |
| 27 | import sys |
| 28 | |
# Maps every path that has already been emitted to the data table to its parent
# path. Top-level entries map to the sentinel "ROOT".
parent_map = {}


def add_path(path):
    """Print data-table rows for `path` and for each ancestor not yet emitted.

    For the file path "foo/bar/baz.cpp" the following rows are written to
    stdout, outermost directory first:

        ['foo (Path)', 'ROOT', 0],
        ['foo/bar (Path)', 'foo (Path)', 0],
        ['foo/bar/baz.cpp (Path)', 'foo/bar (Path)', 0],

    `parent_map` records what has been emitted, so every row is printed at most
    once no matter how many times a path (or one of its descendants) is added.

    A " (Path)" suffix is appended to path node names to eliminate the chance
    of a path name colliding with a symbol name. This matters because
    google.visualization.TreeMap requires node names to be unique, and a file
    such as test/foo/bar.cpp would create a node named "test", which could
    collide with a symbol named "test" defined in a C++ file.

    Assumptions made:
      - No C++ symbol ends with " (Path)".
      - No C++ symbol is named "ROOT".

    Args:
        path: A file or directory path using the separators understood by
            `os.path.split`.
    """
    if path in parent_map:
        return

    head = os.path.split(path)[0]
    # `head == path` happens for filesystem roots such as "/", where splitting
    # never yields an empty head; treating them as top-level stops the
    # recursion instead of looping until RecursionError.
    if not head or head == path:
        parent_map[path] = "ROOT"
        parent_label = "ROOT"
    else:
        # Emit ancestors first so the rows come out root-first.
        add_path(head)
        parent_map[path] = head
        # Decide on the suffix from the split result, not from the parent's
        # text, so a directory that is literally named "ROOT" is still
        # referenced as "ROOT (Path)" rather than as the tree root.
        parent_label = "%s (Path)" % head

    print("['%s (Path)', '%s', 0]," % (path, parent_label))
Brian Osman | 060dd70 | 2021-07-15 13:14:51 -0400 | [diff] [blame] | 57 | |
def _escape_js_string(text):
    """Escape `text` for use inside a single-quoted JavaScript string literal."""
    # Backslashes are doubled first so that the escapes inserted for the
    # quotes are not themselves doubled afterwards.
    return text.replace("\\", "\\\\").replace("'", "\\'")


def main():
    """Convert bloaty TSV output on stdin into a treemap HTML page on stdout.

    The input is expected to have a header row followed by rows of four
    tab-separated columns: file path, symbol, VM size and file size. Each
    accepted row becomes one leaf of a google.visualization.TreeMap, parented
    to its file path node (created through `add_path`) and sized by the file
    size column.

    Rows whose file path or symbol starts with '[' are skipped, as are blank
    lines. Empty input yields a page with an empty tree.

    Exits with status 1, and a message on stderr, when a row does not have
    four columns or when an absolute path does not contain "third_party".
    """
    # HTML/script header, plus the first two (fixed) rows of the data table
    print("""
<html>
  <head>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script type="text/javascript">
      google.charts.load('current', {'packages':['treemap']});
      google.charts.setOnLoadCallback(drawChart);
      function drawChart() {
        const data = google.visualization.arrayToDataTable([
          ['Name', 'Parent', 'Size'],
          ['ROOT', null, 0],""")

    # Number of times each (escaped) symbol name has been seen so far.
    symbol_frequencies = {}

    # Skip header row. The default keeps empty input from raising StopIteration.
    # TODO: In the future, we could use this to automatically detect the source columns
    next(sys.stdin, None)

    for line in sys.stdin:
        # Tolerate blank lines (for example a trailing newline in the input).
        if not line.strip():
            continue

        vals = line.rstrip().split("\t")
        if len(vals) != 4:
            # stdout is normally redirected into the .html file, so errors go
            # to stderr; sys.exit(str) prints there and exits with status 1.
            sys.exit("ERROR: Failed to match line\n" + line)
        filepath, symbol, _vmsize, filesize = vals

        # Skip any entry where the filepath or symbol starts with '['
        # These tend to be section meta-data and debug information
        if filepath.startswith("[") or symbol.startswith("["):
            continue

        # Strip the leading ../../ from paths
        while filepath.startswith("../"):
            filepath = filepath[3:]

        # Files in third_party sometimes have absolute paths. Strip those:
        if filepath.startswith("/"):
            rel_path_start = filepath.find("third_party")
            if rel_path_start < 0:
                sys.exit("ERROR: Unexpected absolute path:\n" + filepath)
            filepath = filepath[rel_path_start:]

        # Symbols involving C++ lambdas can contain single quotes, and a raw
        # backslash would start a JavaScript escape sequence.
        symbol = _escape_js_string(symbol)

        # Ensure that we've added intermediate nodes for all portions of this file path
        add_path(filepath)

        # Ensure that our final symbol name is unique (a repeated "foo" symbol becomes "foo_1",
        # "foo_2", etc.)
        seen = symbol_frequencies.get(symbol, 0)
        symbol_frequencies[symbol] = seen + 1
        if seen:
            symbol = "%s_%d" % (symbol, seen)

        # Append another row for our sanitized data. The path is written
        # verbatim so that it matches the node name emitted by add_path.
        print("['%s', '%s (Path)', %d]," % (symbol, filepath, int(filesize)))

    # HTML/script footer. The tooltip escaping uses global regexes because
    # String.replace with a string pattern only replaces the first match.
    print("""        ]);
        const tree = new google.visualization.TreeMap(document.getElementById('chart_div'));
        tree.draw(data, {
          generateTooltip: showTooltip
        });

        function showTooltip(row, size, value) {
          const escapedLabel = data.getValue(row, 0)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
          return `<div style="background:#fd9; padding:10px; border-style:solid">
                  <span style="font-family:Courier"> ${escapedLabel} <br>
                  Size: ${size} </span></div>`;
        }
      }
    </script>
  </head>
  <body>
    <div id="chart_div" style="width: 100%; height: 100%;"></div>
  </body>
</html>""")
| 144 | |
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()