Krzysztof Kosiński | b136111 | 2021-03-11 18:05:01 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 2 | # |
Ben Cheng | b42dad0 | 2013-04-25 15:14:04 -0700 | [diff] [blame] | 3 | # Copyright (C) 2013 The Android Open Source Project |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 16 | |
| 17 | """Module for looking up symbolic debugging information. |
| 18 | |
| 19 | The information can include symbol names, offsets, and source locations. |
| 20 | """ |
| 21 | |
Andreas Gampe | 46b00d6 | 2017-05-17 15:12:27 -0700 | [diff] [blame] | 22 | import atexit |
Elliott Hughes | 0836593 | 2014-06-13 18:12:25 -0700 | [diff] [blame] | 23 | import glob |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 24 | import os |
Yang Ni | e4b2a1a | 2014-11-06 17:42:33 -0800 | [diff] [blame] | 25 | import platform |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 26 | import re |
Julien Desprez | fd06c73 | 2021-04-20 14:31:19 -0700 | [diff] [blame] | 27 | import shutil |
Andreas Gampe | 46b00d6 | 2017-05-17 15:12:27 -0700 | [diff] [blame] | 28 | import signal |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 29 | import subprocess |
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 30 | import unittest |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 31 | |
# Root of the Android source tree. Falls back to the current directory when
# the ANDROID_BUILD_TOP environment variable is not set.
ANDROID_BUILD_TOP = os.getenv("ANDROID_BUILD_TOP", ".")
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 33 | |
Pirama Arumuga Nainar | 8e96f31 | 2021-06-24 15:53:09 -0700 | [diff] [blame] | 34 | |
def FindClangDir():
  """Locate the prebuilt clang directory for this source tree.

  Runs build/soong/scripts/get_clang_version.py from ANDROID_BUILD_TOP, when
  that script exists, and appends the version it prints to the linux-x86
  clang prebuilts path.

  Returns:
    The clang directory path as a string, or None when the version script
    is not present.

  Raises:
    subprocess.CalledProcessError: the script exists but exits with a
      non-zero status.
  """
  version_script = ANDROID_BUILD_TOP + "/build/soong/scripts/get_clang_version.py"
  if not os.path.exists(version_script):
    return None

  # Errors are deliberately not caught: if the script is there but cannot
  # determine the clang version, the caller should see the failure.
  clang_version = subprocess.check_output(version_script, text=True).strip()
  return ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/" + clang_version
| 44 | |
| 45 | |
def FindSymbolsDir():
  """Ask the build system where the unstripped binaries live.

  Runs `build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED`
  with ANDROID_BUILD_TOP as the working directory.

  Returns:
    The command's standard output with surrounding whitespace removed. This
    is the empty string when the command prints nothing, for example when
    the script cannot be run from ANDROID_BUILD_TOP.
  """
  cmd = "build/soong/soong_ui.bash --dumpvar-mode --abs TARGET_OUT_UNSTRIPPED"
  # cwd= runs only the child in ANDROID_BUILD_TOP, so the working directory of
  # this process is never changed. subprocess.run() also waits for the child,
  # so no unreaped process is left behind. The exit status is intentionally
  # not checked: outside an Android tree the result is simply "".
  completed = subprocess.run(cmd, cwd=ANDROID_BUILD_TOP, stdout=subprocess.PIPE,
                             universal_newlines=True, shell=True)
  return completed.stdout.strip()
| 58 | |
# Directory prefix for unstripped binaries, resolved once at import time.
SYMBOLS_DIR = FindSymbolsDir()

# None until SetBitness() has looked at a trace; afterwards True when the
# trace looks 32-bit and False otherwise.
ARCH_IS_32BIT = None

VERBOSE = False

# Everything below is private to this module. Do not access it from elsewhere.
_CACHED_TOOLCHAIN = None  # Toolchain directory found by FindToolchain().
_CACHED_CXX_FILT = None   # Path of the llvm-cxxfilt binary used by CallCppFilt().

# Memoized symbolization results.
_SYMBOL_INFORMATION_ADDR2LINE_CACHE = {}  # lib -> {addr: [(symbol, location)]}
_SYMBOL_INFORMATION_OBJDUMP_CACHE = {}    # lib -> {addr: (symbol, offset)}
_SYMBOL_DEMANGLING_CACHE = {}             # mangled symbol -> demangled symbol
| 73 | |
Andreas Gampe | 46b00d6 | 2017-05-17 15:12:27 -0700 | [diff] [blame] | 74 | # Caches for pipes to subprocesses. |
| 75 | |
class ProcessCache:
  """Keeps a bounded number of helper subprocesses alive, keyed by command.

  A process is spawned the first time a command is requested and handed back
  on every later request for the same command. When the cache is full, the
  least recently requested process is terminated to make room.
  """

  # Max number of open pipes.
  _PIPE_MAX_OPEN = 10

  def __init__(self):
    # State is per instance. As class attributes the dict would be shared by
    # every ProcessCache, letting one cache hand out another cache's process.
    self._cmd2pipe = {}  # tuple(cmd) -> subprocess.Popen
    self._lru = []       # [(tuple(cmd), Popen)], most recently used first.

  def GetProcess(self, cmd):
    """Return the cached process for `cmd`, spawning it if necessary.

    Args:
      cmd: the command line as a sequence of strings.

    Returns:
      The subprocess.Popen object running `cmd`.
    """
    cmd_tuple = tuple(cmd)  # Need to use a tuple as lists can't be dict keys.
    pipe = self._cmd2pipe.get(cmd_tuple)
    if pipe is not None:
      # Already running: move it to the front of the LRU list.
      self._lru = [(cmd_tuple, pipe)] + [entry for entry in self._lru
                                         if entry[0] != cmd_tuple]
      return pipe

    # Not cached yet. Evict from the tail until there is room for one more.
    while len(self._lru) >= self._PIPE_MAX_OPEN:
      old_cmd, old_pipe = self._lru.pop()
      del self._cmd2pipe[old_cmd]
      self.TerminateProcess(old_pipe)

    pipe = self.SpawnProcess(cmd)
    self._cmd2pipe[cmd_tuple] = pipe
    self._lru.insert(0, (cmd_tuple, pipe))
    return pipe

  def SpawnProcess(self, cmd):
    """Start `cmd` with text-mode pipes attached to its stdin and stdout."""
    return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            universal_newlines=True)

  def TerminateProcess(self, pipe):
    """Close the pipes of `pipe`, terminate it and wait for it to exit."""
    pipe.stdin.close()
    pipe.stdout.close()
    pipe.terminate()
    pipe.wait()

  def KillAllProcesses(self):
    """Terminate every cached process and leave the cache empty."""
    for _, open_pipe in self._lru:
      self.TerminateProcess(open_pipe)
    # Reset the instance attributes. Assigning bare names here would only
    # create locals and leave the terminated processes in the cache.
    self._cmd2pipe = {}
    self._lru = []
| 120 | |
| 121 | |
# One process cache per helper tool: the symbolizer and the demangler.
_PIPE_ADDR2LINE_CACHE = ProcessCache()
_PIPE_CPPFILT_CACHE = ProcessCache()


# Process cache cleanup on shutdown. atexit.register() returns the function
# it is given, so it can be applied as a decorator.
@atexit.register
def CloseAllPipes():
  """Terminate every helper process held by the two module-level caches."""
  for cache in (_PIPE_ADDR2LINE_CACHE, _PIPE_CPPFILT_CACHE):
    cache.KillAllProcesses()
| 134 | |
| 135 | |
def PipeTermHandler(signum, frame):
  """Signal handler that shuts down the helper processes and exits.

  Args:
    signum: number of the signal being handled (unused).
    frame: interrupted stack frame (unused).
  """
  CloseAllPipes()
  # os._exit() ends the process immediately without running cleanup handlers.
  # NOTE(review): the exit status is 0 even though a signal ended the run.
  os._exit(0)


# Install the handler for the signals that normally end the process.
for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM):
  signal.signal(sig, PipeTermHandler)
| 143 | |
| 144 | |
| 145 | |
Ben Cheng | b42dad0 | 2013-04-25 15:14:04 -0700 | [diff] [blame] | 146 | |
def ToolPath(tool, toolchain=None):
  """Return the command to use for invoking `tool`.

  Args:
    tool: executable name, e.g. "llvm-objdump".
    toolchain: optional directory to use when the tool is not on PATH.
      FindToolchain() is consulted when this is omitted or empty.

  Returns:
    `tool` unchanged when shutil.which() finds it on PATH, otherwise the
    tool name joined onto the toolchain directory.
  """
  if not shutil.which(tool):
    return os.path.join(toolchain or FindToolchain(), tool)
  return tool
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 154 | |
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 155 | |
def FindToolchain():
  """Return the directory that holds the prebuilt LLVM binutils.

  The directory is looked up on the first call and remembered in
  _CACHED_TOOLCHAIN. Later calls return the remembered value.

  Returns:
    The llvm-binutils-stable directory under ANDROID_BUILD_TOP.

  Raises:
    Exception: that directory does not exist.
  """
  global _CACHED_TOOLCHAIN

  if not _CACHED_TOOLCHAIN:
    candidate = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"
    if not os.path.exists(candidate):
      raise Exception("Could not find llvm tool chain directory %s" % (candidate))
    _CACHED_TOOLCHAIN = candidate
    # Printed only on the call that actually performs the lookup.
    print("Using toolchain from:", _CACHED_TOOLCHAIN)
  return _CACHED_TOOLCHAIN
| 170 | |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 171 | |
def SymbolInformation(lib, addr):
  """Look up symbol information about a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of the form [(source_symbol, source_location,
    object_symbol_with_offset)].

    If the function has been inlined then the list may contain
    more than one element, with the symbols for the most deeply
    nested inlined location appearing first. The list is never
    empty: when nothing is known it is [(None, None, None)].

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  per_addr = SymbolInformationForSet(lib, {addr})
  if per_addr:
    records = per_addr.get(addr)
    if records:
      return records
  # The lookup failed, or produced nothing for this address.
  return [(None, None, None)]
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 193 | |
| 194 | |
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses from the given library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses

  Returns:
    A dictionary of the form {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} where each address has a non-empty list
    of associated symbols and locations, or None when `lib` is empty or
    either the symbolizer or the objdump lookup returns nothing.

    If the function has been inlined then the list may contain
    more than one element, with the symbols for the most deeply
    nested inlined location appearing first.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  line_info = CallLlvmSymbolizerForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  result = {}
  for addr in unique_addrs:
    # Every address gets at least one (symbol, location) pair.
    frames = line_info.get(addr) or [(None, None)]
    if addr in objdump_info:
      symbol, offset = objdump_info[addr]
      with_offset = FormatSymbolWithOffset(symbol, offset)
    else:
      with_offset = None
    # The same object symbol is attached to every inlined frame.
    result[addr] = [(src_symbol, src_location, with_offset)
                    for (src_symbol, src_location) in frames]

  return result
| 241 | |
| 242 | |
def CallLlvmSymbolizerForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Results are remembered per library in _SYMBOL_INFORMATION_ADDR2LINE_CACHE,
  so only addresses not seen before are sent to llvm-symbolizer.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    A dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.

    None is returned when `lib` is empty, when neither SYMBOLS_DIR + lib
    nor lib exists, or when the path found is a directory.
  """
  if not lib:
    return None

  result = {}
  addr_cache = _SYMBOL_INFORMATION_ADDR2LINE_CACHE.get(lib)
  if addr_cache is None:
    addr_cache = {}
    _SYMBOL_INFORMATION_ADDR2LINE_CACHE[lib] = addr_cache
    addrs = sorted(unique_addrs)
  else:
    # Single pass: answer known addresses from the cache and keep the rest,
    # still in sorted order, for the symbolizer.
    addrs = []
    for addr in sorted(unique_addrs):
      if addr in addr_cache:
        result[addr] = addr_cache[addr]
      else:
        addrs.append(addr)
    if not addrs:
      # Everything was cached, we're done.
      return result

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Make sure the symbols path is not a directory.
  if os.path.isdir(symbols):
    return None

  cmd = [ToolPath("llvm-symbolizer"), "--functions", "--inlines",
         "--demangle", "--obj=" + symbols, "--output-style=GNU"]
  child = _PIPE_ADDR2LINE_CACHE.GetProcess(cmd)

  for addr in addrs:
    try:
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      while True:
        symbol = child.stdout.readline().strip()
        if not symbol:
          break
        location = child.stdout.readline().strip()
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause llvm-symbolizer to emit a blank line.
          child.stdin.write("\n")
          child.stdin.flush()
          first = False
    except IOError as e:
      # Remove the / in front of the library name to match other output.
      records = [(None, lib[1:] + " ***Error: " + str(e))]
    result[addr] = records
    addr_cache[addr] = records
  return result
| 323 | |
| 324 | |
# Function lines look like:
#   000177b0 <android::IBinder::~IBinder()+0x2c>:
# The address and function are pulled out first, then the optional offset.
# This is tricky due to functions that look like "operator+(..)+0x2c", so the
# offset pattern is greedy and splits on the last "+0x".
_OBJDUMP_FUNC_RE = re.compile(r"^([a-f0-9]+) <(.*)>:$")
_OBJDUMP_OFFSET_RE = re.compile(r"(.*)\+0x([a-f0-9]*)")

# A disassembly line looks like:
#   177b2: b510 push {r4, lr}
_OBJDUMP_ASM_RE = re.compile(r"^ *([a-f0-9]+) *:")


def _ParseObjdumpOutput(lines, addrs):
  """Find the containing function for each address in disassembly output.

  Args:
    lines: iterable of llvm-objdump disassembly output lines.
    addrs: string hexadecimal addresses sorted by ascending numeric value.

  Returns:
    A dictionary {addr: (string symbol, offset)} with an entry for every
    address that matches the address of a disassembly line.
  """
  targets = [int(addr, 16) for addr in addrs]
  found = {}
  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  index = 0                # The address that we are currently looking for.

  for line in lines:
    if index >= len(targets):
      break

    func = _OBJDUMP_FUNC_RE.match(line)
    if func:
      # This is a new function, so record the function and its address.
      current_symbol_addr = int(func.group(1), 16)
      current_symbol = func.group(2)
      # Does it have an optional offset like: "foo(..)+0x2c"?
      with_offset = _OBJDUMP_OFFSET_RE.match(current_symbol)
      if with_offset:
        current_symbol = with_offset.group(1)
        if with_offset.group(2):
          current_symbol_addr -= int(with_offset.group(2), 16)
      continue

    asm = _OBJDUMP_ASM_RE.match(line)
    if not asm:
      continue
    line_addr = int(asm.group(1), 16)
    # Disassembly addresses only increase, so a target below the current line
    # can no longer match. Step past it instead of stalling on it, so the
    # addresses that follow are still found.
    while index < len(targets) and targets[index] <= line_addr:
      if targets[index] == line_addr:
        found[addrs[index]] = (current_symbol, line_addr - current_symbol_addr)
      index += 1

  return found


def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Results are remembered per library in _SYMBOL_INFORMATION_OBJDUMP_CACHE,
  so only addresses not seen before are looked up with llvm-objdump.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    A dictionary of the form {addr: (string symbol, offset)}. Addresses that
    llvm-objdump did not disassemble are absent. None is returned when `lib`
    is empty or when neither SYMBOLS_DIR + lib nor lib exists.
  """
  if not lib:
    return None

  result = {}
  addr_cache = _SYMBOL_INFORMATION_OBJDUMP_CACHE.get(lib)
  if addr_cache is None:
    addr_cache = {}
    _SYMBOL_INFORMATION_OBJDUMP_CACHE[lib] = addr_cache
    pending = list(unique_addrs)
  else:
    # Single pass: answer known addresses from the cache, keep the rest.
    pending = []
    for addr in sorted(unique_addrs):
      if addr in addr_cache:
        result[addr] = addr_cache[addr]
      else:
        pending.append(addr)
    if not pending:
      # Everything was cached, we're done.
      return result

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  if not pending:
    # Nothing to look up; avoids indexing into an empty list below.
    return result

  # Order numerically. A plain string sort puts "9" after "10", which breaks
  # both the address range below and the in-order scan of the disassembly.
  addrs = sorted(pending, key=lambda addr: int(addr, 16))
  start_addr_dec = str(int(addrs[0], 16))
  stop_addr_dec = str(int(addrs[-1], 16) + 8)
  cmd = [ToolPath("llvm-objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # The context manager closes the pipe and waits for llvm-objdump, so the
  # child is always reaped, even when parsing stops early.
  with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                        universal_newlines=True) as objdump:
    found = _ParseObjdumpOutput(objdump.stdout, addrs)

  addr_cache.update(found)
  result.update(found)
  return result
| 427 | |
| 428 | |
def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol with llvm-cxxfilt.

  The llvm-cxxfilt binary is located on first use and remembered in
  _CACHED_CXX_FILT. Each answer is remembered in _SYMBOL_DEMANGLING_CACHE.

  Args:
    mangled_symbol: the symbol to demangle, as a string.

  Returns:
    The line llvm-cxxfilt prints for the symbol, stripped of whitespace.

  Raises:
    Exception: llvm-cxxfilt cannot be found, or more than one candidate
      matches ./clang-r*/bin/llvm-cxxfilt.
  """
  global _CACHED_CXX_FILT

  if mangled_symbol in _SYMBOL_DEMANGLING_CACHE:
    return _SYMBOL_DEMANGLING_CACHE[mangled_symbol]

  if not _CACHED_CXX_FILT:
    clang_dir = FindClangDir()
    if clang_dir:
      cxxfilt = clang_dir + "/bin/llvm-cxxfilt"
      if not os.path.exists(cxxfilt):
        raise Exception("bin/llvm-cxxfilt missing from " + clang_dir)
      toolchains = [cxxfilt]
    else:
      # When run in CI, we don't have a way to find the clang version. But
      # llvm-cxxfilt should be available in the following relative path.
      toolchains = glob.glob("./clang-r*/bin/llvm-cxxfilt")
      if toolchains and len(toolchains) != 1:
        raise Exception("Expected one llvm-cxxfilt but found many: " +
                        ", ".join(toolchains))
    if not toolchains:
      raise Exception("Could not find llvm-cxxfilt tool")
    _CACHED_CXX_FILT = max(toolchains)

  # One symbol per line in, one demangled line out.
  process = _PIPE_CPPFILT_CACHE.GetProcess([_CACHED_CXX_FILT])
  process.stdin.write(mangled_symbol)
  process.stdin.write("\n")
  process.stdin.flush()
  demangled_symbol = process.stdout.readline().strip()

  _SYMBOL_DEMANGLING_CACHE[mangled_symbol] = demangled_symbol
  return demangled_symbol
| 464 | |
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 465 | |
def FormatSymbolWithOffset(symbol, offset):
  """Format a symbol together with its offset.

  Args:
    symbol: the symbol name.
    offset: integer offset from the start of the symbol.

  Returns:
    `symbol` itself when the offset is zero, otherwise "symbol+offset" with
    the offset written in decimal.
  """
  return symbol if offset == 0 else "%s+%d" % (symbol, offset)
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 470 | |
def FormatSymbolWithoutParameters(symbol):
  """Strip the parameter lists from a demangled function name.

  No attempt is made to parse the C++ signature. The text is scanned from
  the end and every top-level "(...)" group longer than "()" is cut out.
  Template arguments in "<...>" are kept.

  Args:
    symbol: the demangled symbol, or None or an empty string.

  Returns:
    The symbol without its parameter lists. The input is returned unchanged
    when it is empty or when its brackets do not pair up.
  """
  if not symbol:
    return symbol

  result = symbol
  substitutions = (
      (") const", ")"),                  # Strip const keyword.
      ("operator<<", "operator\u00AB"),  # Avoid unmatched '<'.
      ("operator>>", "operator\u00BB"),  # Avoid unmatched '>'.
      ("operator->", "operator\u2192"),  # Avoid unmatched '>'.
  )
  for old, new in substitutions:
    result = result.replace(old, new)

  opener_for = {')': '(', '>': '<'}
  pending = []  # Closing brackets still waiting for their opening partner.
  end = None    # One past the closing bracket of the current top-level pair.
  # Walk backward so that cutting text never shifts the indices still to come.
  for i in range(len(result) - 1, -1, -1):
    c = result[i]
    if c in opener_for:
      if not pending:
        end = i + 1
      pending.append(c)
    elif c in ('(', '<'):
      if not pending or opener_for[pending.pop()] != c:
        return symbol  # Malformed: character does not match its pair.
      if not pending and c == '(' and (end - i) > 2:
        result = result[:i] + result[end:]  # Remove substring (i, end).
  if pending:
    return symbol  # Malformed: missing pair.

  return result.strip()
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 502 | |
# Raw string literals: written as plain strings, "\#" is an invalid escape
# sequence that newer Python versions warn about.
# A backtrace frame: "#NN", a two character field, then an address of exactly
# 8 or 16 hex digits.
_TRACE_LINE_RE = re.compile(
    r"#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
# An ASAN frame: "#NN 0x<hex address> ...".
_ASAN_TRACE_LINE_RE = re.compile(r"#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+")


def SetBitness(lines):
  """Guess from a stack trace whether it came from a 32-bit process.

  The answer is stored in the module-level ARCH_IS_32BIT. It is False when
  no line gives a hint.

  Args:
    lines: iterable of trace lines (strings).
  """
  global ARCH_IS_32BIT

  ARCH_IS_32BIT = False
  for line in lines:
    trace_match = _TRACE_LINE_RE.search(line)
    if trace_match:
      # The address is zero padded, so its width gives the bitness.
      ARCH_IS_32BIT = len(trace_match.group(1)) != 16
      break
    asan_trace_match = _ASAN_TRACE_LINE_RE.search(line)
    if asan_trace_match:
      # We might be able to guess the bitness by the length of the address.
      if len(asan_trace_match.group(1)) > 8:
        # We know for a fact this is 64 bit, so we are done.
        ARCH_IS_32BIT = False
        break
      # This might be 32 bit, or just a small address. Keep going in this
      # case, but if we couldn't figure anything else out, go with 32 bit.
      ARCH_IS_32BIT = True
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 530 | |
class FindClangDirTests(unittest.TestCase):
  """Checks FindClangDir(). Skipped when ANDROID_BUILD_TOP is '.'."""

  @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.')
  def test_clang_dir_found(self):
    clang_dir = FindClangDir()
    self.assertIsNotNone(clang_dir)
| 535 | |
class SetBitnessTests(unittest.TestCase):
  """Checks the ARCH_IS_32BIT value SetBitness() derives from trace lines."""

  def _bitness_after(self, lines):
    # Run the detection, then read back the module-level flag it sets.
    SetBitness(lines)
    return ARCH_IS_32BIT

  def test_32bit_check(self):
    self.assertTrue(self._bitness_after(["#00 pc 000374e0"]))

  def test_64bit_check(self):
    self.assertFalse(self._bitness_after(["#00 pc 00000000000374e0"]))

  def test_32bit_asan_trace_line_toolchain(self):
    lines = ["#10 0xb5eeba5d (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"]
    self.assertTrue(self._bitness_after(lines))

  def test_64bit_asan_trace_line_toolchain(self):
    # A short address alone is inconclusive; the long one settles it.
    lines = ["#12 0x5d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)",
             "#12 0x11b35d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"]
    self.assertFalse(self._bitness_after(lines))
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 561 | |
class FormatSymbolWithoutParametersTests(unittest.TestCase):
  """Checks FormatSymbolWithoutParameters() on well and badly formed names."""

  def _check(self, symbol, expected):
    self.assertEqual(FormatSymbolWithoutParameters(symbol), expected)

  def _check_unchanged(self, symbol):
    self._check(symbol, symbol)

  def test_c(self):
    self._check_unchanged("foo")
    self._check_unchanged("foo+42")

  def test_simple(self):
    self._check("foo(int i)", "foo")
    self._check("foo(int i)+42", "foo+42")
    self._check("bar::foo(int i)+42", "bar::foo+42")
    self._check_unchanged("operator()")

  def test_templates(self):
    self._check("bar::foo<T>(vector<T>& v)", "bar::foo<T>")
    self._check("bar<T>::foo(vector<T>& v)", "bar<T>::foo")
    self._check("bar::foo<T>(vector<T<U>>& v)", "bar::foo<T>")
    self._check("bar::foo<(EnumType)0>(vector<(EnumType)0>& v)",
                "bar::foo<(EnumType)0>")

  def test_nested(self):
    self._check("foo(int i)::bar(int j)", "foo::bar")

  def test_unbalanced(self):
    # Brackets that do not pair up leave the symbol untouched.
    self._check_unchanged("foo(bar(int i)")
    self._check_unchanged("foo)bar(int i)")
    self._check_unchanged("foo<bar(int i)")
    self._check_unchanged("foo>bar(int i)")
| 588 | |
if __name__ == "__main__":
  # Executed directly: run the unit tests in this file with verbose output.
  unittest.main(verbosity=2)