#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""
21
Elliott Hughes08365932014-06-13 18:12:25 -070022import glob
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070023import os
24import re
25import subprocess
26
# Root of the Android source tree, taken from the environment.  Fall back to
# the current directory when the variable is unset *or* empty; a plain
# os.environ[...] lookup would raise KeyError before the fallback could apply.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
30
def FindSymbolsDir():
  """Return the directory holding the unstripped (symbol-bearing) binaries.

  Runs make against build/core/config.mk from ANDROID_BUILD_TOP and asks it
  to dump TARGET_OUT_UNSTRIPPED.

  Returns:
    The whitespace-stripped make output joined onto ANDROID_BUILD_TOP with
    os.path.join (so an absolute path printed by make is returned as-is).
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # Run the child in ANDROID_BUILD_TOP via cwd= instead of chdir()-ing this
  # process, so the caller's working directory is never disturbed.
  # universal_newlines=True makes the output text on both Python 2 and 3.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP, universal_newlines=True)
  # communicate() drains and closes the pipe and reaps the child, so neither
  # a file descriptor nor a zombie process is left behind.
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())
42
# Directory prefix under which symbol files are looked up (library paths are
# appended to it).  Computed once, at import time, by FindSymbolsDir().
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture.  FindToolchain() uses it to pick a toolchain and
# StripPC() uses it to decide whether to clear the Thumb bit.
ARCH = "arm"

# Toolchain directory cached by FindToolchain(); None until the first lookup.
TOOLCHAIN = None
Ben Chengb42dad02013-04-25 15:14:04 -070048
def ToolPath(tool, toolchain=None):
  """Return a fully-qualified path to the specified tool.

  Args:
    tool: unprefixed tool name, e.g. "addr2line" or "objdump".
    toolchain: directory to search.  When empty or None, the directory
      returned by FindToolchain() is used.

  Returns:
    The path of a file in the toolchain directory whose name matches
    "*-<tool>".  If several files match, the lexicographically smallest path
    is returned so the choice is deterministic (glob order is arbitrary).

  Raises:
    IndexError: if no file in the toolchain directory matches.
  """
  if not toolchain:
    toolchain = FindToolchain()
  pattern = os.path.join(toolchain, "*-" + tool)
  matches = sorted(glob.glob(pattern))
  if not matches:
    # Same exception type an empty-list index would raise, but with a
    # message that says what was being looked for.
    raise IndexError("No tool matching %s" % pattern)
  return matches[0]
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070054
def FindToolchain():
  """Returns the toolchain matching ARCH. Assumes that you're lunched
  such that the necessary toolchain is either your primary or secondary.
  TODO: we could make this 'just work' for most users by just globbing the
  newest toolchains for every architecture out of prebuilts/, but other
  parts of this tool assume you're lunched correctly anyway.

  The result is cached in the module-level TOOLCHAIN, so the environment is
  only inspected (and the "Using toolchain" line only printed) once.

  Returns:
    The toolchain directory taken from ANDROID_TOOLCHAIN or
    ANDROID_TOOLCHAIN_2ND_ARCH.

  Raises:
    Exception: if neither variable names a toolchain for ARCH, or if the
      selected toolchain contains no addr2line.
  """
  global TOOLCHAIN
  if TOOLCHAIN is not None:
    return TOOLCHAIN

  # We use slightly different names from GCC, and there's only one toolchain
  # for x86/x86_64.
  gcc_names = {"arm64": "aarch64", "mips": "mipsel", "x86": "x86_64"}
  gcc_arch = gcc_names.get(ARCH, ARCH)
  marker = "/" + gcc_arch + "-linux-"

  # Either variable may be absent (e.g. no secondary architecture); treat a
  # missing variable as "no match" rather than failing with KeyError.
  candidates = [os.environ.get("ANDROID_TOOLCHAIN", ""),
                os.environ.get("ANDROID_TOOLCHAIN_2ND_ARCH", "")]
  for candidate in candidates:
    if marker in candidate:
      toolchain = candidate
      break
  else:
    raise Exception("Could not find tool chain for %s" % (gcc_arch))

  # ToolPath() raises IndexError when nothing matches; fold that into the
  # same "No addr2line" error as a path that does not exist.
  try:
    addr2line = ToolPath("addr2line", toolchain)
  except IndexError:
    addr2line = None
  if not addr2line or not os.path.exists(addr2line):
    raise Exception("No addr2line for %s" % (toolchain))

  TOOLCHAIN = toolchain
  # Single parenthesised argument: prints identically on Python 2 and 3.
  print("Using toolchain from: %s" % TOOLCHAIN)
  return TOOLCHAIN
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070091
def SymbolInformation(lib, addr):
  """Look up symbol information for a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples.

    When the code at the address was inlined the list can hold several
    entries, the most deeply nested inlined location first.  The list is
    never empty: if nothing could be found it is [(None, None, None)].

    Callers usually display source_location and object_symbol_with_offset
    from the last entry.
  """
  lookup = SymbolInformationForSet(lib, {addr})
  if lookup:
    records = lookup.get(addr)
    if records:
      return records
  # No symbol data at all: hand back a single all-None placeholder.
  return [(None, None, None)]
Iliyan Malchev4929d6a2011-08-04 17:44:40 -0700113
114
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses in one library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings

  Returns:
    None if lib is empty or if either the addr2line or the objdump lookup
    produced nothing.  Otherwise a dictionary
    {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
    with one entry per requested address.

    Each list is non-empty.  When the code was inlined it can hold several
    entries, the most deeply nested inlined location first; every entry for
    an address shares the same object_symbol_with_offset.

    Callers usually display source_location and object_symbol_with_offset
    from the last entry.
  """
  if not lib:
    return None

  line_info = CallAddr2LineForSet(lib, unique_addrs)
  if not line_info:
    return None

  objdump_info = CallObjdumpForSet(lib, unique_addrs)
  if not objdump_info:
    return None

  combined = {}
  for addr in unique_addrs:
    # Fall back to one empty source record so the list is never empty.
    source_records = line_info.get(addr) or [(None, None)]

    object_label = None
    if addr in objdump_info:
      object_symbol, object_offset = objdump_info[addr]
      object_label = FormatSymbolWithOffset(object_symbol, object_offset)

    combined[addr] = [(symbol, location, object_label)
                      for (symbol, location) in source_records]

  return combined
161
162
def _QueryAddr2Line(child, addr):
  """Send one address to a running addr2line and collect its records.

  Args:
    child: Popen object for addr2line with text-mode stdin/stdout pipes.
    addr: string hexadecimal address, without the "0x" prefix.

  Returns:
    A (records, alive) pair.  records is a list of (symbol, location)
    tuples; alive is False if addr2line's output ended (EOF) while reading.
  """
  child.stdin.write("0x%s\n" % addr)
  child.stdin.flush()
  records = []
  first = True
  while True:
    raw_symbol = child.stdout.readline()
    raw_location = child.stdout.readline()
    if not raw_symbol or not raw_location:
      # readline() returns "" only at EOF: addr2line has gone away.  Stop
      # instead of appending empty records forever.
      return records, False
    symbol = raw_symbol.strip()
    if symbol == "??":
      symbol = None
    location = raw_location.strip()
    if location == "??:0":
      location = None
    if symbol is None and location is None:
      # Either the address is unknown or this is the reply to the sentinel.
      return records, True
    records.append((symbol, location))
    if first:
      # Write a blank line as a sentinel so we know when to stop
      # reading inlines from the output.
      # The blank line will cause addr2line to emit "??\n??:0\n".
      child.stdin.write("\n")
      child.stdin.flush()
      first = False


def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or its symbol file does not exist.  Otherwise a
    dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # universal_newlines=True gives text pipes on both Python 2 and 3.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  result = {}
  alive = True
  try:
    for addr in sorted(unique_addrs):
      if alive:
        result[addr], alive = _QueryAddr2Line(child, addr)
      else:
        # addr2line already exited; report "nothing found" for the rest
        # rather than writing to a dead pipe.
        result[addr] = []
  finally:
    # Close both pipes and reap the child so no zombie process is left.
    child.stdin.close()
    child.stdout.close()
    child.wait()
  return result
218
219
def StripPC(addr):
  """Remove the Thumb bit from a program counter value on ARM.

  Args:
    addr: the program counter address, as an integer

  Returns:
    addr with bit 0 cleared when ARCH is "arm"; addr unchanged otherwise.
  """
  if ARCH != "arm":
    return addr
  # The lowest bit is the Thumb bit, not part of the instruction address.
  return addr & ~1
234
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    None if lib is empty or its symbol file does not exist.  Otherwise a
    dictionary of the form {addr: (string symbol, offset)}, keyed by the
    original address strings, containing only the addresses that were found
    at the start of a disassembled instruction.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  # Pair each address string with its numeric, Thumb-stripped value and sort
  # numerically.  Sorting the hex strings themselves would misorder
  # addresses with different digit counts ("10000" sorts before "9abc").
  targets = sorted((StripPC(int(addr, 16)), addr) for addr in unique_addrs)
  if not targets:
    return {}

  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + str(targets[0][0]),
         "--stop-address=" + str(targets[-1][0] + 8),
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  target_index = 0         # Index of the target we are currently looking for.

  # universal_newlines=True gives text lines on both Python 2 and 3.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                           universal_newlines=True)
  result = {}
  try:
    for line in child.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)
        continue

      # Is it a disassembly line like:
      #   177b2:  b510        push  {r4, lr}
      components = asm_regexp.match(line)
      if not components:
        continue
      i_addr = int(components.group(1), 16)

      # Targets below this instruction can no longer match (they did not
      # fall on an instruction start); skip them so later targets are not
      # stranded behind them.
      while target_index < len(targets) and targets[target_index][0] < i_addr:
        target_index += 1
      # Several address strings may map to this instruction (e.g. "1000" and
      # "1001" after Thumb stripping); record every one of them.
      while target_index < len(targets) and targets[target_index][0] == i_addr:
        i_target, target_addr = targets[target_index]
        result[target_addr] = (current_symbol, i_target - current_symbol_addr)
        target_index += 1
      if target_index >= len(targets):
        break
  finally:
    # Close the pipe and reap objdump so no zombie process is left.
    child.stdout.close()
    child.wait()

  return result
317
318
def CallCppFilt(mangled_symbol):
  """Demangle a symbol name with the toolchain's c++filt.

  Args:
    mangled_symbol: the mangled symbol name, as a string.

  Returns:
    The first line of c++filt's output with surrounding whitespace removed
    (an empty string if c++filt printed nothing).
  """
  cmd = [ToolPath("c++filt")]
  # universal_newlines=True gives text pipes on both Python 2 and 3.
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             universal_newlines=True)
  # communicate() writes the input, closes stdin, drains stdout and reaps
  # the child, so no pipe or zombie process is left behind.
  output = process.communicate(mangled_symbol + "\n")[0]
  return output.split("\n", 1)[0].strip()
328
def FormatSymbolWithOffset(symbol, offset):
  """Render a symbol together with its offset.

  Args:
    symbol: the symbol name
    offset: integer offset from the start of the symbol

  Returns:
    symbol itself when offset is 0, otherwise "<symbol>+<offset>" with the
    offset written in decimal.
  """
  if offset != 0:
    return "%s+%d" % (symbol, offset)
  return symbol