blob: cadc34dc82eb43243ddda3492a6cdcc905a8cf2d [file] [log] [blame]
Greg Hartmana6e55202016-03-09 21:55:40 -08001#!/usr/bin/python
2
3"""Disassemble the code stored in a tombstone.
4
5The classes in this module use an interface, ProcessLine, so that they can be
6chained together to do arbitrary processing. The current classes support
7disassembling the bytes embedded in tombstones and printing output to stdout.
8"""
9
10
11import re
12import subprocess
13import sys
14import tempfile
15import architecture
16
17
# Assembly text written at the top of every scratch file: it declares the
# global function symbol _start so the dumped bytes that follow belong to it.
STANDARD_PROLOGUE = (
    "\n"
    " .type _start, %function\n"
    " .globl _start\n"
    "_start:\n"
)


# Used instead of STANDARD_PROLOGUE when the dumped code is Thumb: the standard
# prologue followed by directives that switch to 16-bit code and open the
# thumb_start function.
THUMB_PROLOGUE = STANDARD_PROLOGUE + (
    "\n"
    " .code 16\n"
    " .thumb_func\n"
    " .type thumb_start, %function\n"
    "thumb_start:\n"
)
31
32
def Disassemble(line_generator):
    """Yield the lines of a tombstone, expanding code dumps into disassembly.

    Header lines are passed through until the "ABI: '...'" line, which selects
    the toolchain. After that, register blocks are recorded as they pass
    through and every 'code around <register>' block is handed to
    ProcessCodeBlock.

    Args:
      line_generator: iterable of tombstone lines (an open file, a generator
        or a plain sequence).

    Yields:
      Output lines, each including its own line terminator.
    """
    # Both loops below and the nested helpers must continue from wherever the
    # previous consumer stopped. Taking a single iterator up front guarantees
    # that even when the caller passes a re-iterable such as a list, which
    # would otherwise restart from its first element in the second loop.
    line_generator = iter(line_generator)
    abi_line = re.compile("(ABI: '(.*)')")
    abi = None
    tools = None
    # Process global headers
    for line in line_generator:
        yield line
        abi_header = abi_line.search(line)
        if abi_header:
            abi = abi_header.group(2)
            # Look up the tools here so we don't do a lookup for each code
            # block.
            tools = architecture.Architecture(abi)
            break
    # The rest of the file consists of:
    #   o Lines that should pass through unchanged
    #   o Blocks of register values, which follow a 'pid: ...' line and end
    #     with 'backtrace:' line
    #   o Blocks of code represented as words, which start with
    #     'code around ...' and end with a line that doesn't look like a list
    #     of words.
    #
    # The only constraint on the ordering of these blocks is that the register
    # values must come before the first code block.
    #
    # It's easiest to nest register processing in the codeblock search loop.
    register_list_re = re.compile('^pid: ')
    codeblock_re = re.compile('^code around ([a-z0-9]+)')
    register_text = {}
    for line in line_generator:
        yield line
        if register_list_re.search(line):
            # A new 'pid:' line starts a fresh register dump; forget the
            # values recorded for any previous one.
            register_text = {}
            for output in ProcessRegisterList(line_generator, register_text):
                yield output
        code_match = codeblock_re.search(line)
        if code_match:
            for output in ProcessCodeBlock(
                    abi, tools, code_match.group(1), register_text,
                    line_generator):
                yield output
71
72
def ProcessRegisterList(line_generator, rval):
    """Pass a register dump through while recording its values.

    Every line read is yielded unchanged. Reading stops after the first line
    that starts with 'backtrace:' (that line is still yielded).

    Args:
      line_generator: iterator positioned just after the 'pid: ' line.
      rval: mapping that is filled in place with register name -> value text.

    Yields:
      The consumed lines, unmodified.
    """
    for text in line_generator:
        yield text
        if text.startswith('backtrace:'):
            break
        if not text.startswith(' '):
            continue
        # Indented lines hold the registers as alternating name, value words.
        tokens = text.split()
        assert len(tokens) % 2 == 0
        for name, value in zip(tokens[0::2], tokens[1::2]):
            rval[name] = value
85
86
def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
    """Replace one 'code around <register>' word dump with its disassembly.

    The dump lines are consumed from line_generator, rewritten as .byte
    directives, assembled, linked at the address of the first dump line and
    disassembled. The instruction at the register's address is prefixed with
    '--->'. The first line that does not have the expected dump-line length
    ends the block; that line is consumed and not yielded, and a single '\n'
    is yielded at the end.

    Args:
      abi: ABI name from the tombstone header; only compared against 'arm'.
      tools: object providing WordToBytes(word) plus Assemble(args),
        Link(args) and Disassemble(args), whose results are passed straight
        to subprocess (presumably complete command lines).
      register_name: register named in the 'code around' header line.
      register_text: dict mapping register names to their hexadecimal text.
      line_generator: iterator positioned just after the header line.

    Yields:
      The annotated disassembly lines followed by '\n'. If no dump line was
      read, only the '\n' is yielded and no external tool is run.
    """
    program_counter = register_text[register_name]
    program_counter_val = int(program_counter, 16)
    # Handle the 3 different file formats that we've observed.
    if len(program_counter) == 8:
        block_line_len = [67]
        block_num_words = 4
    else:
        assert len(program_counter) == 16
        block_line_len = [57, 73]
        block_num_words = 2
    # ARM code comes in two flavors: arm and thumb. Figure out the one
    # to use by peeking in the cpsr.
    if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
        prologue = THUMB_PROLOGUE
    else:
        prologue = STANDARD_PROLOGUE
    # Retains the hexadecimal text for the start of the block
    start_address = None
    # Maintains a numeric counter for the address of the current byte
    current_address = None
    scratch_file = tempfile.NamedTemporaryFile(suffix='.s')
    object_file = None
    linked_file = None
    disassembler = None
    try:
        scratch_file.write(prologue)
        # Now generate assembly from the bytes in the code block.
        for line in line_generator:
            # Be conservative and stop interpreting if the line length is
            # wrong. We can't count words because spaces can appear in the
            # text representation of the memory.
            if len(line) not in block_line_len:
                break
            words = line.split()
            # Double check the address at the start of each line
            if current_address is None:
                start_address = words[0]
                current_address = int(start_address, 16)
            else:
                assert current_address == int(words[0], 16)
            for word in words[1:block_num_words + 1]:
                # Handle byte swapping
                for byte in tools.WordToBytes(word):
                    # Emit a label at the desired program counter.
                    # This will cause the disassembler to resynchronize at
                    # this point, allowing us to position the arrow and also
                    # ensuring that we decode the instruction properly.
                    if current_address == program_counter_val:
                        scratch_file.write('program_counter_was_here:\n')
                    scratch_file.write(' .byte 0x%s\n' % byte)
                    current_address += 1
        # Without at least one dump line there is no start address to link
        # at, so there is nothing to assemble or disassemble.
        if start_address is not None:
            scratch_file.flush()
            # Assemble the scratch file and relocate it to the block address
            # with the linker.
            object_file = tempfile.NamedTemporaryFile(suffix='.o')
            subprocess.check_call(tools.Assemble([
                '-o', object_file.name, scratch_file.name]))
            linked_file = tempfile.NamedTemporaryFile(suffix='.o')
            subprocess.check_call(tools.Link([
                '-Ttext', '0x' + start_address,
                '-o', linked_file.name, object_file.name]))
            disassembler = subprocess.Popen(
                tools.Disassemble(['-S', linked_file.name]),
                stdout=subprocess.PIPE)
            # Skip some of the annoying assembler headers.
            emit = False
            start_pattern = start_address + ' '
            # objdump padding varies between 32 bit and 64 bit architectures
            arrow_pattern = re.compile('^[ 0]*%8x:\t' % program_counter_val)
            for line in disassembler.stdout:
                emit = emit or line.startswith(start_pattern)
                if (emit and len(line) > 1 and
                        'program_counter_was_here' not in line):
                    if arrow_pattern.search(line):
                        yield '--->' + line
                    else:
                        yield ' ' + line
    finally:
        # Runs on normal completion, on a tool failure, and when the consumer
        # abandons this generator early: reap the child process and remove
        # the temporary files in every case.
        if disassembler is not None:
            disassembler.stdout.close()
            disassembler.wait()
        for temp_file in (linked_file, object_file, scratch_file):
            if temp_file is not None:
                temp_file.close()
    yield '\n'
162
163
def main(argv):
    """Disassemble each tombstone file named on the command line to stdout.

    Args:
      argv: full argument vector; argv[0] is skipped and every remaining
        entry is treated as the path of a tombstone file to process.
    """
    for fn in argv[1:]:
        # Close each input as soon as it has been processed instead of
        # leaving every opened file to the garbage collector.
        with open(fn, 'r') as tombstone:
            for line in Disassemble(tombstone):
                # Lines already carry their own newline, so write them
                # verbatim rather than letting print append another one.
                sys.stdout.write(line)


if __name__ == '__main__':
    main(sys.argv)