# Given a path to llvm-objdump and a directory tree, spider the directory tree,
# dumping every object file encountered with the options needed to demangle the
# symbols it contains, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.
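#
# Example invocation (illustrative; assumes this script is saved as
# demangle_tree.py and that llvm-objdump is on PATH):
#
#   python demangle_tree.py /path/to/build/tree --extensions=o,obj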

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

args = None

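# parse_line() extracts a (mangled, demangled) pair from one line of
# `llvm-objdump -t -demangle` output.  It looks for an MSVC-mangled name
# (these start with '?') followed by its demangled form in parentheses,
# e.g. (illustrative):
#
#   ?foo@@YAXXZ (void __cdecl foo(void))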
def parse_line(line):
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        return None, None
    mangled = line[question : open_paren]
    demangled = line[open_paren+1 : close_paren]
    return mangled.strip(), demangled.strip()

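# Tally for one object file or one directory: which files crashed llvm-objdump,
# how many symbols were seen, and the set of mangled names that failed to
# demangle.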
class Result(object):
    def __init__(self):
        self.crashed = []
        self.file = None
        self.nsymbols = 0
        self.errors = set()
        self.nfiles = 0

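# Bookkeeping for chunked processing: the queue of (directory, object files)
# pairs still to be handled, how many files that queue holds, the carried-over
# result of a directory whose files were split across chunks, and the running
# cumulative totals.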
class MapContext(object):
    def __init__(self):
        self.rincomplete = None
        self.rcumulative = Result()
        self.pending_objs = []
        self.npending = 0

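# Run `llvm-objdump -t -demangle` on a single object file and tally its
# symbols.  A non-zero exit code is recorded as a crash; a demangled field
# containing "invalid mangled name" is recorded as a demangling error.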
def process_file(path, objdump):
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    output = stdout.decode('utf-8')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r

def add_results(r1, r2):
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles

def print_result_row(directory, result):
    print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))

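# Pull up to chunk_size object files off the front of the pending queue,
# demangle them in parallel on the pool, and fold the per-file results into
# per-directory results.  Directories that finish within this chunk are printed
# and rolled into the cumulative totals; a directory whose files were only
# partially consumed is carried over in context.rincomplete.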
def process_one_chunk(pool, chunk_size, objdump, context):
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        dir_result = Result()
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert(len(objs) == chunk_size or context.npending == 0)

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)

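# Keep draining full chunks while at least chunk_size files are pending; any
# remainder is left for the final, partial chunk issued from go().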
def process_pending_files(pool, chunk_size, objdump, context):
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)

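# Walk the directory tree, queue object files per directory, feed them to the
# worker pool in chunks, and print one status row per directory followed by a
# final summary.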
def go():
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(',')
    extensions = [x if x[0] == '.' else '.' + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                file, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir.
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        assert(context.npending < pool_size)
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print(" " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print(" " + f)
        print("Summary:")
        # Guard against division by zero when no symbols / files were seen.
        spct = float(nsuccess) / total.nsymbols if total.nsymbols else 0.0
        fpct = float(nfailed) / total.nsymbols if total.nsymbols else 0.0
        cpct = float(ncrashed) / nfiles if nfiles else 0.0
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except:
        traceback.print_exc()

    pool.close()
    pool.join()

if __name__ == "__main__":
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump. If not specified ' +
                             'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`). ' +
                             'By default this will be `obj` on Windows and `o` otherwise.')

    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()