Joe Onorato | 75f444e | 2017-04-01 16:26:17 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python2.7 |
| 2 | |
| 3 | import argparse |
| 4 | import datetime |
| 5 | import re |
| 6 | import subprocess |
| 7 | import sys |
| 8 | |
| 9 | import logs |
| 10 | import ps |
| 11 | |
# Matches a duration written as optional week/day/hour/minute/second fields, in that
# order, e.g. "1w2d3h4m5s". Every field is optional, so the pattern also matches the
# empty string. The digits of each field are captured in the even-numbered groups
# (2=weeks, 4=days, 6=hours, 8=minutes, 10=seconds).
DURATION_RE = re.compile("((\\d+)w)?((\\d+)d)?((\\d+)h)?((\\d+)m)?((\\d+)s)?")
| 13 | |
class Bucket(object):
  """Accumulated stats for one key of a Stats object: a count, a memory total and the lines."""

  def __init__(self):
    # A fresh bucket has seen nothing yet.
    self.lines = []
    self.memory = 0
    self.count = 0

  def __str__(self):
    # Rendered as "(count,memory)".
    summary = (self.count, self.memory)
    return "(%s,%s)" % summary
| 23 | |
| 24 | |
class Stats(object):
  """A group of stats with a particular key, where both memory and count are tracked.

  Each distinct key owns one Bucket holding the number of log lines added under that
  key, the sum of their memory() values, and the log lines themselves.
  """

  def __init__(self):
    # Maps key -> Bucket.
    self._data = dict()

  def add(self, key, logLine):
    """Record logLine under key, creating the key's Bucket on first use.

    Args:
      key: hashable value to group by.
      logLine: object exposing a memory() method; it is also retained in the bucket.
    """
    bucket = self._data.get(key)
    # Compare against None explicitly so the check does not depend on Bucket truthiness.
    if bucket is None:
      bucket = Bucket()
      self._data[key] = bucket
    bucket.count += 1
    bucket.memory += logLine.memory()
    bucket.lines.append(logLine)

  def __iter__(self):
    """Iterate over (key, bucket) pairs in no particular order."""
    return iter(self._data.items())

  def data(self):
    """Return a new list of (key, bucket) pairs in no particular order."""
    return list(self._data.items())

  def byCount(self):
    """Return (key, bucket) pairs sorted by bucket.count, largest first."""
    # A key function with reverse=True keeps ties in their incoming order, exactly
    # like the negated cmp function it replaces, and runs on Python 2.7 and 3.
    return sorted(self.data(), key=lambda pair: pair[1].count, reverse=True)

  def byMemory(self):
    """Return (key, bucket) pairs sorted by bucket.memory, largest first."""
    return sorted(self.data(), key=lambda pair: pair[1].memory, reverse=True)
| 54 | |
| 55 | |
def ParseDuration(s):
  """Parse a duration of the format .w.d.h.m.s (e.g. "1d12h") into the number of seconds.

  Every field is optional, but the fields present must appear in week, day, hour,
  minute, second order.

  Args:
    s: the duration string.

  Returns:
    The total number of seconds as an int, or 0 when s is empty or is not, in its
    entirety, a valid duration.
  """
  m = DURATION_RE.match(s)
  # All fields in DURATION_RE are optional, so match() succeeds on any string and
  # simply stops at the first character it cannot use. Require the whole string to
  # be consumed so input such as "1h30" or "5mfoo" is rejected, not half-parsed.
  if not m or m.end() != len(s):
    return 0
  # The digits live in the even-numbered groups; an absent field yields None.
  weeks, days, hours, minutes, seconds = [int(m.group(i) or 0) for i in (2, 4, 6, 8, 10)]
  return (weeks * 604800) + (days * 86400) + (hours * 3600) + (minutes * 60) + seconds
| 73 | |
def FormatMemory(n):
  """Render a byte count as a whole number of gb, mb, kb or b, right-aligned in 10 columns."""
  # Largest unit first; the first unit that n reaches wins.
  units = (
      (1024 * 1024 * 1024, "%10d gb"),
      (1024 * 1024, "%10d mb"),
      (1024, "%10d kb"),
  )
  for scale, fmt in units:
    if n >= scale:
      return fmt % (n // scale)
  return "%10d b " % n
| 84 | |
def FormateTimeDelta(td):
  """Render a timedelta in the w/d/h/m/s notation accepted on the command line.

  Fields whose value is zero are left out; a duration of zero whole seconds is "0s".
  """
  remaining = (td.days * 86400) + (td.seconds) + int(td.microseconds / 1000000)
  if remaining == 0:
    return "0s"
  pieces = []
  # Peel off the largest units first, carrying the remainder down to the next one.
  for unit, fmt in ((604800, "%dw"), (86400, "%dd"), (3600, "%dh"), (60, "%dm")):
    if remaining >= unit:
      amount, remaining = divmod(remaining, unit)
      pieces.append(fmt % amount)
  if remaining > 0:
    pieces.append("%ds" % remaining)
  return "".join(pieces)
| 110 | |
| 111 | |
def WriteResult(totalCount, totalMemory, bucket, text):
  """Write a bucket in the normalized format.

  The row shows the bucket's count and formatted memory, each followed by its
  percentage of the corresponding total, and then the caller-supplied text.

  Args:
    totalCount: overall count used as the denominator of the count percentage.
    totalMemory: overall memory used as the denominator of the memory percentage.
    bucket: object with count and memory attributes.
    text: label printed at the end of the row.
  """
  # A zero total would raise ZeroDivisionError; report 0% for that column.
  countPercent = (100 * bucket.count // totalCount) if totalCount else 0
  memoryPercent = (100 * bucket.memory // totalMemory) if totalMemory else 0
  # Single-argument print(...) behaves the same on Python 2.7 and Python 3.
  print("%7d (%2d%%) %s (%2d%%) %s" % (bucket.count, countPercent,
      FormatMemory(bucket.memory), memoryPercent, text))
| 116 | |
| 117 | |
def ParseArgs(argv):
  """Parse the command line.

  Args:
    argv: the full argument vector with the program name first (e.g. sys.argv).

  Returns:
    The argparse namespace with input, clear, duration and rawlogs, plus
    durationSec: the --duration value converted to seconds, or 0 when not given.
  """
  parser = argparse.ArgumentParser(
      description="Summarize which tags, processes and messages dominate a logcat capture.")
  parser.add_argument("input", type=str, nargs="?",
      help="the logs file to read")
  parser.add_argument("--clear", action="store_true",
      help="clear the log buffer before running logcat")
  parser.add_argument("--duration", type=str, nargs=1,
      help="how long to run for (XwXdXhXmXs)")
  parser.add_argument("--rawlogs", type=str, nargs=1,
      help="file to put the rawlogs into")

  # Parse the argv we were handed (minus the program name), not the process-wide sys.argv.
  args = parser.parse_args(argv[1:])

  # --duration uses nargs=1, so its value arrives as a one-element list.
  args.durationSec = ParseDuration(args.duration[0]) if args.duration else 0

  return args
| 134 | |
| 135 | |
def _PrintTop(title, entries, totalCount, totalMemory, describe):
  """Print one report section: the title, a matching underline, then the leading rows.

  Args:
    title: heading text for the section.
    entries: list of (key, bucket) pairs, already sorted most significant first.
    totalCount: overall number of log lines, for the count percentage column.
    totalMemory: overall memory of all log lines, for the memory percentage column.
    describe: callable (key, bucket) -> text shown at the end of each row.
  """
  print(title)
  print("-" * len(title))
  # NOTE(review): rows are capped at 11 per section to match the historical output
  # (the old loops checked the limit only after printing row index 10); confirm
  # whether a top 10 was intended.
  for key, bucket in entries[:11]:
    WriteResult(totalCount, totalMemory, bucket, describe(key, bucket))


def main(argv):
  """Read log lines from a file or from adb logcat and print a usage summary.

  The summary lists the top tags by count and by memory, the top processes by
  memory, the most duplicated messages by count and by memory, the overall totals
  and how long reading took.

  Args:
    argv: the full argument vector with the program name first (e.g. sys.argv).
  """
  args = ParseArgs(argv)

  processes = ps.ProcessSet()

  rawlogs = open(args.rawlogs[0], "w") if args.rawlogs else None
  infile = None
  logcat = None

  totalCount = 0
  totalMemory = 0
  byTag = Stats()
  byPid = Stats()
  byText = Stats()

  try:
    # Choose the input
    if args.input:
      # From a file of raw logs
      try:
        infile = open(args.input, "r")
      except IOError:
        # args.input is a plain string (nargs="?"), so report it whole.
        sys.stderr.write("Error opening file for read: %s\n" % args.input)
        sys.exit(1)
    else:
      # From running adb logcat on an attached device
      if args.clear:
        subprocess.check_call(["adb", "logcat", "-c"])
      cmd = ["adb", "logcat", "-v", "long", "-D", "-v", "uid"]
      if not args.durationSec:
        cmd.append("-d")
      logcat = subprocess.Popen(cmd, stdout=subprocess.PIPE)
      infile = logcat.stdout

      # Do one update because we know we'll need it, but then don't do it again
      # if we're not streaming them.
      # NOTE(review): this update is kept on the logcat path only; confirm that
      # reading from a file is meant to skip it.
      processes.Update(True)
      if args.durationSec:
        processes.doUpdates = True

    startTime = datetime.datetime.now()

    # Read the log lines from the parser and build a big mapping of everything
    for logLine in logs.ParseLogcat(infile, processes, args.durationSec):
      if rawlogs:
        rawlogs.write("%-10s %s %-6s %-6s %-6s %s/%s: %s\n" % (logLine.buf, logLine.timestamp,
            logLine.uid, logLine.pid, logLine.tid, logLine.level, logLine.tag, logLine.text))

      totalCount += 1
      totalMemory += logLine.memory()
      byTag.add(logLine.tag, logLine)
      byPid.add(logLine.pid, logLine)
      byText.add(logLine.text, logLine)

    endTime = datetime.datetime.now()
  finally:
    # Release everything opened above, including when reading fails or we exit early.
    if rawlogs is not None:
      rawlogs.close()
    if infile is not None:
      infile.close()
    if logcat is not None:
      # Without -d, logcat keeps streaming after we stop reading; stop and reap it.
      if logcat.poll() is None:
        logcat.terminate()
      logcat.wait()

  # Print the log analysis

  # At this point, everything is loaded, don't bother looking
  # for new processes
  processes.doUpdates = False

  def describeTag(tag, bucket):
    return tag

  def describePid(pid, bucket):
    return "%-8s %s" % (pid, processes.FindPid(pid).DisplayName())

  def describeText(text, bucket):
    # Every line in the bucket shares the same text; show the first as the sample.
    logLine = bucket.lines[0]
    return "%s/%s: %s" % (logLine.level, logLine.tag, logLine.text)

  _PrintTop("Top tags by count", byTag.byCount(), totalCount, totalMemory, describeTag)

  print("")
  _PrintTop("Top tags by memory", byTag.byMemory(), totalCount, totalMemory, describeTag)

  print("")
  _PrintTop("Top Processes by memory", byPid.byMemory(), totalCount, totalMemory, describePid)

  print("")
  _PrintTop("Top Duplicates by count", byText.byCount(), totalCount, totalMemory, describeText)

  print("")
  # Ranked by memory, as the heading says.
  _PrintTop("Top Duplicates by memory", byText.byMemory(), totalCount, totalMemory, describeText)

  print("")
  print("Totals")
  print("------")
  print("%7d %s" % (totalCount, FormatMemory(totalMemory)))

  print("Actual duration: %s" % FormateTimeDelta(endTime - startTime))
| 258 | |
# Run the analysis only when executed as a script; main() is handed the full argv,
# program name included.
if __name__ == "__main__":
  main(sys.argv)
| 261 | |
| 262 | # vim: set ts=2 sw=2 sts=2 tw=100 nocindent autoindent smartindent expandtab: |