Merge "simpleperf: add binary_cache_builder script."
am: a5a101294e

Change-Id: I0c2a0c8037cd95e25dab555e9c348ab7270cfaf7
diff --git a/simpleperf/scripts/binary_cache_builder.config b/simpleperf/scripts/binary_cache_builder.config
new file mode 100644
index 0000000..49cc5ae
--- /dev/null
+++ b/simpleperf/scripts/binary_cache_builder.config
@@ -0,0 +1,27 @@
+# This configuration is written in python and used by binary_cache_builder.py.
+
+import os
+import os.path
+
+# path of profiling record data.
+perf_data_path = "perf.data"
+
+
+# directories to find binaries with symbols and debug information.
+# If binaries are found in any of these directories, having the same build_id
+# as the one recorded in perf.data, then we copy the binary in the directory
+# instead of pulling the binary from device.
+symfs_dirs = []
+
+
+# directory to cache binaries. To report precisely, we pull needed binaries
+# to host. However, we don't need to pull a binary if there is already a binary
+# in binary_cache_dir having the same build_id as the one on device.
+binary_cache_dir = "binary_cache"
+
+
+# path of adb.
+adb_path = "adb"
+
+# path of readelf, set to "" if not available.
+readelf_path = "readelf"
\ No newline at end of file
diff --git a/simpleperf/scripts/binary_cache_builder.py b/simpleperf/scripts/binary_cache_builder.py
new file mode 100644
index 0000000..649d92b
--- /dev/null
+++ b/simpleperf/scripts/binary_cache_builder.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""binary_cache_builder.py: read perf.data, collect binaries needed by
+    it, and put them in binary_cache.
+"""
+
+from __future__ import print_function
+import argparse
+import os
+import os.path
+import re
+import shutil
+import subprocess
+import sys
+import time
+
+from simpleperf_report_lib import *
+from utils import *
+
+
+class BinaryCacheBuilder(object):
+    """Collect all binaries needed by perf.data in binary_cache."""
+    def __init__(self, config):  # config: dict of settings, see load_config()
+        config_names = ['perf_data_path', 'symfs_dirs', 'adb_path',
+                        'readelf_path', 'binary_cache_dir']
+        for name in config_names:
+            if name not in config:  # every listed setting is mandatory
+                log_fatal('config for "%s" is missing' % name)
+
+        self.perf_data_path = config.get('perf_data_path')
+        if not os.path.isfile(self.perf_data_path):
+            log_fatal("can't find file %s" % self.perf_data_path)
+        self.symfs_dirs = config.get('symfs_dirs')
+        for symfs_dir in self.symfs_dirs:
+            if not os.path.isdir(symfs_dir):
+                log_fatal("symfs_dir '%s' is not a directory" % symfs_dir)
+        self.adb = AdbHelper(config['adb_path'])
+        self.readelf_path = config['readelf_path']
+        self.binary_cache_dir = config['binary_cache_dir']
+        if not os.path.isdir(self.binary_cache_dir):  # create the cache on first use
+            os.makedirs(self.binary_cache_dir)
+
+
+    def build_binary_cache(self):
+        self._collect_used_binaries()
+        self._copy_binaries_from_symfs_dirs()
+        self._pull_binaries_from_device()
+        self._pull_kernel_symbols()
+
+
+    def _collect_used_binaries(self):
+        """read perf.data, collect all used binaries and their build id (if available)."""
+        # A dict mapping from binary name to build_id
+        binaries = dict()
+        lib = ReportLib()
+        lib.SetRecordFile(self.perf_data_path)
+        lib.SetLogSeverity('error')
+        while True:
+            sample = lib.GetNextSample()
+            if sample is None:  # no more samples
+                lib.Close()
+                break
+            symbols = [lib.GetSymbolOfCurrentSample()]
+            callchain = lib.GetCallChainOfCurrentSample()
+            for i in range(callchain.nr):
+                symbols.append(callchain.entries[i].symbol)
+
+            for symbol in symbols:
+                dso_name = symbol.dso_name
+                if dso_name not in binaries:  # look up each binary's build id only once
+                    binaries[dso_name] = lib.GetBuildIdForPath(dso_name)
+        self.binaries = binaries
+
+
+    def _copy_binaries_from_symfs_dirs(self):
+        """copy binaries in symfs_dirs matching filename and build_id to binary_cache."""
+        if len(self.symfs_dirs) == 0:
+            return
+
+        # It is possible that the path of the binary in symfs_dirs doesn't match
+        # the one recorded in perf.data. For example, a file in symfs_dirs might
+        # be "debug/arm/obj/armeabi-v7a/libsudo-game-jni.so", but the path in
+        # perf.data is "/data/app/xxxx/lib/arm/libsudo-game-jni.so". So we match
+        # binaries if they have the same filename (like libsudo-game-jni.so)
+        # and same build_id.
+
+        # Map from filename to binary paths.
+        filename_dict = dict()
+        for binary in self.binaries:
+            index = binary.rfind('/')
+            filename = binary[index+1:]
+            paths = filename_dict.get(filename)
+            if paths is None:
+                filename_dict[filename] = paths = []
+            paths.append(binary)
+
+        # Walk through all files in symfs_dirs, and copy matching files to binary_cache.
+        for symfs_dir in self.symfs_dirs:
+            for root, _, files in os.walk(symfs_dir):
+                for file in files:
+                    paths = filename_dict.get(file)
+                    if paths is not None:
+                        build_id = self.read_build_id(os.path.join(root, file))
+                        if len(build_id) == 0:
+                            continue
+                        for binary in paths:
+                            expected_build_id = self.binaries.get(binary)
+                            if expected_build_id == build_id:
+                                self._copy_to_binary_cache(os.path.join(root, file), binary)
+                                del self.binaries[binary]  # copied from host, so not pulled later
+
+
+    def _copy_to_binary_cache(self, from_path, target_file):
+        if target_file[0] == '/':  # make it relative so os.path.join keeps binary_cache_dir
+            target_file = target_file[1:]
+        target_file = target_file.replace('/', os.sep)  # use the host's path separator
+        target_file = os.path.join(self.binary_cache_dir, target_file)
+        target_dir = os.path.dirname(target_file)
+        if not os.path.isdir(target_dir):  # create missing parent directories
+            os.makedirs(target_dir)
+        log_info('copy to binary_cache: %s to %s' % (from_path, target_file))
+        shutil.copy(from_path, target_file)
+
+
+    def _pull_binaries_from_device(self):
+        """pull binaries needed in perf.data to binary_cache."""
+        for binary in self.binaries:
+            build_id = self.binaries[binary]
+            if binary[0] != '/' or binary == "//anon":
+                # [kernel.kallsyms] or unknown, or something we can't find binary.
+                continue
+            binary_cache_file = binary[1:].replace('/', os.sep)
+            binary_cache_file = os.path.join(self.binary_cache_dir, binary_cache_file)
+            self._check_and_pull_binary(binary, build_id, binary_cache_file)
+
+
+    def _check_and_pull_binary(self, binary, expected_build_id, binary_cache_file):
+        """If the binary_cache_file exists and has the expected_build_id, there
+           is no need to pull the binary from device. Otherwise, pull it.
+        """
+        need_pull = True
+        if os.path.isfile(binary_cache_file):
+            need_pull = False
+            if len(expected_build_id) > 0:
+                build_id = self.read_build_id(binary_cache_file)
+                if expected_build_id != build_id:
+                    need_pull = True
+        if need_pull:
+            target_dir = os.path.dirname(binary_cache_file)
+            if not os.path.isdir(target_dir):
+                os.makedirs(target_dir)
+            if os.path.isfile(binary_cache_file):
+                os.remove(binary_cache_file)
+            log_info('pull file to binary_cache: %s to %s' % (binary, binary_cache_file))
+            self._pull_file_from_device(binary, binary_cache_file)
+        else:
+            log_info('use current file in binary_cache: %s' % binary_cache_file)
+
+
+    def read_build_id(self, file):
+        """read build id of a binary on host; return an empty string if unavailable."""
+        if len(self.readelf_path) == 0:  # readelf is not configured
+            return ""
+        output = subprocess.check_output([self.readelf_path, '-n', file])
+        result = re.search(r'Build ID:\s*(\S+)', output)
+        if result:
+            build_id = result.group(1)
+            if len(build_id) < 40:  # right-pad short ids with '0' up to 40 characters
+                build_id += '0' * (40 - len(build_id))
+            build_id = '0x' + build_id  # presumably the form used in perf.data - TODO confirm
+            return build_id
+        return ""
+
+
+    def _pull_file_from_device(self, device_path, host_path):
+        if self.adb.run(['pull', device_path, host_path]):
+            return True
+        # In non-root device, we can't pull /data/app/XXX/base.odex directly.
+        # Instead, we can first copy the file to /data/local/tmp, then pull it.
+        filename = device_path[device_path.rfind('/')+1:]
+        if (self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']) and
+            self.adb.run(['pull', '/data/local/tmp/' + filename, host_path])):
+            self.adb.run(['shell', 'rm', '/data/local/tmp/' + filename])
+            return True
+        log_warning('failed to pull %s from device' % device_path)
+        return False
+
+
+    def _pull_kernel_symbols(self):  # replace binary_cache_dir/kallsyms with the device's
+        file = os.path.join(self.binary_cache_dir, 'kallsyms')
+        if os.path.isfile(file):  # drop any stale copy first
+            os.remove(file)
+        if self.adb.switch_to_root():  # only attempted when adb runs as root
+            self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
+            self.adb.run(['pull', '/proc/kallsyms', file])
+
+
+def load_config(config_file):
+    """See binary_cache_builder.config for explanation of configurations."""
+    if not os.path.exists(config_file):
+        log_fatal("can't find config_file: %s" % config_file)
+    config = {}
+    execfile(config_file, config)
+    return config
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Pull binaries needed by perf.data from device to binary_cache.")
+    parser.add_argument('--config', default='binary_cache_builder.config',
+                        help='Set configuration file. Default is binary_cache_builder.config.')
+    args = parser.parse_args()
+    config = load_config(args.config)
+    builder = BinaryCacheBuilder(config)
+    builder.build_binary_cache()
\ No newline at end of file
diff --git a/simpleperf/scripts/report_sample.py b/simpleperf/scripts/report_sample.py
index 0da3797..45e884a 100644
--- a/simpleperf/scripts/report_sample.py
+++ b/simpleperf/scripts/report_sample.py
@@ -19,18 +19,11 @@
 """
 
 from __future__ import print_function
+import argparse
 import sys
 from simpleperf_report_lib import *
 
 
-def usage():
-    print('python report_sample.py [options] <record_file>')
-    print('-h/--help print this help message')
-    print('--symfs <symfs_dir>  Set the path to looking for symbols')
-    print('--kallsyms <kallsyms_file>  Set the path to a kallsyms file')
-    print('If record file is not given, use default file perf.data.')
-
-
 def report_sample(record_file, symfs_dir, kallsyms_file=None):
     """ read record_file, and print each sample"""
     lib = ReportLib()
@@ -65,30 +58,11 @@
 
 
 if __name__ == '__main__':
-    record_file = 'perf.data'
-    symfs_dir = None
-    kallsyms_file = None
-    i = 1
-    while i < len(sys.argv):
-        if sys.argv[i] == '-h' or sys.argv[i] == '--help':
-            usage()
-            sys.exit(0)
-        elif sys.argv[i] == '--symfs':
-            if i + 1 < len(sys.argv):
-                symfs_dir = sys.argv[i + 1]
-                i += 1
-            else:
-                print('argument for --symfs is missing')
-                sys.exit(1)
-        elif sys.argv[i] == '--kallsyms':
-            if i + 1 < len(sys.argv):
-                kallsyms_file = sys.argv[i + 1]
-                i += 1
-            else:
-                print('argument for --kallsyms is missing')
-                sys.exit(1)
-        else:
-          record_file = sys.argv[i]
-        i += 1
-
-    report_sample(record_file, symfs_dir, kallsyms_file)
+    parser = argparse.ArgumentParser(description='Report samples in perf.data.')
+    parser.add_argument('--symfs',
+                        help='Set the path to find binaries with symbols and debug info.')
+    parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.')
+    parser.add_argument('record_file', nargs='?', default='perf.data',
+                        help='Default is perf.data.')
+    args = parser.parse_args()
+    report_sample(args.record_file, args.symfs, args.kallsyms)
diff --git a/simpleperf/scripts/libsimpleperf_report.so b/simpleperf/scripts/shared_libraries/host/linux/libsimpleperf_report.so
similarity index 100%
rename from simpleperf/scripts/libsimpleperf_report.so
rename to simpleperf/scripts/shared_libraries/host/linux/libsimpleperf_report.so
Binary files differ
diff --git a/simpleperf/scripts/simpleperf_report_lib.py b/simpleperf/scripts/simpleperf_report_lib.py
index 46264be..61dad08 100644
--- a/simpleperf/scripts/simpleperf_report_lib.py
+++ b/simpleperf/scripts/simpleperf_report_lib.py
@@ -24,25 +24,11 @@
 import subprocess
 import sys
 import unittest
-
-
-def _isWindows():
-    return sys.platform == 'win32' or sys.platform == 'cygwin'
-
-
-def _get_script_dir():
-    return os.path.dirname(os.path.realpath(__file__))
+from utils import *
 
 
 def _get_native_lib():
-    if _isWindows():
-        so_name = 'libsimpleperf_report.dll'
-    elif sys.platform == 'darwin': # OSX
-        so_name = 'libsimpleperf_report.dylib'
-    else:
-        so_name = 'libsimpleperf_report.so'
-
-    return os.path.join(_get_script_dir(), so_name)
+    return get_host_binary_path('libsimpleperf_report.so')
 
 
 def _is_null(p):
@@ -174,8 +160,8 @@
     def _load_dependent_lib(self):
         # As the windows dll is built with mingw we need to also find "libwinpthread-1.dll".
         # Load it before libsimpleperf_report.dll if it does exist in the same folder as this script.
-        if _isWindows():
-            libwinpthread_path = os.path.join(_get_script_path(), "libwinpthread-1.dll")
+        if is_windows():
+            libwinpthread_path = os.path.join(get_script_dir(), "libwinpthread-1.dll")
             if os.path.exists(libwinpthread_path):
                 self._libwinpthread = ct.CDLL(libwinpthread_path)
 
@@ -254,7 +240,7 @@
 
 class TestReportLib(unittest.TestCase):
     def setUp(self):
-        self.perf_data_path = os.path.join(os.path.dirname(_get_script_dir()),
+        self.perf_data_path = os.path.join(os.path.dirname(get_script_dir()),
                                            'testdata', 'perf_with_symbols.data')
         if not os.path.isfile(self.perf_data_path):
             raise Exception("can't find perf_data at %s" % self.perf_data_path)
diff --git a/simpleperf/scripts/utils.py b/simpleperf/scripts/utils.py
new file mode 100644
index 0000000..1d3cbc1
--- /dev/null
+++ b/simpleperf/scripts/utils.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""utils.py: export utility functions.
+"""
+
+from __future__ import print_function
+import logging
+import os.path
+import subprocess
+import sys
+
+def get_script_dir():
+    return os.path.dirname(os.path.realpath(__file__))
+
+
+def is_windows():
+    return sys.platform == 'win32' or sys.platform == 'cygwin'
+
+
+def log_debug(msg):
+    logging.debug(msg)
+
+
+def log_info(msg):
+    logging.info(msg)
+
+
+def log_warning(msg):
+    logging.warning(msg)
+
+
+def log_fatal(msg):
+    raise Exception(msg)
+
+
+def get_target_binary_path(arch, binary_name):  # path of a binary shipped for device arch
+    arch_dir = os.path.join(get_script_dir(), "shared_libraries", "target", arch)
+    if not os.path.isdir(arch_dir):
+        log_fatal("can't find arch directory: %s" % arch_dir)
+    binary_path = os.path.join(arch_dir, binary_name)
+    if not os.path.isfile(binary_path):
+        log_fatal("can't find binary: %s" % binary_path)
+    return binary_path
+
+
+def get_host_binary_path(binary_name):  # path of a binary shipped for the host platform
+    dir = os.path.join(get_script_dir(), 'shared_libraries', 'host')
+    if not os.path.isdir(dir):
+        log_fatal("can't find directory: %s" % dir)
+    if is_windows():
+        if binary_name.endswith('.so'):  # shared libraries are named .dll on Windows
+            binary_name = binary_name[0:-3] + '.dll'
+        dir = os.path.join(dir, 'windows')
+    elif sys.platform == 'darwin': # OSX
+        if binary_name.endswith('.so'):  # shared libraries are named .dylib on OSX
+            binary_name = binary_name[0:-3] + '.dylib'
+        dir = os.path.join(dir, 'darwin')
+    else:
+        dir = os.path.join(dir, 'linux')
+    if not os.path.isdir(dir):
+        log_fatal("can't find directory: %s" % dir)
+    binary_path = os.path.join(dir, binary_name)
+    if not os.path.isfile(binary_path):
+        log_fatal("can't find binary: %s" % binary_path)
+    return binary_path
+
+
+class AdbHelper(object):
+    def __init__(self, adb_path):
+        self.adb_path = adb_path
+
+    def run(self, adb_args):
+        return self.run_and_return_output(adb_args)[0]
+
+    def run_and_return_output(self, adb_args):
+        adb_args = [self.adb_path] + adb_args
+        log_debug('run adb cmd: %s' % adb_args)
+        subproc = subprocess.Popen(adb_args, stdout=subprocess.PIPE)
+        (stdoutdata, _) = subproc.communicate()
+        result = (subproc.returncode == 0)
+        if len(stdoutdata) > 0:
+            log_debug(stdoutdata)
+        log_debug('run adb cmd: %s  [result %s]' % (adb_args, result))
+        return (result, stdoutdata)
+
+    def switch_to_root(self):
+        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
+        if not result:
+            return False
+        if stdoutdata.find('root') != -1:
+            return True
+        result, stdoutdata = self.run_and_return_output(['shell', 'getprop', 'ro.build.type'])
+        if not result:
+            return False
+        if stdoutdata.strip() == 'user':
+            return False
+        self.run(['root'])
+        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
+        if result and stdoutdata.find('root') != -1:
+            return True
+        return False
+
+
+logging.getLogger().setLevel(logging.DEBUG)
\ No newline at end of file