# blob: ff3b0ae6b2c0d187f2aaca5913f992ccb37f0979 [file] [log] [blame]
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
import hashlib
import argparse
import zipfile
import fnmatch
import tempfile
def silent_call(cmd):
    """Run a command with its output discarded and report success.

    Args:
      cmd: The command to execute, given as an argument list.

    Returns:
      True if the command exited with status 0, False otherwise.
    """
    completed = subprocess.run(
        cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return completed.returncode == 0
def sha1sum(f, chunk_size=1 << 20):
    """Return the hexadecimal SHA-1 digest of a file's contents.

    The file is hashed in fixed-size chunks so that large files are never
    loaded into memory in one piece.

    Args:
      f: Pathname of the file to hash.
      chunk_size: Number of bytes read per iteration.

    Returns:
      The SHA-1 digest of the file content as a hex string.
    """
    digest = hashlib.sha1()
    with open(f, 'rb') as fin:
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for chunk in iter(lambda: fin.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
def sha1sum_without_signing_key(filepath):
    """Return a SHA-1 digest of a zip archive that ignores 'META-INF/' entries.

    Every archive member whose name does not start with 'META-INF/' contributes
    the SHA-1 of its content followed by its name, in sorted name order. The
    final digest is the SHA-1 of those items joined with commas.

    Args:
      filepath: Pathname of the zip archive (e.g. an APK).

    Returns:
      The combined digest as a hex string.
    """
    parts = []
    # The context manager closes the archive handle even if a read fails.
    with zipfile.ZipFile(filepath) as apk:
        for name in sorted(apk.namelist()):
            if name.startswith('META-INF/'):
                continue
            parts.append(hashlib.sha1(apk.read(name)).hexdigest())
            parts.append(name)
    return hashlib.sha1(",".join(parts).encode()).hexdigest()
def strip_and_sha1sum(filepath):
    """Return the SHA-1 of a native binary after stripping it.

    llvm-strip is run with --strip-all while keeping the .ARM.attributes
    section and removing the .note.gnu.build-id section. The stripped copy is
    written into a private temporary directory, so the directory holding
    `filepath` is never written to and may be read-only.

    Args:
      filepath: Pathname of the file to strip and hash.

    Returns:
      The hex SHA-1 of the stripped copy, or of the unmodified file when
      llvm-strip does not succeed.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        stripped_path = os.path.join(
            tmp_dir, os.path.basename(filepath) + '.tmp.no-build-id')
        stripped = silent_call(
            ["llvm-strip", "--strip-all", "--keep-section=.ARM.attributes",
             "--remove-section=.note.gnu.build-id", filepath, "-o",
             stripped_path])
        if stripped:
            # Hash before the temporary directory is cleaned up.
            return sha1sum(stripped_path)
    return sha1sum(filepath)
def make_filter_from_whitelists(whitelists, all_targets):
    """Creates a callable filter from a list of whitelist files.

    Whitelist can contain pathname patterns or ignored lines. Pathnames are
    case insensitive.

    For example, this ignores the file "system/build.prop":
      SYSTEM/build.prop

    This ignores txt files:
      *.txt

    This ignores files in directory "system/dontcare/"
      SYSTEM/dontcare/*

    This ignores lines prefixed with pat1 or pat2 in file "system/build.prop":
      SYSTEM/build.prop=pat1 pat2

    Whitelist paths that are not existing files are skipped.

    Args:
      whitelists: A list of whitelist filenames.
      all_targets: A list of targets to compare.

    Returns:
      A callable object that accepts a file pathname and returns True if the
      file is ignored by the whitelists and False when it is not.
    """
    ignored_patterns = set()
    ignored_lines = defaultdict(list)
    for whitelist in whitelists:
        if not os.path.isfile(whitelist):
            continue
        with open(whitelist, 'rb') as f:
            for line in f:
                # Drop any trailing backslashes; a no-op for other lines.
                pat = line.strip().decode().rstrip('\\')
                if '=' in pat:
                    filename, prefixes = pat.split('=', 1)
                    prefixes = prefixes.split()
                    if prefixes:
                        ignored_lines[filename.lower()].extend(prefixes)
                elif pat:
                    ignored_patterns.add(pat.lower())

    def diff_with_ignored_lines(filename, prefixes):
        """Compares sha1 digest of file while ignoring lines.

        Args:
          filename: File to compare among each target.
          prefixes: A list of prefixes. Lines that start with prefix are
            ignored.

        Returns:
          True if file is identical among each target.
        """
        # Match on raw bytes: lines that are not valid UTF-8 can then be
        # compared instead of raising UnicodeDecodeError, and the prefixes are
        # encoded only once rather than decoding every line.
        byte_prefixes = tuple(prefix.encode() for prefix in prefixes)
        digests = set()
        for target in all_targets:
            pathname = os.path.join(target, filename)
            if not os.path.isfile(pathname):
                # A file missing from any target can never be identical.
                return False
            sha1 = hashlib.sha1()
            with open(pathname, 'rb') as f:
                for line in f:
                    if not line.startswith(byte_prefixes):
                        sha1.update(line)
            digests.add(sha1.hexdigest())
        return len(digests) == 1

    def whitelist_filter(filename):
        norm_filename = filename.lower()
        if any(fnmatch.fnmatch(norm_filename, pattern)
               for pattern in ignored_patterns):
            return True
        if norm_filename in ignored_lines:
            return diff_with_ignored_lines(
                filename, ignored_lines[norm_filename])
        return False

    return whitelist_filter
def main(all_targets, search_paths, whitelists, ignore_signing_key=False):
    """Hash the artifacts of every target and write comparison reports.

    Each file found below the search paths of each target is hashed, and the
    digests are grouped per relative filename. Three CSV reports are written
    to the current working directory:
      common.csv: files with a single digest shared by all targets.
      whitelisted_diff.csv: differing files accepted by the whitelist filter.
      diff.csv: all remaining differing files.

    Args:
      all_targets: A list of target directories to compare.
      search_paths: Directories, relative to each target, to scan recursively.
      whitelists: A list of whitelist filenames.
      ignore_signing_key: If True, '.apk' files are hashed with
        sha1sum_without_signing_key.
    """

    def run(path):
        # Files that llvm-objdump accepts are hashed in stripped form.
        if silent_call(["llvm-objdump", "-a", path]):
            return strip_and_sha1sum(path)
        if ignore_signing_key and path.endswith('.apk'):
            return sha1sum_without_signing_key(path)
        return sha1sum(path)

    # artifact_sha1_target_map[filename][sha1] = list of targets
    artifact_sha1_target_map = defaultdict(lambda: defaultdict(list))
    for target in all_targets:
        target_basename = os.path.basename(os.path.normpath(target))
        # Collect every path first so the directory walk is finished before
        # any hashing helper runs.
        found = [
            (str(entry), str(entry.relative_to(target)))
            for search_path in search_paths
            for entry in Path(target, search_path).glob('**/*')
            if entry.exists() and not entry.is_dir()
        ]
        for path, filename in found:
            digest = run(path)
            artifact_sha1_target_map[filename][digest].append(target_basename)

    def pretty_print(sha1, filename, targets):
        return ', '.join((filename, sha1[:10], ';'.join(targets))) + '\n'

    def is_common(sha1_target_map):
        # Common means: exactly one digest, reported once per target.
        if len(sha1_target_map) != 1:
            return False
        (targets,) = sha1_target_map.values()
        return len(targets) == len(all_targets)

    whitelist_filter = make_filter_from_whitelists(whitelists, all_targets)

    common, diff, whitelisted_diff = [], [], []
    for filename, sha1_target_map in artifact_sha1_target_map.items():
        if is_common(sha1_target_map):
            bucket = common
        elif whitelist_filter(filename):
            bucket = whitelisted_diff
        else:
            bucket = diff
        bucket.extend(
            pretty_print(sha1, filename, targets)
            for sha1, targets in sha1_target_map.items())

    header = "filename, sha1sum, targets\n"
    reports = (
        ("common.csv", common),
        ("diff.csv", diff),
        ("whitelisted_diff.csv", whitelisted_diff),
    )
    for report_name, rows in reports:
        with open(report_name, 'w') as fout:
            fout.write(header)
            fout.writelines(sorted(rows))
def main_with_zip(extracted_paths, args):
    """Extract the zip archives of every target, then compare the results.

    For each pair of entries from args.target and extracted_paths, `unzip` is
    invoked on "<target>/*.zip" with the extraction directory set to the
    paired entry, restricted to members matching "<search_path>/*". The
    wildcard arguments are handed to `unzip` as-is (no shell is involved).
    The exit status of `unzip` is not checked.

    Args:
      extracted_paths: Destination directories, one per entry of args.target.
      args: Parsed command line arguments; reads target, search_path,
        whitelist and ignore_signing_key.
    """
    member_patterns = [os.path.join(s, "*") for s in args.search_path]
    for origin_path, tmp_path in zip(args.target, extracted_paths):
        archives = os.path.join(origin_path, "*.zip")
        subprocess.call(["unzip", "-qd", tmp_path, archives] + member_patterns)
    main(extracted_paths, args.search_path, args.whitelist,
         args.ignore_signing_key)
if __name__ == "__main__":
    # Command line entry point: parse the arguments, then compare the targets
    # either directly or after extracting their zip archives.
    parser = argparse.ArgumentParser(prog="compare_images", usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u] [-p] [-w whitelist1] [-w whitelist2]")
    parser.add_argument("-t", "--target", nargs='+', required=True)
    parser.add_argument("-s", "--search_path", nargs='+', required=True)
    parser.add_argument("-i", "--ignore_signing_key", action='store_true')
    parser.add_argument("-u", "--unzip", action='store_true')
    parser.add_argument("-p", "--preserve_extracted_files", action='store_true')
    parser.add_argument("-w", "--whitelist", action="append", default=[])
    args = parser.parse_args()
    if len(args.target) < 2:
        parser.error("The number of targets has to be at least two.")
    if args.unzip:
        if args.preserve_extracted_files:
            # Extract next to the archives, inside the target directories.
            main_with_zip(args.target, args)
        else:
            with tempfile.TemporaryDirectory() as tmpdir:
                # Joining tmpdir with the raw target would discard tmpdir for
                # absolute targets and escape it for targets containing "..".
                # Only the target's final path component is reused (main()
                # reports targets by that name), placed in a per-target
                # numbered directory so equal names cannot collide.
                target_in_tmp = [
                    os.path.join(tmpdir, str(index),
                                 os.path.basename(os.path.abspath(t)))
                    for index, t in enumerate(args.target)]
                for p in target_in_tmp:
                    os.makedirs(p)
                main_with_zip(target_in_tmp, args)
    else:
        main(args.target, args.search_path, args.whitelist, args.ignore_signing_key)