blob: ff3b0ae6b2c0d187f2aaca5913f992ccb37f0979 [file] [log] [blame]
Jeongik Cha062f2b02019-06-13 18:19:30 +09001#
2# Copyright (C) 2019 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import os
17import subprocess
18import sys
19from collections import defaultdict
Jeongik Cha717fdcd2019-08-05 15:30:02 +090020from pathlib import Path
Jeongik Cha062f2b02019-06-13 18:19:30 +090021import hashlib
22import argparse
Jeongik Cha58649072019-07-12 22:40:03 +090023import zipfile
Yo Chiang9039ad42019-11-11 17:07:45 +080024import fnmatch
Jeongik Chaaf646f72019-12-11 21:55:50 +090025import tempfile
Jeongik Cha062f2b02019-06-13 18:19:30 +090026
def silent_call(cmd):
  """Runs a command with its output discarded and reports success.

  Args:
    cmd: The command, as an argument list passed to subprocess.call.

  Returns:
    True if the command exited with status 0, False otherwise.
  """
  exit_status = subprocess.call(
      cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
  return exit_status == 0
29
def sha1sum(f):
  """Returns the hexadecimal SHA-1 digest of a file's contents.

  The file is hashed in fixed-size chunks so that large artifacts are never
  loaded into memory in one piece.

  Args:
    f: Path of the file to hash.

  Returns:
    The SHA-1 digest as a lowercase hex string.
  """
  chunk_size = 1 << 20  # 1 MiB per read.
  digest = hashlib.sha1()
  with open(f, 'rb') as fin:
    for chunk in iter(lambda: fin.read(chunk_size), b''):
      digest.update(chunk)
  return digest.hexdigest()
33
def sha1sum_without_signing_key(filepath):
  """Returns a SHA-1 digest of a zip/APK that ignores its META-INF/ entries.

  Every archive member whose name does not start with 'META-INF/' contributes
  the SHA-1 of its content followed by its name. The members are visited in
  sorted name order, the pieces are joined with ',' and the joined string is
  hashed again. Archives that differ only in META-INF/ entries therefore get
  the same digest.

  Args:
    filepath: Path of the zip archive to hash.

  Returns:
    The combined SHA-1 digest as a lowercase hex string.
  """
  parts = []
  # The context manager closes the archive handle even if a read fails.
  with zipfile.ZipFile(filepath) as apk:
    for name in sorted(apk.namelist()):
      if name.startswith('META-INF/'):
        continue
      parts.append(hashlib.sha1(apk.read(name)).hexdigest())
      parts.append(name)
  return hashlib.sha1(",".join(parts).encode()).hexdigest()
Jeongik Cha062f2b02019-06-13 18:19:30 +090043
def strip_and_sha1sum(filepath):
  """Returns the SHA-1 digest of a file after stripping it with llvm-strip.

  llvm-strip is asked to strip all symbols, keep the .ARM.attributes section
  and remove the .note.gnu.build-id section. The stripped copy is written into
  a temporary directory, so the directory holding filepath is never modified
  and may be read-only. If llvm-strip does not exit successfully, the digest of
  the unmodified file is returned instead.

  Args:
    filepath: Path of the file to strip and hash.

  Returns:
    The SHA-1 digest as a lowercase hex string.
  """
  with tempfile.TemporaryDirectory() as tmp_dir:
    stripped_path = os.path.join(tmp_dir, 'stripped.no-build-id')
    stripped_ok = silent_call(
        ["llvm-strip", "--strip-all", "--keep-section=.ARM.attributes",
         "--remove-section=.note.gnu.build-id", filepath, "-o", stripped_path])
    if stripped_ok:
      # Hash before leaving the with-block: the stripped copy is deleted
      # together with the temporary directory.
      return sha1sum(stripped_path)
  return sha1sum(filepath)
Jeongik Cha062f2b02019-06-13 18:19:30 +090060
61
def make_filter_from_whitelists(whitelists, all_targets):
  """Creates a callable filter from a list of whitelist files.

  Whitelist can contain pathname patterns or ignored lines. Pathnames are case
  insensitive. Whitelist files that do not exist are skipped. A trailing
  backslash on a line (and any whitespace in front of it) is dropped.

  For example, this ignores the file "system/build.prop":
    SYSTEM/build.prop

  This ignores txt files:
    *.txt

  This ignores files in directory "system/dontcare/"
    SYSTEM/dontcare/*

  This ignores lines prefixed with pat1 or pat2 in file "system/build.prop":
    SYSTEM/build.prop=pat1 pat2

  Args:
    whitelists: A list of whitelist filenames.
    all_targets: A list of targets to compare.

  Returns:
    A callable object that accepts a file pathname and returns True if the file
    is ignored by the whitelists and False when it is not.
  """
  ignored_patterns = set()
  ignored_lines = defaultdict(list)
  for whitelist in whitelists:
    if not os.path.isfile(whitelist):
      continue
    with open(whitelist, 'rb') as f:
      for line in f:
        pat = line.strip().decode()
        if pat and pat[-1] == '\\':
          # Also drop the whitespace that usually precedes a continuation
          # backslash; otherwise the pattern keeps a trailing space and can
          # never match a pathname.
          pat = pat.rstrip('\\').rstrip()
        if '=' in pat:
          filename, prefixes = pat.split('=', 1)
          prefixes = prefixes.split()
          if prefixes:
            ignored_lines[filename.lower()].extend(prefixes)
        elif pat:
          ignored_patterns.add(pat.lower())

  def diff_with_ignored_lines(filename, prefixes):
    """Compares sha1 digest of file while ignoring lines.

    Args:
      filename: File to compare among each target.
      prefixes: A list of prefixes. Lines that start with prefix are ignored.

    Returns:
      True if file exists in every target and is identical among them once the
      ignored lines are left out.
    """
    # Compare raw bytes so that lines which are not valid UTF-8 cannot raise
    # UnicodeDecodeError; bytes.startswith accepts a tuple of candidates.
    byte_prefixes = tuple(prefix.encode() for prefix in prefixes)
    digests = set()
    for target in all_targets:
      pathname = os.path.join(target, filename)
      if not os.path.isfile(pathname):
        return False
      sha1 = hashlib.sha1()
      with open(pathname, 'rb') as f:
        for line in f:
          if not line.startswith(byte_prefixes):
            sha1.update(line)
      digests.add(sha1.hexdigest())
    return len(digests) == 1

  def whitelist_filter(filename):
    """Returns True if filename is ignored by the loaded whitelists."""
    norm_filename = filename.lower()
    if any(fnmatch.fnmatch(norm_filename, pattern)
           for pattern in ignored_patterns):
      return True
    if norm_filename in ignored_lines:
      return diff_with_ignored_lines(filename, ignored_lines[norm_filename])
    return False

  return whitelist_filter
142
143
def main(all_targets, search_paths, whitelists, ignore_signing_key=False):
  """Hashes the files of every target and writes three comparison reports.

  Each file found under the search paths of each target is hashed. The results
  are grouped by the file's path relative to its target and written to the
  current directory:
    common.csv: files with a single digest that is present in all targets.
    whitelisted_diff.csv: other files that the whitelist filter accepts.
    diff.csv: all remaining files.

  Args:
    all_targets: A list of target directories to compare.
    search_paths: A list of sub-directories of each target to scan.
    whitelists: A list of whitelist filenames.
    ignore_signing_key: If True, files ending in '.apk' are hashed with
      sha1sum_without_signing_key.
  """
  def digest_of(path):
    # Files accepted by llvm-objdump are hashed in stripped form.
    if silent_call(["llvm-objdump", "-a", path]):
      return strip_and_sha1sum(path)
    if path.endswith('.apk') and ignore_signing_key:
      return sha1sum_without_signing_key(path)
    return sha1sum(path)

  # targets_by_digest_by_file[filename][sha1] = list of targets
  targets_by_digest_by_file = defaultdict(lambda: defaultdict(list))
  for target in all_targets:
    # Collect every path of this target before hashing any of them, so the
    # hashing step cannot influence the directory walk.
    found = [
        (str(entry), str(entry.relative_to(target)))
        for search_path in search_paths
        for entry in Path(target, search_path).glob('**/*')
        if entry.exists() and not entry.is_dir()
    ]
    target_name = os.path.basename(os.path.normpath(target))
    for full_path, relative_name in found:
      digest = digest_of(full_path)
      targets_by_digest_by_file[relative_name][digest].append(target_name)

  def format_row(sha1, filename, targets):
    return '{}, {}, {}\n'.format(filename, sha1[:10], ';'.join(targets))

  def is_common(sha1_target_map):
    # Common means exactly one digest, contributed once per target.
    if len(sha1_target_map) != 1:
      return False
    (targets,) = sha1_target_map.values()
    return len(targets) == len(all_targets)

  whitelist_filter = make_filter_from_whitelists(whitelists, all_targets)

  common = []
  diff = []
  whitelisted_diff = []
  for filename, sha1_target_map in targets_by_digest_by_file.items():
    if is_common(sha1_target_map):
      bucket = common
    elif whitelist_filter(filename):
      bucket = whitelisted_diff
    else:
      bucket = diff
    bucket.extend(format_row(sha1, filename, targets)
                  for sha1, targets in sha1_target_map.items())

  header = "filename, sha1sum, targets\n"
  reports = (
      ("common.csv", common),
      ("diff.csv", diff),
      ("whitelisted_diff.csv", whitelisted_diff),
  )
  for report_name, rows in reports:
    with open(report_name, 'w') as fout:
      fout.write(header)
      fout.writelines(sorted(rows))
Jeongik Cha062f2b02019-06-13 18:19:30 +0900209
def main_with_zip(extracted_paths, args):
  """Extracts the search paths from each target's zip files, then runs main.

  For each pair of original target and extraction directory, "unzip" is run on
  the "*.zip" pattern inside the original target, restricted to members under
  each search path. The exit status of unzip is not checked.

  Args:
    extracted_paths: Directories to extract into, one per entry of args.target
      and in the same order. They are then compared as the targets.
    args: Parsed command line arguments; target, search_path, whitelist and
      ignore_signing_key are read.
  """
  member_patterns = [os.path.join(s, "*") for s in args.search_path]
  for origin_path, tmp_path in zip(args.target, extracted_paths):
    archive_pattern = os.path.join(origin_path, "*.zip")
    subprocess.call(["unzip", "-qd", tmp_path, archive_pattern] + member_patterns)
  main(extracted_paths, args.search_path, args.whitelist, args.ignore_signing_key)
Jeongik Cha062f2b02019-06-13 18:19:30 +0900216
if __name__ == "__main__":
  # Command line entry point: parse the arguments, then compare the targets
  # either in place or after extracting their zip files.
  parser = argparse.ArgumentParser(prog="compare_images", usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u] [-p] [-w whitelist1] [-w whitelist2]")
  parser.add_argument("-t", "--target", nargs='+', required=True)
  parser.add_argument("-s", "--search_path", nargs='+', required=True)
  parser.add_argument("-i", "--ignore_signing_key", action='store_true')
  parser.add_argument("-u", "--unzip", action='store_true')
  parser.add_argument("-p", "--preserve_extracted_files", action='store_true')
  parser.add_argument("-w", "--whitelist", action="append", default=[])
  args = parser.parse_args()
  if len(args.target) < 2:
    parser.error("The number of targets has to be at least two.")
  if args.unzip:
    if args.preserve_extracted_files:
      # Extract next to the zip files and keep the extracted files.
      main_with_zip(args.target, args)
    else:
      with tempfile.TemporaryDirectory() as tmpdir:
        # Joining tmpdir with the raw target would discard tmpdir for an
        # absolute target and could escape it for targets containing "..".
        # Use one numbered directory per target and keep only the target's
        # basename, which is what the reports use as the target label.
        target_in_tmp = [
            os.path.join(tmpdir, str(index),
                         os.path.basename(os.path.abspath(t)))
            for index, t in enumerate(args.target)
        ]
        for p in target_in_tmp:
          os.makedirs(p)
        main_with_zip(target_in_tmp, args)
  else:
    main(args.target, args.search_path, args.whitelist, args.ignore_signing_key)