Jeongik Cha | 062f2b0 | 2019-06-13 18:19:30 +0900 | [diff] [blame] | 1 | # |
| 2 | # Copyright (C) 2019 The Android Open Source Project |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | |
| 16 | import os |
| 17 | import subprocess |
| 18 | import sys |
| 19 | from collections import defaultdict |
Jeongik Cha | 717fdcd | 2019-08-05 15:30:02 +0900 | [diff] [blame] | 20 | from pathlib import Path |
Jeongik Cha | 062f2b0 | 2019-06-13 18:19:30 +0900 | [diff] [blame] | 21 | import hashlib |
| 22 | import argparse |
Jeongik Cha | 5864907 | 2019-07-12 22:40:03 +0900 | [diff] [blame] | 23 | import zipfile |
Yo Chiang | 9039ad4 | 2019-11-11 17:07:45 +0800 | [diff] [blame] | 24 | import fnmatch |
Jeongik Cha | af646f7 | 2019-12-11 21:55:50 +0900 | [diff] [blame] | 25 | import tempfile |
Jeongik Cha | 062f2b0 | 2019-06-13 18:19:30 +0900 | [diff] [blame] | 26 | |
def silent_call(cmd):
    """Runs `cmd` with stdout and stderr discarded.

    Args:
      cmd: The command to run, as an argument list passed to subprocess.call.

    Returns:
      True if the command exited with status 0, False otherwise.
    """
    exit_status = subprocess.call(
        cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return exit_status == 0
| 29 | |
def sha1sum(f):
    """Returns the hexadecimal SHA-1 digest of a file's contents.

    The file is read and hashed in fixed-size chunks, so large artifacts are
    never held in memory in one piece.

    Args:
      f: Path of the file to hash.

    Returns:
      The SHA-1 digest as a lowercase hex string.
    """
    chunk_size = 1 << 20  # 1 MiB per read
    digest = hashlib.sha1()
    with open(f, 'rb') as fin:
        # read() returns b'' at end of file, which terminates the iterator.
        for chunk in iter(lambda: fin.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
| 33 | |
def sha1sum_without_signing_key(filepath):
    """Returns a SHA-1 digest of a zip archive that ignores META-INF/ entries.

    Every entry whose name does not start with 'META-INF/' contributes its
    content digest followed by its name, in sorted name order. The combined
    comma-joined string is hashed again to produce the result, so two archives
    that differ only in their META-INF/ entries get the same digest.

    Args:
      filepath: Path of the zip archive (e.g. an APK).

    Returns:
      The SHA-1 digest as a lowercase hex string.
    """
    parts = []
    # The context manager closes the archive handle even if a read fails.
    with zipfile.ZipFile(filepath) as apk:
        for entry_name in sorted(apk.namelist()):
            if entry_name.startswith('META-INF/'):
                continue
            parts.append(hashlib.sha1(apk.read(entry_name)).hexdigest())
            parts.append(entry_name)
    return hashlib.sha1(",".join(parts).encode()).hexdigest()
Jeongik Cha | 062f2b0 | 2019-06-13 18:19:30 +0900 | [diff] [blame] | 43 | |
def strip_and_sha1sum(filepath):
    """Returns the SHA-1 digest of a file after stripping it with llvm-strip.

    llvm-strip is asked to strip everything except the .ARM.attributes section
    and to remove the .note.gnu.build-id section. The stripped copy is written
    into a private temporary directory, so the directory containing `filepath`
    may be read-only and is never modified. The temporary directory is removed
    before returning.

    Args:
      filepath: Path of the file to strip and hash.

    Returns:
      The SHA-1 hex digest of the stripped copy when the llvm-strip call
      reports success, otherwise the digest of the unmodified file.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_filepath = os.path.join(tmp_dir, 'stripped.no-build-id')
        stripped = silent_call(
            ["llvm-strip", "--strip-all", "--keep-section=.ARM.attributes",
             "--remove-section=.note.gnu.build-id", filepath, "-o", tmp_filepath])
        if stripped:
            return sha1sum(tmp_filepath)
    # Stripping did not succeed: fall back to hashing the file as it is.
    return sha1sum(filepath)
Jeongik Cha | 062f2b0 | 2019-06-13 18:19:30 +0900 | [diff] [blame] | 60 | |
| 61 | |
def make_filter_from_whitelists(whitelists, all_targets):
    """Creates a callable filter from a list of whitelist files.

    Whitelist can contain pathname patterns or ignored lines. Pathnames are case
    insensitive. A trailing backslash on a whitelist line is dropped together
    with any whitespace in front of it. Whitelist files that do not exist are
    skipped.

    For example, this ignores the file "system/build.prop":
      SYSTEM/build.prop

    This ignores txt files:
      *.txt

    This ignores files in directory "system/dontcare/"
      SYSTEM/dontcare/*

    This ignores lines prefixed with pat1 or pat2 in file "system/build.prop":
      SYSTEM/build.prop=pat1 pat2

    Args:
      whitelists: A list of whitelist filenames.
      all_targets: A list of targets to compare.

    Returns:
      A callable object that accepts a file pathname and returns True if the file
      is ignored by the whitelists and False when it is not.
    """
    ignored_patterns = set()
    ignored_lines = defaultdict(list)
    for whitelist in whitelists:
        if not os.path.isfile(whitelist):
            continue
        with open(whitelist, 'rb') as f:
            for line in f:
                pat = line.strip().decode()
                if pat and pat[-1] == '\\':
                    # Also drop the whitespace that preceded the backslash;
                    # otherwise the pattern keeps a trailing blank and can
                    # never match a pathname.
                    pat = pat.rstrip('\\').rstrip()
                if '=' in pat:
                    filename, prefixes = pat.split('=', 1)
                    prefixes = prefixes.split()
                    if prefixes:
                        ignored_lines[filename.lower()].extend(prefixes)
                elif pat:
                    ignored_patterns.add(pat.lower())

    def diff_with_ignored_lines(filename, prefixes):
        """Compares sha1 digest of file while ignoring lines.

        Lines are compared as raw bytes, so files that are not valid UTF-8 can
        be compared as well.

        Args:
          filename: File to compare among each target.
          prefixes: A list of prefixes. Lines that start with prefix are ignored.

        Returns:
          True if file exists in every target and is identical among them once
          the ignored lines are left out.
        """
        # bytes.startswith accepts a tuple, so one call tests every prefix.
        byte_prefixes = tuple(prefix.encode() for prefix in prefixes)
        digests = set()
        for target in all_targets:
            pathname = os.path.join(target, filename)
            if not os.path.isfile(pathname):
                return False
            sha1 = hashlib.sha1()
            with open(pathname, 'rb') as f:
                for line in f:
                    if not line.startswith(byte_prefixes):
                        sha1.update(line)
            digests.add(sha1.hexdigest())
        # Exactly one distinct digest means all targets agree; an empty target
        # list yields no digest and therefore False.
        return len(digests) == 1

    def whitelist_filter(filename):
        norm_filename = filename.lower()
        if any(fnmatch.fnmatch(norm_filename, pattern)
               for pattern in ignored_patterns):
            return True
        if norm_filename in ignored_lines:
            return diff_with_ignored_lines(filename, ignored_lines[norm_filename])
        return False

    return whitelist_filter
| 142 | |
| 143 | |
def main(all_targets, search_paths, whitelists, ignore_signing_key=False):
    """Hashes the files of every target and writes three comparison reports.

    Each file found under the search paths of a target is hashed and grouped by
    its pathname relative to the target. A file is "common" when every target
    has it with one and the same digest; otherwise it is a diff, reported
    separately when the whitelist filter accepts it. Results are written to
    common.csv, diff.csv and whitelisted_diff.csv in the current directory.

    Args:
      all_targets: A list of target directories to compare.
      search_paths: Directories, relative to each target, to search recursively.
      whitelists: A list of whitelist filenames.
      ignore_signing_key: If True, files ending in '.apk' are hashed with
        sha1sum_without_signing_key.
    """
    def digest_of(path):
        # A file is handled as a native component when `llvm-objdump -a`
        # reports success for it; that check takes priority over the apk one.
        if silent_call(["llvm-objdump", "-a", path]):
            return strip_and_sha1sum(path)
        if ignore_signing_key and path.endswith('.apk'):
            return sha1sum_without_signing_key(path)
        return sha1sum(path)

    # artifact_sha1_target_map[filename][sha1] = list of targets
    artifact_sha1_target_map = defaultdict(lambda: defaultdict(list))
    for target in all_targets:
        # Collect the complete file list before hashing anything.
        found = [
            (str(entry), str(entry.relative_to(target)))
            for search_path in search_paths
            for entry in Path(target, search_path).glob('**/*')
            if entry.exists() and not entry.is_dir()
        ]
        target_basename = os.path.basename(os.path.normpath(target))
        for path, filename in found:
            artifact_sha1_target_map[filename][digest_of(path)].append(
                target_basename)

    def format_row(sha1, filename, targets):
        return '{}, {}, {}\n'.format(filename, sha1[:10], ';'.join(targets))

    def is_common(sha1_target_map):
        # Common means a single digest that is shared by all of the targets.
        if len(sha1_target_map) != 1:
            return False
        (targets,) = sha1_target_map.values()
        return len(targets) == len(all_targets)

    whitelist_filter = make_filter_from_whitelists(whitelists, all_targets)

    common = []
    diff = []
    whitelisted_diff = []
    for filename, sha1_target_map in artifact_sha1_target_map.items():
        if is_common(sha1_target_map):
            bucket = common
        elif whitelist_filter(filename):
            bucket = whitelisted_diff
        else:
            bucket = diff
        bucket.extend(
            format_row(sha1, filename, targets)
            for sha1, targets in sha1_target_map.items())

    header = "filename, sha1sum, targets\n"

    reports = (
        ("common.csv", common),
        ("diff.csv", diff),
        ("whitelisted_diff.csv", whitelisted_diff),
    )
    for report_name, rows in reports:
        with open(report_name, 'w') as fout:
            fout.write(header)
            fout.writelines(sorted(rows))
Jeongik Cha | 062f2b0 | 2019-06-13 18:19:30 +0900 | [diff] [blame] | 209 | |
def main_with_zip(extracted_paths, args):
    """Extracts the search paths from each target's zip files, then compares.

    For every (target, extraction directory) pair, `unzip` is invoked on the
    "*.zip" pattern inside the target directory, restricted to members under
    each search path. The exit status of unzip is not checked. Afterwards
    main() is run on the extraction directories.

    Args:
      extracted_paths: Directories to extract into, one per entry of
        args.target and in the same order.
      args: Parsed command line arguments; target, search_path, whitelist and
        ignore_signing_key are read.
    """
    member_patterns = [os.path.join(s, "*") for s in args.search_path]
    for origin_path, tmp_path in zip(args.target, extracted_paths):
        archive_pattern = os.path.join(origin_path, "*.zip")
        subprocess.call(
            ["unzip", "-qd", tmp_path, archive_pattern] + member_patterns)
    main(extracted_paths, args.search_path, args.whitelist, args.ignore_signing_key)
Jeongik Cha | 062f2b0 | 2019-06-13 18:19:30 +0900 | [diff] [blame] | 216 | |
# Command line entry point: parse the arguments and compare the targets, either
# directly or after extracting their zip files.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="compare_images", usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u] [-p] [-w whitelist1] [-w whitelist2]")
    parser.add_argument("-t", "--target", nargs='+', required=True)
    parser.add_argument("-s", "--search_path", nargs='+', required=True)
    parser.add_argument("-i", "--ignore_signing_key", action='store_true')
    parser.add_argument("-u", "--unzip", action='store_true')
    parser.add_argument("-p", "--preserve_extracted_files", action='store_true')
    parser.add_argument("-w", "--whitelist", action="append", default=[])
    args = parser.parse_args()
    if len(args.target) < 2:
        parser.error("The number of targets has to be at least two.")
    if args.unzip:
        if args.preserve_extracted_files:
            # Extract in place so the extracted files stay next to the zips.
            main_with_zip(args.target, args)
        else:
            with tempfile.TemporaryDirectory() as tmpdir:
                target_in_tmp = []
                for index, t in enumerate(args.target):
                    # Joining `t` itself onto tmpdir would discard tmpdir for
                    # an absolute `t` and could escape it through '..', so
                    # only the target's final path component is used. It is
                    # kept as the last component because main() reports
                    # targets by basename; the index directory keeps targets
                    # that share a basename apart.
                    name = os.path.basename(os.path.abspath(t))
                    p = os.path.join(tmpdir, str(index), name)
                    os.makedirs(p)
                    target_in_tmp.append(p)
                main_with_zip(target_in_tmp, args)
    else:
        main(args.target, args.search_path, args.whitelist, args.ignore_signing_key)