blob: c7a9e2b5991298707ca86e891f098376cce393b8 [file] [log] [blame]
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Bao8dcf7382015-05-21 14:09:49 -070019import common
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms src into tgt.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data);
        otherwise run "bsdiff".

  Returns:
    The patch contents, as a string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  # We only need the patch file's name (the tool writes it); close the
  # descriptor now so it isn't leaked.
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)
    try:
      os.unlink(patchfile)
    except OSError:
      pass
    if imgdiff:
      # Discard imgdiff's chatter; use a context manager so the
      # /dev/null handle is closed instead of leaked on every call.
      with open("/dev/null", "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Best-effort cleanup of all three temp files.
    try:
      os.unlink(srcfile)
      os.unlink(tgtfile)
      os.unlink(patchfile)
    except OSError:
      pass
Dan Albert8b72aef2015-03-23 19:13:21 -070073
74class Image(object):
75 def ReadRangeSet(self, ranges):
76 raise NotImplementedError
77
Tao Bao68658c02015-06-01 13:40:49 -070078 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070079 raise NotImplementedError
80
81
82class EmptyImage(Image):
Doug Zongkerfc44a512014-08-26 13:10:25 -070083 """A zero-length image."""
84 blocksize = 4096
85 care_map = RangeSet()
Tao Baoff777812015-05-12 11:42:31 -070086 clobbered_blocks = RangeSet()
Tao Baoe9b61912015-07-09 17:37:49 -070087 extended = RangeSet()
Doug Zongkerfc44a512014-08-26 13:10:25 -070088 total_blocks = 0
89 file_map = {}
90 def ReadRangeSet(self, ranges):
91 return ()
Tao Bao68658c02015-06-01 13:40:49 -070092 def TotalSha1(self, include_clobbered_blocks=False):
93 # EmptyImage always carries empty clobbered_blocks, so
94 # include_clobbered_blocks can be ignored.
95 assert self.clobbered_blocks.size() == 0
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070096 return sha1().hexdigest()
97
98
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: the raw image contents, as a string.
      trim: if True, drop any trailing partial block.
      pad: if True, zero-fill any trailing partial block.
          At most one of trim/pad may be set; if neither is, len(data)
          must already be a multiple of the block size.

    Raises:
      ValueError: if data is not block-aligned and neither trim nor pad
          was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3 as
    # well ("/" would produce a float there); identical result in Python 2.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    # Partition the blocks into all-zero vs. nonzero; each list holds
    # (start, end) pairs flattened, which is the form RangeSet(data) takes.
    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    self.file_map = {"__ZERO": RangeSet(zero_blocks),
                     "__NONZERO": RangeSet(nonzero_blocks)}

  def ReadRangeSet(self, ranges):
    """Return the data for the given blocks, one string per range."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the whole image.

    DataImage always carries empty clobbered_blocks, so
    include_clobbered_blocks can be ignored.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700150
Doug Zongkerfc44a512014-08-26 13:10:25 -0700151
class Transfer(object):
  """One update step: produce tgt_ranges (from src_ranges) via 'style'."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style

    # A transfer is "intact" only when both of its range sets are monotonic.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # OrderedDict (not dict) keeps the output repeatable; with a plain
    # dict it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register ourselves: our id is our index in the by_id list.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Blocks newly stashed by this command, minus the blocks it frees."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrite this transfer as a "new" command reading no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
186
187
188# BlockImageDiff works on two image objects. An image object is
189# anything that provides the following attributes:
190#
191# blocksize: the size in bytes of a block, currently must be 4096.
192#
193# total_blocks: the total size of the partition/image, in blocks.
194#
195# care_map: a RangeSet containing which blocks (in the range [0,
196# total_blocks) we actually care about; i.e. which blocks contain
197# data.
198#
199# file_map: a dict that partitions the blocks contained in care_map
200# into smaller domains that are useful for doing diffs on.
201# (Typically a domain is a file, and the key in file_map is the
202# pathname.)
203#
Tao Baoff777812015-05-12 11:42:31 -0700204# clobbered_blocks: a RangeSet containing which blocks contain data
205# but may be altered by the FS. They need to be excluded when
206# verifying the partition integrity.
207#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700208# ReadRangeSet(): a function that takes a RangeSet and returns the
209# data contained in the image blocks of that RangeSet. The data
210# is returned as a list or tuple of strings; concatenating the
211# elements together should produce the requested data.
212# Implementations are free to break up the data into list/tuple
213# elements in any way that is convenient.
214#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700215# TotalSha1(): a function that returns (as a hex string) the SHA-1
216# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700217# care_map minus clobbered_blocks, or including the clobbered
218# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700219#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700220# When creating a BlockImageDiff, the src image may be None, in which
221# case the list of transfers produced will never read from the
222# original image.
223
224class BlockImageDiff(object):
Sami Tolvanendd67a292014-12-09 16:40:34 +0000225 def __init__(self, tgt, src=None, threads=None, version=3):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700226 if threads is None:
227 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700228 if threads == 0:
229 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700230 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700231 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700232 self.transfers = []
233 self.src_basenames = {}
234 self.src_numpatterns = {}
Doug Zongker62338182014-09-08 08:29:55 -0700235
Sami Tolvanendd67a292014-12-09 16:40:34 +0000236 assert version in (1, 2, 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700237
238 self.tgt = tgt
239 if src is None:
240 src = EmptyImage()
241 self.src = src
242
243 # The updater code that installs the patch always uses 4k blocks.
244 assert tgt.blocksize == 4096
245 assert src.blocksize == 4096
246
247 # The range sets in each filemap should comprise a partition of
248 # the care map.
249 self.AssertPartition(src.care_map, src.file_map.values())
250 self.AssertPartition(tgt.care_map, tgt.file_map.values())
251
  def Compute(self, prefix):
    """Run the whole diff pipeline and write the output files.

    Produces <prefix>.transfer.list, <prefix>.new.dat and
    <prefix>.patch.dat via the steps below.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
286
Dan Albert8b72aef2015-03-23 19:13:21 -0700287 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000288 data = source.ReadRangeSet(ranges)
289 ctx = sha1()
290
291 for p in data:
292 ctx.update(p)
293
294 return ctx.hexdigest()
295
  def WriteTransfers(self, prefix):
    """Serialize self.transfers to <prefix>.transfer.list.

    Emits the versioned block-image-update command stream (including
    stash/free bookkeeping), prepends the header (format version, total
    blocks written, and -- for version >= 2 -- the number of stash slots
    and the maximum stash size in blocks), and appends trailing
    zero/erase commands for blocks outside the target care map.
    """
    out = []

    # Total number of target blocks written by new/move/diff/zero commands.
    total = 0

    # Maps a stash key to its slot id (v2) or tracks refcounts per
    # stash hash (v3).  stashed_blocks tracks the running stash usage;
    # max_stashed_blocks records its high-water mark.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # v2 stash slot numbers are recycled via this min-heap.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        stashed_blocks += sr.size()
        if self.version == 2:
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # v3 keys stashes by content hash; identical ranges share one
          # stash entry via a refcount and are only emitted once.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw()
      elif self.version >= 2:

        # Build the source spec, one of:
        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          stashed_blocks -= sr.size()
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed range in coordinates relative to the
          # full source range of this transfer.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            # Drop one reference; free the stash once nobody else uses it.
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op on the device.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        # Only zero the target blocks not already zero in the source.
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))   # format version number
    out.insert(1, str(total) + "\n")
    if self.version >= 2:
      # version 2+ only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      # Report peak stash usage against the configured limit (if any).
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700528
  def ReviseStashSize(self):
    """Keep runtime stash usage under the cache limit.

    Simulates stash usage across the transfer sequence; whenever
    stashing a range (explicitly via stash_before, or implicitly for
    overlapping v3 "diff" commands) would exceed the allowed fraction
    of /cache, the command that consumes the stash is converted into a
    "new" command instead.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))
          new_blocks += sr.size()

        cmd.ConvertToNew()

    print(" Total %d blocks are packed as new blocks due to insufficient "
          "cache size." % (new_blocks,))
598
  def ComputePatches(self, prefix):
    """Compute patches; write <prefix>.new.dat and <prefix>.patch.dat.

    "diff" transfers whose source already equals their target become
    "move" commands; the rest become "bsdiff" or "imgdiff" and have
    their patches computed (in parallel across self.threads workers)
    and concatenated into the patch file, recording each transfer's
    patch_start/patch_len.  "new" transfers have their target data
    written to the new-data file.
    """
    print("Reticulating splines...")
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate.  (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      # Sort so the largest targets are popped (and reported) last.
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        # Worker loop: pop one queued diff at a time under the lock,
        # compute the patch outside the lock, store the result under it.
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    # Concatenate the patches in queue order, recording each transfer's
    # offset and length within the combined patch file.
    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)
703
704 def AssertSequenceGood(self):
705 # Simulate the sequences of transfers we will output, and check that:
706 # - we never read a block after writing it, and
707 # - we write every block we care about exactly once.
708
709 # Start with no blocks having been touched yet.
710 touched = RangeSet()
711
712 # Imagine processing the transfers in order.
713 for xf in self.transfers:
714 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700715
716 x = xf.src_ranges
717 if self.version >= 2:
718 for _, sr in xf.use_stash:
719 x = x.subtract(sr)
720
721 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700722 # Check that the output blocks for this transfer haven't yet been touched.
723 assert not touched.overlaps(xf.tgt_ranges)
724 # Touch all the blocks written by this transfer.
725 touched = touched.union(xf.tgt_ranges)
726
727 # Check that we've written every target block.
728 assert touched == self.tgt.care_map
729
Doug Zongker62338182014-09-08 08:29:55 -0700730 def ImproveVertexSequence(self):
731 print("Improving vertex order...")
732
733 # At this point our digraph is acyclic; we reversed any edges that
734 # were backwards in the heuristically-generated sequence. The
735 # previously-generated order is still acceptable, but we hope to
736 # find a better order that needs less memory for stashed data.
737 # Now we do a topological sort to generate a new vertex order,
738 # using a greedy algorithm to choose which vertex goes next
739 # whenever we have a choice.
740
741 # Make a copy of the edge set; this copy will get destroyed by the
742 # algorithm.
743 for xf in self.transfers:
744 xf.incoming = xf.goes_after.copy()
745 xf.outgoing = xf.goes_before.copy()
746
747 L = [] # the new vertex order
748
749 # S is the set of sources in the remaining graph; we always choose
750 # the one that leaves the least amount of stashed data after it's
751 # executed.
752 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
753 if not u.incoming]
754 heapq.heapify(S)
755
756 while S:
757 _, _, xf = heapq.heappop(S)
758 L.append(xf)
759 for u in xf.outgoing:
760 del u.incoming[xf]
761 if not u.incoming:
762 heapq.heappush(S, (u.NetStashChange(), u.order, u))
763
764 # if this fails then our graph had a cycle.
765 assert len(L) == len(self.transfers)
766
767 self.transfers = L
768 for i, xf in enumerate(L):
769 xf.order = i
770
Doug Zongkerfc44a512014-08-26 13:10:25 -0700771 def RemoveBackwardEdges(self):
772 print("Removing backward edges...")
773 in_order = 0
774 out_of_order = 0
775 lost_source = 0
776
777 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700778 lost = 0
779 size = xf.src_ranges.size()
780 for u in xf.goes_before:
781 # xf should go before u
782 if xf.order < u.order:
783 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700784 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700785 else:
786 # it doesn't, boo. trim the blocks that u writes from xf's
787 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700788 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700789 assert xf.src_ranges.overlaps(u.tgt_ranges)
790 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
791 xf.intact = False
792
793 if xf.style == "diff" and not xf.src_ranges:
794 # nothing left to diff from; treat as new data
795 xf.style = "new"
796
797 lost = size - xf.src_ranges.size()
798 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700799
800 print((" %d/%d dependencies (%.2f%%) were violated; "
801 "%d source blocks removed.") %
802 (out_of_order, in_order + out_of_order,
803 (out_of_order * 100.0 / (in_order + out_of_order))
804 if (in_order + out_of_order) else 0.0,
805 lost_source))
806
Doug Zongker62338182014-09-08 08:29:55 -0700807 def ReverseBackwardEdges(self):
808 print("Reversing backward edges...")
809 in_order = 0
810 out_of_order = 0
811 stashes = 0
812 stash_size = 0
813
814 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700815 for u in xf.goes_before.copy():
816 # xf should go before u
817 if xf.order < u.order:
818 # it does, hurray!
819 in_order += 1
820 else:
821 # it doesn't, boo. modify u to stash the blocks that it
822 # writes that xf wants to read, and then require u to go
823 # before xf.
824 out_of_order += 1
825
826 overlap = xf.src_ranges.intersect(u.tgt_ranges)
827 assert overlap
828
829 u.stash_before.append((stashes, overlap))
830 xf.use_stash.append((stashes, overlap))
831 stashes += 1
832 stash_size += overlap.size()
833
834 # reverse the edge direction; now xf must go after u
835 del xf.goes_before[u]
836 del u.goes_after[xf]
837 xf.goes_after[u] = None # value doesn't matter
838 u.goes_before[xf] = None
839
840 print((" %d/%d dependencies (%.2f%%) were violated; "
841 "%d source blocks stashed.") %
842 (out_of_order, in_order + out_of_order,
843 (out_of_order * 100.0 / (in_order + out_of_order))
844 if (in_order + out_of_order) else 0.0,
845 stash_size))
846
  def FindVertexSequence(self):
    """Order self.transfers so few/cheap dependency edges point backwards.

    On return, self.transfers is reordered, each transfer's .order field
    holds its new position, and the temporary .incoming/.outgoing edge
    copies have been deleted.  Edges that still point "left" in this
    order are resolved later (by RemoveBackwardEdges or
    ReverseBackwardEdges).
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    while G:

      # Put all sinks at the end of the sequence.  Removing a sink can
      # expose new sinks, so repeat until none remain.
      while True:
        sinks = [u for u in G if not u.outgoing]
        if not sinks:
          break
        for u in sinks:
          s2.appendleft(u)
          del G[u]
          # Detach u from every remaining vertex that pointed at it.
          for iu in u.incoming:
            del iu.outgoing[u]

      # Put all the sources at the beginning of the sequence.  Same
      # fixed-point iteration as for sinks above.
      while True:
        sources = [u for u in G if not u.incoming]
        if not sources:
          break
        for u in sources:
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            del iu.incoming[u]

      # Every vertex was classified as a source or sink; we're done.
      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.  Ties keep the
      # first vertex seen in G, so the output stays deterministic.

      max_d = None
      best_u = None
      for u in G:
        d = sum(u.outgoing.values()) - sum(u.incoming.values())
        if best_u is None or d > max_d:
          max_d = d
          best_u = u

      # Emit the chosen vertex on the left side and fully detach it.
      u = best_u
      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        del iu.incoming[u]
      for iu in u.incoming:
        del iu.outgoing[u]

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      # The scratch edge copies are no longer needed.
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
933
934 def GenerateDigraph(self):
935 print("Generating digraph...")
936 for a in self.transfers:
937 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700938 if a is b:
939 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700940
941 # If the blocks written by A are read by B, then B needs to go before A.
942 i = a.tgt_ranges.intersect(b.src_ranges)
943 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700944 if b.src_name == "__ZERO":
945 # the cost of removing source blocks for the __ZERO domain
946 # is (nearly) zero.
947 size = 0
948 else:
949 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700950 b.goes_before[a] = size
951 a.goes_after[b] = size
952
953 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -0700954 """Parse the file_map to generate all the transfers."""
955
956 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
957 split=False):
958 """Wrapper function for adding a Transfer().
959
960 For BBOTA v3, we need to stash source blocks for resumable feature.
961 However, with the growth of file size and the shrink of the cache
962 partition source blocks are too large to be stashed. If a file occupies
963 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
964 into smaller pieces by getting multiple Transfer()s.
965
966 The downside is that after splitting, we can no longer use imgdiff but
967 only bsdiff."""
968
969 MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
970
971 # We care about diff transfers only.
972 if style != "diff" or not split:
973 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
974 return
975
976 # Change nothing for small files.
977 if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
978 src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
979 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
980 return
981
982 pieces = 0
983 while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
984 src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
985 tgt_split_name = "%s-%d" % (tgt_name, pieces)
986 src_split_name = "%s-%d" % (src_name, pieces)
987 tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
988 src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
989 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
990 by_id)
991
992 tgt_ranges = tgt_ranges.subtract(tgt_first)
993 src_ranges = src_ranges.subtract(src_first)
994 pieces += 1
995
996 # Handle remaining blocks.
997 if tgt_ranges.size() or src_ranges.size():
998 # Must be both non-empty.
999 assert tgt_ranges.size() and src_ranges.size()
1000 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1001 src_split_name = "%s-%d" % (src_name, pieces)
1002 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1003 by_id)
1004
Doug Zongkerfc44a512014-08-26 13:10:25 -07001005 empty = RangeSet()
1006 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1007 if tgt_fn == "__ZERO":
1008 # the special "__ZERO" domain is all the blocks not contained
1009 # in any file and that are filled with zeros. We have a
1010 # special transfer style for zero blocks.
1011 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001012 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1013 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001014 continue
1015
Tao Baoff777812015-05-12 11:42:31 -07001016 elif tgt_fn == "__COPY":
1017 # "__COPY" domain includes all the blocks not contained in any
1018 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001019 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001020 continue
1021
Doug Zongkerfc44a512014-08-26 13:10:25 -07001022 elif tgt_fn in self.src.file_map:
1023 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001024 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1025 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001026 continue
1027
1028 b = os.path.basename(tgt_fn)
1029 if b in self.src_basenames:
1030 # Look for an exact basename match in the source.
1031 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001032 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1033 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001034 continue
1035
1036 b = re.sub("[0-9]+", "#", b)
1037 if b in self.src_numpatterns:
1038 # Look for a 'number pattern' match (a basename match after
1039 # all runs of digits are replaced by "#"). (This is useful
1040 # for .so files that contain version numbers in the filename
1041 # that get bumped.)
1042 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001043 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1044 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001045 continue
1046
Tao Bao9a5caf22015-08-25 15:10:10 -07001047 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001048
1049 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001050 for k in self.src.file_map.keys():
1051 b = os.path.basename(k)
1052 self.src_basenames[b] = k
1053 b = re.sub("[0-9]+", "#", b)
1054 self.src_numpatterns[b] = k
1055
1056 @staticmethod
1057 def AssertPartition(total, seq):
1058 """Assert that all the RangeSets in 'seq' form a partition of the
1059 'total' RangeSet (ie, they are nonintersecting and their union
1060 equals 'total')."""
1061 so_far = RangeSet()
1062 for i in seq:
1063 assert not so_far.overlaps(i)
1064 so_far = so_far.union(i)
1065 assert so_far == total