blob: bb2f16d7c4955cb1a4b2a24dca1222c868b04438 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Baod47d8e12015-05-21 14:09:49 -070019import common
Doug Zongkercf4fda72014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
Doug Zongkerfc44a512014-08-26 13:10:25 -070035def compute_patch(src, tgt, imgdiff=False):
36 srcfd, srcfile = tempfile.mkstemp(prefix="src-")
37 tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
38 patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
39 os.close(patchfd)
40
41 try:
42 with os.fdopen(srcfd, "wb") as f_src:
43 for p in src:
44 f_src.write(p)
45
46 with os.fdopen(tgtfd, "wb") as f_tgt:
47 for p in tgt:
48 f_tgt.write(p)
49 try:
50 os.unlink(patchfile)
51 except OSError:
52 pass
53 if imgdiff:
54 p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
55 stdout=open("/dev/null", "a"),
56 stderr=subprocess.STDOUT)
57 else:
58 p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])
59
60 if p:
61 raise ValueError("diff failed: " + str(p))
62
63 with open(patchfile, "rb") as f:
64 return f.read()
65 finally:
66 try:
67 os.unlink(srcfile)
68 os.unlink(tgtfile)
69 os.unlink(patchfile)
70 except OSError:
71 pass
72
Dan Albert8b72aef2015-03-23 19:13:21 -070073
class Image(object):
  """Abstract interface that BlockImageDiff requires of its image objects."""

  def ReadRangeSet(self, ranges):
    """Return the data in the blocks of `ranges`; subclasses must override."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image data; subclasses must override."""
    raise NotImplementedError
80
81
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    # Nothing to read; every range maps to no data at all.
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so the
    # include_clobbered_blocks flag makes no difference: the result is
    # always the SHA-1 of the empty string.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
97
98
Dan Albert8b72aef2015-03-23 19:13:21 -070099class DataImage(Image):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700100 """An image wrapped around a single string of data."""
101
102 def __init__(self, data, trim=False, pad=False):
103 self.data = data
104 self.blocksize = 4096
105
106 assert not (trim and pad)
107
108 partial = len(self.data) % self.blocksize
Tao Bao28f6f9c2015-09-05 20:35:32 -0700109 padded = False
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700110 if partial > 0:
111 if trim:
112 self.data = self.data[:-partial]
113 elif pad:
114 self.data += '\0' * (self.blocksize - partial)
Tao Bao28f6f9c2015-09-05 20:35:32 -0700115 padded = True
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700116 else:
117 raise ValueError(("data for DataImage must be multiple of %d bytes "
118 "unless trim or pad is specified") %
119 (self.blocksize,))
120
121 assert len(self.data) % self.blocksize == 0
122
123 self.total_blocks = len(self.data) / self.blocksize
124 self.care_map = RangeSet(data=(0, self.total_blocks))
Tao Bao28f6f9c2015-09-05 20:35:32 -0700125 # When the last block is padded, we always write the whole block even for
126 # incremental OTAs. Because otherwise the last block may get skipped if
127 # unchanged for an incremental, but would fail the post-install
128 # verification if it has non-zero contents in the padding bytes.
129 # Bug: 23828506
130 if padded:
131 self.clobbered_blocks = RangeSet(
132 data=(self.total_blocks-1, self.total_blocks))
133 else:
134 self.clobbered_blocks = RangeSet()
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700135 self.extended = RangeSet()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700136
137 zero_blocks = []
138 nonzero_blocks = []
139 reference = '\0' * self.blocksize
140
Tao Bao28f6f9c2015-09-05 20:35:32 -0700141 for i in range(self.total_blocks-1 if padded else self.total_blocks):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700142 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
143 if d == reference:
144 zero_blocks.append(i)
145 zero_blocks.append(i+1)
146 else:
147 nonzero_blocks.append(i)
148 nonzero_blocks.append(i+1)
149
150 self.file_map = {"__ZERO": RangeSet(zero_blocks),
151 "__NONZERO": RangeSet(nonzero_blocks)}
152
Tao Bao28f6f9c2015-09-05 20:35:32 -0700153 if self.clobbered_blocks:
154 self.file_map["__COPY"] = self.clobbered_blocks
155
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700156 def ReadRangeSet(self, ranges):
157 return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]
158
Tao Bao5fcaaef2015-06-01 13:40:49 -0700159 def TotalSha1(self, include_clobbered_blocks=False):
Tao Bao28f6f9c2015-09-05 20:35:32 -0700160 if not include_clobbered_blocks:
161 ranges = self.care_map.subtract(self.clobbered_blocks)
162 return sha1(self.ReadRangeSet(ranges)).hexdigest()
163 else:
164 return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700165
Doug Zongkerfc44a512014-08-26 13:10:25 -0700166
class Transfer(object):
  """One transfer command: produce tgt_ranges in the target image, possibly
  reading src_ranges from the source image."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style

    # "Intact" means both range sets store their data in increasing block
    # order, which is a precondition for using imgdiff later on.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register ourselves in the global list; our index there is our id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return the net number of blocks this command adds to (+) or removes
    from (-) the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrite this command as a "new" transfer that reads no source data
    and uses no stash."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
201
202
203# BlockImageDiff works on two image objects. An image object is
204# anything that provides the following attributes:
205#
206# blocksize: the size in bytes of a block, currently must be 4096.
207#
208# total_blocks: the total size of the partition/image, in blocks.
209#
210# care_map: a RangeSet containing which blocks (in the range [0,
211# total_blocks) we actually care about; i.e. which blocks contain
212# data.
213#
214# file_map: a dict that partitions the blocks contained in care_map
215# into smaller domains that are useful for doing diffs on.
216# (Typically a domain is a file, and the key in file_map is the
217# pathname.)
218#
Tao Bao5ece99d2015-05-12 11:42:31 -0700219# clobbered_blocks: a RangeSet containing which blocks contain data
220# but may be altered by the FS. They need to be excluded when
221# verifying the partition integrity.
222#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700223# ReadRangeSet(): a function that takes a RangeSet and returns the
224# data contained in the image blocks of that RangeSet. The data
225# is returned as a list or tuple of strings; concatenating the
226# elements together should produce the requested data.
227# Implementations are free to break up the data into list/tuple
228# elements in any way that is convenient.
229#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700230# TotalSha1(): a function that returns (as a hex string) the SHA-1
231# hash of all the data in the image (ie, all the blocks in the
Tao Bao5fcaaef2015-06-01 13:40:49 -0700232# care_map minus clobbered_blocks, or including the clobbered
233# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700234#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700235# When creating a BlockImageDiff, the src image may be None, in which
236# case the list of transfers produced will never read from the
237# original image.
238
239class BlockImageDiff(object):
Sami Tolvanencac671a2014-12-09 16:40:34 +0000240 def __init__(self, tgt, src=None, threads=None, version=3):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700241 if threads is None:
242 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700243 if threads == 0:
244 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700245 self.threads = threads
Doug Zongkercf4fda72014-09-08 08:29:55 -0700246 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700247 self.transfers = []
248 self.src_basenames = {}
249 self.src_numpatterns = {}
Doug Zongkercf4fda72014-09-08 08:29:55 -0700250
Sami Tolvanencac671a2014-12-09 16:40:34 +0000251 assert version in (1, 2, 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700252
253 self.tgt = tgt
254 if src is None:
255 src = EmptyImage()
256 self.src = src
257
258 # The updater code that installs the patch always uses 4k blocks.
259 assert tgt.blocksize == 4096
260 assert src.blocksize == 4096
261
262 # The range sets in each filemap should comprise a partition of
263 # the care map.
264 self.AssertPartition(src.care_map, src.file_map.values())
265 self.AssertPartition(tgt.care_map, tgt.file_map.values())
266
  def Compute(self, prefix):
    """Run the whole diff pipeline, writing <prefix>.transfer.list,
    <prefix>.new.dat and <prefix>.patch.dat via the methods it calls."""
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) a exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
301
Dan Albert8b72aef2015-03-23 19:13:21 -0700302 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanencac671a2014-12-09 16:40:34 +0000303 data = source.ReadRangeSet(ranges)
304 ctx = sha1()
305
306 for p in data:
307 ctx.update(p)
308
309 return ctx.hexdigest()
310
Doug Zongkerfc44a512014-08-26 13:10:25 -0700311 def WriteTransfers(self, prefix):
312 out = []
313
Doug Zongkerfc44a512014-08-26 13:10:25 -0700314 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700315
Doug Zongkercf4fda72014-09-08 08:29:55 -0700316 stashes = {}
317 stashed_blocks = 0
318 max_stashed_blocks = 0
319
320 free_stash_ids = []
321 next_stash_id = 0
322
Doug Zongkerfc44a512014-08-26 13:10:25 -0700323 for xf in self.transfers:
324
Doug Zongkercf4fda72014-09-08 08:29:55 -0700325 if self.version < 2:
326 assert not xf.stash_before
327 assert not xf.use_stash
328
329 for s, sr in xf.stash_before:
330 assert s not in stashes
331 if free_stash_ids:
332 sid = heapq.heappop(free_stash_ids)
333 else:
334 sid = next_stash_id
335 next_stash_id += 1
336 stashes[s] = sid
337 stashed_blocks += sr.size()
Sami Tolvanencac671a2014-12-09 16:40:34 +0000338 if self.version == 2:
339 out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
340 else:
341 sh = self.HashBlocks(self.src, sr)
342 if sh in stashes:
343 stashes[sh] += 1
344 else:
345 stashes[sh] = 1
346 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongkercf4fda72014-09-08 08:29:55 -0700347
348 if stashed_blocks > max_stashed_blocks:
349 max_stashed_blocks = stashed_blocks
350
Jesse Zhao7ca20d12015-03-02 16:53:08 -0800351 free_string = []
352
Doug Zongkercf4fda72014-09-08 08:29:55 -0700353 if self.version == 1:
Dan Albert8b72aef2015-03-23 19:13:21 -0700354 src_str = xf.src_ranges.to_string_raw()
Sami Tolvanencac671a2014-12-09 16:40:34 +0000355 elif self.version >= 2:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700356
357 # <# blocks> <src ranges>
358 # OR
359 # <# blocks> <src ranges> <src locs> <stash refs...>
360 # OR
361 # <# blocks> - <stash refs...>
362
363 size = xf.src_ranges.size()
Dan Albert8b72aef2015-03-23 19:13:21 -0700364 src_str = [str(size)]
Doug Zongkercf4fda72014-09-08 08:29:55 -0700365
366 unstashed_src_ranges = xf.src_ranges
367 mapped_stashes = []
368 for s, sr in xf.use_stash:
369 sid = stashes.pop(s)
370 stashed_blocks -= sr.size()
371 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
Sami Tolvanencac671a2014-12-09 16:40:34 +0000372 sh = self.HashBlocks(self.src, sr)
Doug Zongkercf4fda72014-09-08 08:29:55 -0700373 sr = xf.src_ranges.map_within(sr)
374 mapped_stashes.append(sr)
Sami Tolvanencac671a2014-12-09 16:40:34 +0000375 if self.version == 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700376 src_str.append("%d:%s" % (sid, sr.to_string_raw()))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000377 else:
378 assert sh in stashes
Dan Albert8b72aef2015-03-23 19:13:21 -0700379 src_str.append("%s:%s" % (sh, sr.to_string_raw()))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000380 stashes[sh] -= 1
381 if stashes[sh] == 0:
Anthony Kingc713d762015-11-03 00:23:11 +0000382 free_string.append("free %s\n" % sh)
Sami Tolvanencac671a2014-12-09 16:40:34 +0000383 stashes.pop(sh)
Doug Zongkercf4fda72014-09-08 08:29:55 -0700384 heapq.heappush(free_stash_ids, sid)
385
386 if unstashed_src_ranges:
Dan Albert8b72aef2015-03-23 19:13:21 -0700387 src_str.insert(1, unstashed_src_ranges.to_string_raw())
Doug Zongkercf4fda72014-09-08 08:29:55 -0700388 if xf.use_stash:
389 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Dan Albert8b72aef2015-03-23 19:13:21 -0700390 src_str.insert(2, mapped_unstashed.to_string_raw())
Doug Zongkercf4fda72014-09-08 08:29:55 -0700391 mapped_stashes.append(mapped_unstashed)
392 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
393 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700394 src_str.insert(1, "-")
Doug Zongkercf4fda72014-09-08 08:29:55 -0700395 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
396
Dan Albert8b72aef2015-03-23 19:13:21 -0700397 src_str = " ".join(src_str)
Doug Zongkercf4fda72014-09-08 08:29:55 -0700398
Sami Tolvanencac671a2014-12-09 16:40:34 +0000399 # all versions:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700400 # zero <rangeset>
401 # new <rangeset>
402 # erase <rangeset>
403 #
404 # version 1:
405 # bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
406 # imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
407 # move <src rangeset> <tgt rangeset>
408 #
409 # version 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700410 # bsdiff patchstart patchlen <tgt rangeset> <src_str>
411 # imgdiff patchstart patchlen <tgt rangeset> <src_str>
412 # move <tgt rangeset> <src_str>
Sami Tolvanencac671a2014-12-09 16:40:34 +0000413 #
414 # version 3:
Dan Albert8b72aef2015-03-23 19:13:21 -0700415 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
416 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
417 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700418
419 tgt_size = xf.tgt_ranges.size()
420
421 if xf.style == "new":
422 assert xf.tgt_ranges
423 out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
424 total += tgt_size
425 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700426 assert xf.tgt_ranges
427 assert xf.src_ranges.size() == tgt_size
428 if xf.src_ranges != xf.tgt_ranges:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700429 if self.version == 1:
430 out.append("%s %s %s\n" % (
431 xf.style,
432 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
433 elif self.version == 2:
434 out.append("%s %s %s\n" % (
435 xf.style,
Dan Albert8b72aef2015-03-23 19:13:21 -0700436 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000437 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100438 # take into account automatic stashing of overlapping blocks
439 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700440 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100441 if temp_stash_usage > max_stashed_blocks:
442 max_stashed_blocks = temp_stash_usage
443
Sami Tolvanencac671a2014-12-09 16:40:34 +0000444 out.append("%s %s %s %s\n" % (
445 xf.style,
446 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700447 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700448 total += tgt_size
449 elif xf.style in ("bsdiff", "imgdiff"):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700450 assert xf.tgt_ranges
451 assert xf.src_ranges
Doug Zongkercf4fda72014-09-08 08:29:55 -0700452 if self.version == 1:
453 out.append("%s %d %d %s %s\n" % (
454 xf.style, xf.patch_start, xf.patch_len,
455 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
456 elif self.version == 2:
457 out.append("%s %d %d %s %s\n" % (
458 xf.style, xf.patch_start, xf.patch_len,
Dan Albert8b72aef2015-03-23 19:13:21 -0700459 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000460 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100461 # take into account automatic stashing of overlapping blocks
462 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700463 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100464 if temp_stash_usage > max_stashed_blocks:
465 max_stashed_blocks = temp_stash_usage
466
Sami Tolvanencac671a2014-12-09 16:40:34 +0000467 out.append("%s %d %d %s %s %s %s\n" % (
468 xf.style,
469 xf.patch_start, xf.patch_len,
470 self.HashBlocks(self.src, xf.src_ranges),
471 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700472 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700473 total += tgt_size
474 elif xf.style == "zero":
475 assert xf.tgt_ranges
476 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
477 if to_zero:
478 out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
479 total += to_zero.size()
480 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700481 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700482
Dan Albertee8323b2015-03-27 16:49:01 -0700483 if free_string:
484 out.append("".join(free_string))
Doug Zongkercf4fda72014-09-08 08:29:55 -0700485
Tao Baoe9647142015-08-07 19:49:45 -0700486 if self.version >= 2 and common.OPTIONS.cache_size is not None:
Tao Baod47d8e12015-05-21 14:09:49 -0700487 # Sanity check: abort if we're going to need more stash space than
488 # the allowed size (cache_size * threshold). There are two purposes
489 # of having a threshold here. a) Part of the cache may have been
490 # occupied by some recovery logs. b) It will buy us some time to deal
491 # with the oversize issue.
492 cache_size = common.OPTIONS.cache_size
493 stash_threshold = common.OPTIONS.stash_threshold
494 max_allowed = cache_size * stash_threshold
495 assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
496 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
497 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
498 self.tgt.blocksize, max_allowed, cache_size,
499 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700500
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700501 # Zero out extended blocks as a workaround for bug 20881595.
502 if self.tgt.extended:
503 out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
504
505 # We erase all the blocks on the partition that a) don't contain useful
506 # data in the new image and b) will not be touched by dm-verity.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700507 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700508 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
509 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
510 if new_dontcare:
511 out.append("erase %s\n" % (new_dontcare.to_string_raw(),))
Doug Zongker7b0ddf52014-09-09 12:38:47 -0700512
513 out.insert(0, "%d\n" % (self.version,)) # format version number
514 out.insert(1, str(total) + "\n")
515 if self.version >= 2:
516 # version 2 only: after the total block count, we give the number
517 # of stash slots needed, and the maximum size needed (in blocks)
518 out.insert(2, str(next_stash_id) + "\n")
519 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700520
521 with open(prefix + ".transfer.list", "wb") as f:
522 for i in out:
523 f.write(i)
524
Doug Zongkercf4fda72014-09-08 08:29:55 -0700525 if self.version >= 2:
Tao Baod47d8e12015-05-21 14:09:49 -0700526 max_stashed_size = max_stashed_blocks * self.tgt.blocksize
Tao Baoe9647142015-08-07 19:49:45 -0700527 OPTIONS = common.OPTIONS
528 if OPTIONS.cache_size is not None:
529 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
530 print("max stashed blocks: %d (%d bytes), "
531 "limit: %d bytes (%.2f%%)\n" % (
532 max_stashed_blocks, max_stashed_size, max_allowed,
533 max_stashed_size * 100.0 / max_allowed))
534 else:
535 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
536 max_stashed_blocks, max_stashed_size))
Doug Zongkercf4fda72014-09-08 08:29:55 -0700537
Tao Bao1fc67632015-08-17 09:45:13 -0700538 def ReviseStashSize(self):
539 print("Revising stash size...")
540 stashes = {}
541
542 # Create the map between a stash and its def/use points. For example, for a
543 # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
544 for xf in self.transfers:
545 # Command xf defines (stores) all the stashes in stash_before.
546 for idx, sr in xf.stash_before:
547 stashes[idx] = (sr, xf)
548
549 # Record all the stashes command xf uses.
550 for idx, _ in xf.use_stash:
551 stashes[idx] += (xf,)
552
553 # Compute the maximum blocks available for stash based on /cache size and
554 # the threshold.
555 cache_size = common.OPTIONS.cache_size
556 stash_threshold = common.OPTIONS.stash_threshold
557 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
558
559 stashed_blocks = 0
Tao Bao937847a2015-08-25 15:10:10 -0700560 new_blocks = 0
Tao Bao1fc67632015-08-17 09:45:13 -0700561
562 # Now go through all the commands. Compute the required stash size on the
563 # fly. If a command requires excess stash than available, it deletes the
564 # stash by replacing the command that uses the stash with a "new" command
565 # instead.
566 for xf in self.transfers:
567 replaced_cmds = []
568
569 # xf.stash_before generates explicit stash commands.
570 for idx, sr in xf.stash_before:
571 if stashed_blocks + sr.size() > max_allowed:
572 # We cannot stash this one for a later command. Find out the command
573 # that will use this stash and replace the command with "new".
574 use_cmd = stashes[idx][2]
575 replaced_cmds.append(use_cmd)
Tao Bao937847a2015-08-25 15:10:10 -0700576 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao1fc67632015-08-17 09:45:13 -0700577 else:
578 stashed_blocks += sr.size()
579
580 # xf.use_stash generates free commands.
581 for _, sr in xf.use_stash:
582 stashed_blocks -= sr.size()
583
584 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
585 # ComputePatches(), they both have the style of "diff".
586 if xf.style == "diff" and self.version >= 3:
587 assert xf.tgt_ranges and xf.src_ranges
588 if xf.src_ranges.overlaps(xf.tgt_ranges):
589 if stashed_blocks + xf.src_ranges.size() > max_allowed:
590 replaced_cmds.append(xf)
Tao Bao937847a2015-08-25 15:10:10 -0700591 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao1fc67632015-08-17 09:45:13 -0700592
593 # Replace the commands in replaced_cmds with "new"s.
594 for cmd in replaced_cmds:
595 # It no longer uses any commands in "use_stash". Remove the def points
596 # for all those stashes.
597 for idx, sr in cmd.use_stash:
598 def_cmd = stashes[idx][1]
599 assert (idx, sr) in def_cmd.stash_before
600 def_cmd.stash_before.remove((idx, sr))
Tao Bao937847a2015-08-25 15:10:10 -0700601 new_blocks += sr.size()
Tao Bao1fc67632015-08-17 09:45:13 -0700602
603 cmd.ConvertToNew()
604
Tao Bao937847a2015-08-25 15:10:10 -0700605 print(" Total %d blocks are packed as new blocks due to insufficient "
606 "cache size." % (new_blocks,))
607
Doug Zongkerfc44a512014-08-26 13:10:25 -0700608 def ComputePatches(self, prefix):
609 print("Reticulating splines...")
610 diff_q = []
611 patch_num = 0
612 with open(prefix + ".new.dat", "wb") as new_f:
613 for xf in self.transfers:
614 if xf.style == "zero":
615 pass
616 elif xf.style == "new":
617 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
618 new_f.write(piece)
619 elif xf.style == "diff":
620 src = self.src.ReadRangeSet(xf.src_ranges)
621 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
622
623 # We can't compare src and tgt directly because they may have
624 # the same content but be broken up into blocks differently, eg:
625 #
626 # ["he", "llo"] vs ["h", "ello"]
627 #
628 # We want those to compare equal, ideally without having to
629 # actually concatenate the strings (these may be tens of
630 # megabytes).
631
632 src_sha1 = sha1()
633 for p in src:
634 src_sha1.update(p)
635 tgt_sha1 = sha1()
636 tgt_size = 0
637 for p in tgt:
638 tgt_sha1.update(p)
639 tgt_size += len(p)
640
641 if src_sha1.digest() == tgt_sha1.digest():
642 # These are identical; we don't need to generate a patch,
643 # just issue copy commands on the device.
644 xf.style = "move"
645 else:
646 # For files in zip format (eg, APKs, JARs, etc.) we would
647 # like to use imgdiff -z if possible (because it usually
648 # produces significantly smaller patches than bsdiff).
649 # This is permissible if:
650 #
651 # - the source and target files are monotonic (ie, the
652 # data is stored with blocks in increasing order), and
653 # - we haven't removed any blocks from the source set.
654 #
655 # If these conditions are satisfied then appending all the
656 # blocks in the set together in order will produce a valid
657 # zip file (plus possibly extra zeros in the last block),
658 # which is what imgdiff needs to operate. (imgdiff is
659 # fine with extra zeros at the end of the file.)
660 imgdiff = (xf.intact and
661 xf.tgt_name.split(".")[-1].lower()
662 in ("apk", "jar", "zip"))
663 xf.style = "imgdiff" if imgdiff else "bsdiff"
664 diff_q.append((tgt_size, src, tgt, xf, patch_num))
665 patch_num += 1
666
667 else:
668 assert False, "unknown style " + xf.style
669
670 if diff_q:
671 if self.threads > 1:
672 print("Computing patches (using %d threads)..." % (self.threads,))
673 else:
674 print("Computing patches...")
675 diff_q.sort()
676
677 patches = [None] * patch_num
678
Dan Albert8b72aef2015-03-23 19:13:21 -0700679 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700680 lock = threading.Lock()
681 def diff_worker():
682 while True:
683 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700684 if not diff_q:
685 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700686 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
687 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
688 size = len(patch)
689 with lock:
690 patches[patchnum] = (patch, xf)
691 print("%10d %10d (%6.2f%%) %7s %s" % (
692 size, tgt_size, size * 100.0 / tgt_size, xf.style,
693 xf.tgt_name if xf.tgt_name == xf.src_name else (
694 xf.tgt_name + " (from " + xf.src_name + ")")))
695
696 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700697 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700698 for th in threads:
699 th.start()
700 while threads:
701 threads.pop().join()
702 else:
703 patches = []
704
705 p = 0
706 with open(prefix + ".patch.dat", "wb") as patch_f:
707 for patch, xf in patches:
708 xf.patch_start = p
709 xf.patch_len = len(patch)
710 patch_f.write(patch)
711 p += len(patch)
712
713 def AssertSequenceGood(self):
714 # Simulate the sequences of transfers we will output, and check that:
715 # - we never read a block after writing it, and
716 # - we write every block we care about exactly once.
717
718 # Start with no blocks having been touched yet.
719 touched = RangeSet()
720
721 # Imagine processing the transfers in order.
722 for xf in self.transfers:
723 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongkercf4fda72014-09-08 08:29:55 -0700724
725 x = xf.src_ranges
726 if self.version >= 2:
727 for _, sr in xf.use_stash:
728 x = x.subtract(sr)
729
730 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700731 # Check that the output blocks for this transfer haven't yet been touched.
732 assert not touched.overlaps(xf.tgt_ranges)
733 # Touch all the blocks written by this transfer.
734 touched = touched.union(xf.tgt_ranges)
735
736 # Check that we've written every target block.
737 assert touched == self.tgt.care_map
738
Doug Zongkercf4fda72014-09-08 08:29:55 -0700739 def ImproveVertexSequence(self):
740 print("Improving vertex order...")
741
742 # At this point our digraph is acyclic; we reversed any edges that
743 # were backwards in the heuristically-generated sequence. The
744 # previously-generated order is still acceptable, but we hope to
745 # find a better order that needs less memory for stashed data.
746 # Now we do a topological sort to generate a new vertex order,
747 # using a greedy algorithm to choose which vertex goes next
748 # whenever we have a choice.
749
750 # Make a copy of the edge set; this copy will get destroyed by the
751 # algorithm.
752 for xf in self.transfers:
753 xf.incoming = xf.goes_after.copy()
754 xf.outgoing = xf.goes_before.copy()
755
756 L = [] # the new vertex order
757
758 # S is the set of sources in the remaining graph; we always choose
759 # the one that leaves the least amount of stashed data after it's
760 # executed.
761 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
762 if not u.incoming]
763 heapq.heapify(S)
764
765 while S:
766 _, _, xf = heapq.heappop(S)
767 L.append(xf)
768 for u in xf.outgoing:
769 del u.incoming[xf]
770 if not u.incoming:
771 heapq.heappush(S, (u.NetStashChange(), u.order, u))
772
773 # if this fails then our graph had a cycle.
774 assert len(L) == len(self.transfers)
775
776 self.transfers = L
777 for i, xf in enumerate(L):
778 xf.order = i
779
Doug Zongkerfc44a512014-08-26 13:10:25 -0700780 def RemoveBackwardEdges(self):
781 print("Removing backward edges...")
782 in_order = 0
783 out_of_order = 0
784 lost_source = 0
785
786 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700787 lost = 0
788 size = xf.src_ranges.size()
789 for u in xf.goes_before:
790 # xf should go before u
791 if xf.order < u.order:
792 # it does, hurray!
Doug Zongkercf4fda72014-09-08 08:29:55 -0700793 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700794 else:
795 # it doesn't, boo. trim the blocks that u writes from xf's
796 # source, so that xf can go after u.
Doug Zongkercf4fda72014-09-08 08:29:55 -0700797 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700798 assert xf.src_ranges.overlaps(u.tgt_ranges)
799 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
800 xf.intact = False
801
802 if xf.style == "diff" and not xf.src_ranges:
803 # nothing left to diff from; treat as new data
804 xf.style = "new"
805
806 lost = size - xf.src_ranges.size()
807 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700808
809 print((" %d/%d dependencies (%.2f%%) were violated; "
810 "%d source blocks removed.") %
811 (out_of_order, in_order + out_of_order,
812 (out_of_order * 100.0 / (in_order + out_of_order))
813 if (in_order + out_of_order) else 0.0,
814 lost_source))
815
Doug Zongkercf4fda72014-09-08 08:29:55 -0700816 def ReverseBackwardEdges(self):
817 print("Reversing backward edges...")
818 in_order = 0
819 out_of_order = 0
820 stashes = 0
821 stash_size = 0
822
823 for xf in self.transfers:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700824 for u in xf.goes_before.copy():
825 # xf should go before u
826 if xf.order < u.order:
827 # it does, hurray!
828 in_order += 1
829 else:
830 # it doesn't, boo. modify u to stash the blocks that it
831 # writes that xf wants to read, and then require u to go
832 # before xf.
833 out_of_order += 1
834
835 overlap = xf.src_ranges.intersect(u.tgt_ranges)
836 assert overlap
837
838 u.stash_before.append((stashes, overlap))
839 xf.use_stash.append((stashes, overlap))
840 stashes += 1
841 stash_size += overlap.size()
842
843 # reverse the edge direction; now xf must go after u
844 del xf.goes_before[u]
845 del u.goes_after[xf]
846 xf.goes_after[u] = None # value doesn't matter
847 u.goes_before[xf] = None
848
849 print((" %d/%d dependencies (%.2f%%) were violated; "
850 "%d source blocks stashed.") %
851 (out_of_order, in_order + out_of_order,
852 (out_of_order * 100.0 / (in_order + out_of_order))
853 if (in_order + out_of_order) else 0.0,
854 stash_size))
855
  def FindVertexSequence(self):
    """Order the transfers so few/cheap edges point backwards.

    Assigns each transfer an 'order' index and rearranges
    self.transfers to match.  The resulting sequence tries to minimize
    the total weight of left-pointing (backward) edges, which a later
    pass will remove or reverse.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    while G:

      # Put all sinks at the end of the sequence.
      # (Repeats because removing a sink may create new sinks.)
      while True:
        sinks = [u for u in G if not u.outgoing]
        if not sinks:
          break
        for u in sinks:
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            del iu.outgoing[u]

      # Put all the sources at the beginning of the sequence.
      # (Likewise repeated: removing a source may expose new sources.)
      while True:
        sources = [u for u in G if not u.incoming]
        if not sources:
          break
        for u in sources:
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            del iu.incoming[u]

      if not G:
        break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      max_d = None
      best_u = None
      for u in G:
        d = sum(u.outgoing.values()) - sum(u.incoming.values())
        if best_u is None or d > max_d:
          max_d = d
          best_u = u

      # Remove the chosen vertex and all edges touching it, then loop
      # back to peel off any sinks/sources this removal exposed.
      u = best_u
      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        del iu.incoming[u]
      for iu in u.incoming:
        del iu.outgoing[u]

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      # The scratch edge copies are no longer needed.
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
942
943 def GenerateDigraph(self):
944 print("Generating digraph...")
945 for a in self.transfers:
946 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700947 if a is b:
948 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700949
950 # If the blocks written by A are read by B, then B needs to go before A.
951 i = a.tgt_ranges.intersect(b.src_ranges)
952 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700953 if b.src_name == "__ZERO":
954 # the cost of removing source blocks for the __ZERO domain
955 # is (nearly) zero.
956 size = 0
957 else:
958 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700959 b.goes_before[a] = size
960 a.goes_after[b] = size
961
962 def FindTransfers(self):
Tao Bao937847a2015-08-25 15:10:10 -0700963 """Parse the file_map to generate all the transfers."""
964
965 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
966 split=False):
967 """Wrapper function for adding a Transfer().
968
969 For BBOTA v3, we need to stash source blocks for resumable feature.
970 However, with the growth of file size and the shrink of the cache
971 partition source blocks are too large to be stashed. If a file occupies
972 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
973 into smaller pieces by getting multiple Transfer()s.
974
975 The downside is that after splitting, we can no longer use imgdiff but
976 only bsdiff."""
977
978 MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
979
980 # We care about diff transfers only.
981 if style != "diff" or not split:
982 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
983 return
984
985 # Change nothing for small files.
986 if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
987 src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
988 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
989 return
990
991 pieces = 0
992 while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
993 src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
994 tgt_split_name = "%s-%d" % (tgt_name, pieces)
995 src_split_name = "%s-%d" % (src_name, pieces)
996 tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
997 src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
998 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
999 by_id)
1000
1001 tgt_ranges = tgt_ranges.subtract(tgt_first)
1002 src_ranges = src_ranges.subtract(src_first)
1003 pieces += 1
1004
1005 # Handle remaining blocks.
1006 if tgt_ranges.size() or src_ranges.size():
1007 # Must be both non-empty.
1008 assert tgt_ranges.size() and src_ranges.size()
1009 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1010 src_split_name = "%s-%d" % (src_name, pieces)
1011 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1012 by_id)
1013
Doug Zongkerfc44a512014-08-26 13:10:25 -07001014 empty = RangeSet()
1015 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1016 if tgt_fn == "__ZERO":
1017 # the special "__ZERO" domain is all the blocks not contained
1018 # in any file and that are filled with zeros. We have a
1019 # special transfer style for zero blocks.
1020 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao937847a2015-08-25 15:10:10 -07001021 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1022 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001023 continue
1024
Tao Bao5ece99d2015-05-12 11:42:31 -07001025 elif tgt_fn == "__COPY":
1026 # "__COPY" domain includes all the blocks not contained in any
1027 # file and that need to be copied unconditionally to the target.
Tao Bao937847a2015-08-25 15:10:10 -07001028 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Bao5ece99d2015-05-12 11:42:31 -07001029 continue
1030
Doug Zongkerfc44a512014-08-26 13:10:25 -07001031 elif tgt_fn in self.src.file_map:
1032 # Look for an exact pathname match in the source.
Tao Bao937847a2015-08-25 15:10:10 -07001033 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1034 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001035 continue
1036
1037 b = os.path.basename(tgt_fn)
1038 if b in self.src_basenames:
1039 # Look for an exact basename match in the source.
1040 src_fn = self.src_basenames[b]
Tao Bao937847a2015-08-25 15:10:10 -07001041 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1042 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001043 continue
1044
1045 b = re.sub("[0-9]+", "#", b)
1046 if b in self.src_numpatterns:
1047 # Look for a 'number pattern' match (a basename match after
1048 # all runs of digits are replaced by "#"). (This is useful
1049 # for .so files that contain version numbers in the filename
1050 # that get bumped.)
1051 src_fn = self.src_numpatterns[b]
Tao Bao937847a2015-08-25 15:10:10 -07001052 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1053 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001054 continue
1055
Tao Bao937847a2015-08-25 15:10:10 -07001056 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001057
1058 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001059 for k in self.src.file_map.keys():
1060 b = os.path.basename(k)
1061 self.src_basenames[b] = k
1062 b = re.sub("[0-9]+", "#", b)
1063 self.src_numpatterns[b] = k
1064
1065 @staticmethod
1066 def AssertPartition(total, seq):
1067 """Assert that all the RangeSets in 'seq' form a partition of the
1068 'total' RangeSet (ie, they are nonintersecting and their union
1069 equals 'total')."""
1070 so_far = RangeSet()
1071 for i in seq:
1072 assert not so_far.overlaps(i)
1073 so_far = so_far.union(i)
1074 assert so_far == total