blob: bec577323f1e0ee3cb2bbd4ad84111c0e7650c08 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Bao8dcf7382015-05-21 14:09:49 -070019import common
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that turns the src data into the tgt data.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data);
        otherwise run "bsdiff".

  Returns:
    The patch contents, as a string of bytes.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  # The patch file is written by the external tool; we only need its name.
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Fix: close the null-device handle when the call returns; the old
      # code passed open("/dev/null", "a") inline and leaked the handle.
      with open(os.devnull, "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Fix: remove each temp file independently. Previously a failure to
    # unlink srcfile aborted the single try block and leaked the rest.
    for name in (srcfile, tgtfile, patchfile):
      try:
        os.unlink(name)
      except OSError:
        pass
72
Dan Albert8b72aef2015-03-23 19:13:21 -070073
class Image(object):
  """Abstract interface for the image objects consumed by BlockImageDiff."""

  def ReadRangeSet(self, ranges):
    """Return the data contained in the given blocks; must be overridden."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image data; must be overridden."""
    raise NotImplementedError
80
81
class EmptyImage(Image):
  """An image of length zero: no blocks, no data."""
  blocksize = 4096
  total_blocks = 0
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is no data to read; always return an empty tuple."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes of data.

    clobbered_blocks is always empty here, so the
    include_clobbered_blocks flag makes no difference.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
97
98
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap a flat string of partition data as an image.

    Args:
      data: the raw partition contents.
      trim: if True, drop any trailing partial block.
      pad: if True, zero-fill any trailing partial block.

    At most one of trim/pad may be set; if neither is set, len(data)
    must already be a multiple of the 4096-byte block size.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Fix: floor division keeps the block count an int on Python 3 (or
    # under "from __future__ import division") as well; identical on 2.x.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Fix: store a RangeSet, not a bare list. EmptyImage exposes a RangeSet
    # here, and TotalSha1() below calls care_map.subtract(clobbered_blocks),
    # which needs a RangeSet operand.
    self.clobbered_blocks = RangeSet(
        data=clobbered_blocks) if clobbered_blocks else RangeSet()
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # The padded last block (if any) is classified separately as __COPY.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data for the given ranges, one string per range."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image contents.

    By default the clobbered (padded) blocks are excluded; pass
    include_clobbered_blocks=True to hash every byte of self.data.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # Fix: feed the pieces to the hash one at a time. The old code
      # passed the list returned by ReadRangeSet() straight to sha1(),
      # which raises TypeError (sha1 accepts only buffer-style data).
      h = sha1()
      for piece in self.ReadRangeSet(ranges):
        h.update(piece)
      return h.hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700169
Doug Zongkerfc44a512014-08-26 13:10:25 -0700170
class Transfer(object):
  """One step of the update: produce tgt_ranges from src_ranges via style."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # "Intact" means both range sets report themselves monotonic (blocks
    # stored in increasing order); objects without the attribute count as
    # not monotonic.
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # OrderedDict rather than dict keeps the output repeatable; plain dict
    # iteration order would depend on the hash values of the Transfer
    # objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Net number of blocks this transfer adds to the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    consumed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - consumed

  def ConvertToNew(self):
    """Degrade this transfer to a plain "new" command with no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
205
206
207# BlockImageDiff works on two image objects. An image object is
208# anything that provides the following attributes:
209#
210# blocksize: the size in bytes of a block, currently must be 4096.
211#
212# total_blocks: the total size of the partition/image, in blocks.
213#
214# care_map: a RangeSet containing which blocks (in the range [0,
215# total_blocks) we actually care about; i.e. which blocks contain
216# data.
217#
218# file_map: a dict that partitions the blocks contained in care_map
219# into smaller domains that are useful for doing diffs on.
220# (Typically a domain is a file, and the key in file_map is the
221# pathname.)
222#
Tao Baoff777812015-05-12 11:42:31 -0700223# clobbered_blocks: a RangeSet containing which blocks contain data
224# but may be altered by the FS. They need to be excluded when
225# verifying the partition integrity.
226#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# ReadRangeSet(): a function that takes a RangeSet and returns the
228# data contained in the image blocks of that RangeSet. The data
229# is returned as a list or tuple of strings; concatenating the
230# elements together should produce the requested data.
231# Implementations are free to break up the data into list/tuple
232# elements in any way that is convenient.
233#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700234# TotalSha1(): a function that returns (as a hex string) the SHA-1
235# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700236# care_map minus clobbered_blocks, or including the clobbered
237# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700238#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700239# When creating a BlockImageDiff, the src image may be None, in which
240# case the list of transfers produced will never read from the
241# original image.
242
243class BlockImageDiff(object):
Sami Tolvanendd67a292014-12-09 16:40:34 +0000244 def __init__(self, tgt, src=None, threads=None, version=3):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700245 if threads is None:
246 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700247 if threads == 0:
248 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700249 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700250 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700251 self.transfers = []
252 self.src_basenames = {}
253 self.src_numpatterns = {}
Doug Zongker62338182014-09-08 08:29:55 -0700254
Sami Tolvanendd67a292014-12-09 16:40:34 +0000255 assert version in (1, 2, 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700256
257 self.tgt = tgt
258 if src is None:
259 src = EmptyImage()
260 self.src = src
261
262 # The updater code that installs the patch always uses 4k blocks.
263 assert tgt.blocksize == 4096
264 assert src.blocksize == 4096
265
266 # The range sets in each filemap should comprise a partition of
267 # the care map.
268 self.AssertPartition(src.care_map, src.file_map.values())
269 self.AssertPartition(tgt.care_map, tgt.file_map.values())
270
  def Compute(self, prefix):
    """Compute the block diff and write all output files.

    Emits "<prefix>.transfer.list" together with the payload files
    "<prefix>.new.dat" and "<prefix>.patch.dat" (written by
    WriteTransfers and ComputePatches respectively).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
305
Dan Albert8b72aef2015-03-23 19:13:21 -0700306 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000307 data = source.ReadRangeSet(ranges)
308 ctx = sha1()
309
310 for p in data:
311 ctx.update(p)
312
313 return ctx.hexdigest()
314
  def WriteTransfers(self, prefix):
    """Render self.transfers into "<prefix>.transfer.list".

    Serializes every transfer into the textual command format of the
    selected version, tracks stash usage while doing so, and prepends
    the header lines: format version, total blocks written, and (v2+)
    stash slot count plus maximum stashed blocks.
    """
    out = []

    # Total number of target blocks actually written by the commands.
    total = 0

    # v2: maps stash slot id -> numeric sid.  v3: additionally maps a
    # stash's content hash -> remaining use count (stashes are addressed
    # by hash, so identical contents share one slot).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Numeric slot ids are recycled through a min-heap so the smallest
    # free id is always reused first.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      # Version 1 has no stash support at all.
      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        stashed_blocks += sr.size()
        if self.version == 2:
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # v3 stashes are keyed by content hash; only emit the stash
          # command the first time this content is seen.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      # Record the high-water mark of stash usage.
      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw()
      elif self.version >= 2:

        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          stashed_blocks -= sr.size()
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed range in coordinates relative to the
          # source ranges of this transfer.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))   # format version number
    out.insert(1, str(total) + "\n")
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700547
  def ReviseStashSize(self):
    """Keep runtime stash usage within the /cache size limit.

    Walks the transfer sequence, tracking how many blocks would be
    stashed at each point.  Any command whose (explicit or v3-implicit)
    stash requirement would push usage past the allowed maximum is
    converted into a plain "new" command instead, and its stash def
    points are removed.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))
          new_blocks += sr.size()

        cmd.ConvertToNew()

    print(" Total %d blocks are packed as new blocks due to insufficient "
          "cache size." % (new_blocks,))
617
  def ComputePatches(self, prefix):
    """Compute the payload for every transfer.

    Writes the raw data of "new" transfers to "<prefix>.new.dat" and the
    bsdiff/imgdiff patches of "diff" transfers to "<prefix>.patch.dat",
    setting xf.patch_start/xf.patch_len for each patched transfer.
    "diff" transfers whose source and target contents turn out to be
    identical are downgraded to "move" commands with no patch.  Patch
    computation is fanned out over self.threads worker threads.
    """
    print("Reticulating splines...")
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate.  (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      # Sort so the largest targets (computed last when popping from the
      # end) are interleaved sensibly across workers.
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        # Workers pull items off diff_q and fill in patches[]; both are
        # shared, so every access happens under the lock.
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    # Concatenate all patches into the .patch.dat file, recording each
    # patch's offset/length on its transfer.
    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)
722
723 def AssertSequenceGood(self):
724 # Simulate the sequences of transfers we will output, and check that:
725 # - we never read a block after writing it, and
726 # - we write every block we care about exactly once.
727
728 # Start with no blocks having been touched yet.
729 touched = RangeSet()
730
731 # Imagine processing the transfers in order.
732 for xf in self.transfers:
733 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700734
735 x = xf.src_ranges
736 if self.version >= 2:
737 for _, sr in xf.use_stash:
738 x = x.subtract(sr)
739
740 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700741 # Check that the output blocks for this transfer haven't yet been touched.
742 assert not touched.overlaps(xf.tgt_ranges)
743 # Touch all the blocks written by this transfer.
744 touched = touched.union(xf.tgt_ranges)
745
746 # Check that we've written every target block.
747 assert touched == self.tgt.care_map
748
  def ImproveVertexSequence(self):
    """Topologically re-sort the transfers to reduce peak stash usage.

    Greedy Kahn-style sort: among the currently ready vertices (no
    remaining incoming edges) always emit the one with the smallest
    NetStashChange(), so stashed data is freed as early as possible.
    Renumbers xf.order to match the new sequence.
    """
    print("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence.  The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []   # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.  (u.order breaks ties deterministically.)
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # if this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i
789
Doug Zongkerfc44a512014-08-26 13:10:25 -0700790 def RemoveBackwardEdges(self):
791 print("Removing backward edges...")
792 in_order = 0
793 out_of_order = 0
794 lost_source = 0
795
796 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700797 lost = 0
798 size = xf.src_ranges.size()
799 for u in xf.goes_before:
800 # xf should go before u
801 if xf.order < u.order:
802 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700803 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700804 else:
805 # it doesn't, boo. trim the blocks that u writes from xf's
806 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700807 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700808 assert xf.src_ranges.overlaps(u.tgt_ranges)
809 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
810 xf.intact = False
811
812 if xf.style == "diff" and not xf.src_ranges:
813 # nothing left to diff from; treat as new data
814 xf.style = "new"
815
816 lost = size - xf.src_ranges.size()
817 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700818
819 print((" %d/%d dependencies (%.2f%%) were violated; "
820 "%d source blocks removed.") %
821 (out_of_order, in_order + out_of_order,
822 (out_of_order * 100.0 / (in_order + out_of_order))
823 if (in_order + out_of_order) else 0.0,
824 lost_source))
825
Doug Zongker62338182014-09-08 08:29:55 -0700826 def ReverseBackwardEdges(self):
827 print("Reversing backward edges...")
828 in_order = 0
829 out_of_order = 0
830 stashes = 0
831 stash_size = 0
832
833 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700834 for u in xf.goes_before.copy():
835 # xf should go before u
836 if xf.order < u.order:
837 # it does, hurray!
838 in_order += 1
839 else:
840 # it doesn't, boo. modify u to stash the blocks that it
841 # writes that xf wants to read, and then require u to go
842 # before xf.
843 out_of_order += 1
844
845 overlap = xf.src_ranges.intersect(u.tgt_ranges)
846 assert overlap
847
848 u.stash_before.append((stashes, overlap))
849 xf.use_stash.append((stashes, overlap))
850 stashes += 1
851 stash_size += overlap.size()
852
853 # reverse the edge direction; now xf must go after u
854 del xf.goes_before[u]
855 del u.goes_after[xf]
856 xf.goes_after[u] = None # value doesn't matter
857 u.goes_before[xf] = None
858
859 print((" %d/%d dependencies (%.2f%%) were violated; "
860 "%d source blocks stashed.") %
861 (out_of_order, in_order + out_of_order,
862 (out_of_order * 100.0 / (in_order + out_of_order))
863 if (in_order + out_of_order) else 0.0,
864 stash_size))
865
Doug Zongkerfc44a512014-08-26 13:10:25 -0700866 def FindVertexSequence(self):
867 print("Finding vertex sequence...")
868
869 # This is based on "A Fast & Effective Heuristic for the Feedback
870 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
871 # it as starting with the digraph G and moving all the vertices to
872 # be on a horizontal line in some order, trying to minimize the
873 # number of edges that end up pointing to the left. Left-pointing
874 # edges will get removed to turn the digraph into a DAG. In this
875 # case each edge has a weight which is the number of source blocks
876 # we'll lose if that edge is removed; we try to minimize the total
877 # weight rather than just the number of edges.
878
879 # Make a copy of the edge set; this copy will get destroyed by the
880 # algorithm.
881 for xf in self.transfers:
882 xf.incoming = xf.goes_after.copy()
883 xf.outgoing = xf.goes_before.copy()
884
885 # We use an OrderedDict instead of just a set so that the output
886 # is repeatable; otherwise it would depend on the hash values of
887 # the transfer objects.
888 G = OrderedDict()
889 for xf in self.transfers:
890 G[xf] = None
891 s1 = deque() # the left side of the sequence, built from left to right
892 s2 = deque() # the right side of the sequence, built from right to left
893
894 while G:
895
896 # Put all sinks at the end of the sequence.
897 while True:
898 sinks = [u for u in G if not u.outgoing]
Dan Albert8b72aef2015-03-23 19:13:21 -0700899 if not sinks:
900 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700901 for u in sinks:
902 s2.appendleft(u)
903 del G[u]
904 for iu in u.incoming:
905 del iu.outgoing[u]
906
907 # Put all the sources at the beginning of the sequence.
908 while True:
909 sources = [u for u in G if not u.incoming]
Dan Albert8b72aef2015-03-23 19:13:21 -0700910 if not sources:
911 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700912 for u in sources:
913 s1.append(u)
914 del G[u]
915 for iu in u.outgoing:
916 del iu.incoming[u]
917
Dan Albert8b72aef2015-03-23 19:13:21 -0700918 if not G:
919 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700920
921 # Find the "best" vertex to put next. "Best" is the one that
922 # maximizes the net difference in source blocks saved we get by
923 # pretending it's a source rather than a sink.
924
925 max_d = None
926 best_u = None
927 for u in G:
928 d = sum(u.outgoing.values()) - sum(u.incoming.values())
929 if best_u is None or d > max_d:
930 max_d = d
931 best_u = u
932
933 u = best_u
934 s1.append(u)
935 del G[u]
936 for iu in u.outgoing:
937 del iu.incoming[u]
938 for iu in u.incoming:
939 del iu.outgoing[u]
940
941 # Now record the sequence in the 'order' field of each transfer,
942 # and by rearranging self.transfers to be in the chosen sequence.
943
944 new_transfers = []
945 for x in itertools.chain(s1, s2):
946 x.order = len(new_transfers)
947 new_transfers.append(x)
948 del x.incoming
949 del x.outgoing
950
951 self.transfers = new_transfers
952
953 def GenerateDigraph(self):
954 print("Generating digraph...")
955 for a in self.transfers:
956 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700957 if a is b:
958 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700959
960 # If the blocks written by A are read by B, then B needs to go before A.
961 i = a.tgt_ranges.intersect(b.src_ranges)
962 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700963 if b.src_name == "__ZERO":
964 # the cost of removing source blocks for the __ZERO domain
965 # is (nearly) zero.
966 size = 0
967 else:
968 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700969 b.goes_before[a] = size
970 a.goes_after[b] = size
971
972 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -0700973 """Parse the file_map to generate all the transfers."""
974
975 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
976 split=False):
977 """Wrapper function for adding a Transfer().
978
979 For BBOTA v3, we need to stash source blocks for resumable feature.
980 However, with the growth of file size and the shrink of the cache
981 partition source blocks are too large to be stashed. If a file occupies
982 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
983 into smaller pieces by getting multiple Transfer()s.
984
985 The downside is that after splitting, we can no longer use imgdiff but
986 only bsdiff."""
987
988 MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
989
990 # We care about diff transfers only.
991 if style != "diff" or not split:
992 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
993 return
994
995 # Change nothing for small files.
996 if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
997 src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
998 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
999 return
1000
1001 pieces = 0
1002 while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
1003 src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
1004 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1005 src_split_name = "%s-%d" % (src_name, pieces)
1006 tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
1007 src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
1008 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1009 by_id)
1010
1011 tgt_ranges = tgt_ranges.subtract(tgt_first)
1012 src_ranges = src_ranges.subtract(src_first)
1013 pieces += 1
1014
1015 # Handle remaining blocks.
1016 if tgt_ranges.size() or src_ranges.size():
1017 # Must be both non-empty.
1018 assert tgt_ranges.size() and src_ranges.size()
1019 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1020 src_split_name = "%s-%d" % (src_name, pieces)
1021 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1022 by_id)
1023
Doug Zongkerfc44a512014-08-26 13:10:25 -07001024 empty = RangeSet()
1025 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1026 if tgt_fn == "__ZERO":
1027 # the special "__ZERO" domain is all the blocks not contained
1028 # in any file and that are filled with zeros. We have a
1029 # special transfer style for zero blocks.
1030 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001031 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1032 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001033 continue
1034
Tao Baoff777812015-05-12 11:42:31 -07001035 elif tgt_fn == "__COPY":
1036 # "__COPY" domain includes all the blocks not contained in any
1037 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001038 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001039 continue
1040
Doug Zongkerfc44a512014-08-26 13:10:25 -07001041 elif tgt_fn in self.src.file_map:
1042 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001043 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1044 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001045 continue
1046
1047 b = os.path.basename(tgt_fn)
1048 if b in self.src_basenames:
1049 # Look for an exact basename match in the source.
1050 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001051 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1052 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001053 continue
1054
1055 b = re.sub("[0-9]+", "#", b)
1056 if b in self.src_numpatterns:
1057 # Look for a 'number pattern' match (a basename match after
1058 # all runs of digits are replaced by "#"). (This is useful
1059 # for .so files that contain version numbers in the filename
1060 # that get bumped.)
1061 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001062 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1063 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001064 continue
1065
Tao Bao9a5caf22015-08-25 15:10:10 -07001066 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001067
1068 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001069 for k in self.src.file_map.keys():
1070 b = os.path.basename(k)
1071 self.src_basenames[b] = k
1072 b = re.sub("[0-9]+", "#", b)
1073 self.src_numpatterns[b] = k
1074
1075 @staticmethod
1076 def AssertPartition(total, seq):
1077 """Assert that all the RangeSets in 'seq' form a partition of the
1078 'total' RangeSet (ie, they are nonintersecting and their union
1079 equals 'total')."""
1080 so_far = RangeSet()
1081 for i in seq:
1082 assert not so_far.overlaps(i)
1083 so_far = so_far.union(i)
1084 assert so_far == total