blob: 1d338eebc364d6ac98135aec7f11b5404227269a [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Bao8dcf7382015-05-21 14:09:49 -070019import common
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms src into tgt.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-formatted data);
        otherwise run "bsdiff".

  Returns:
    The contents of the generated patch file, as a string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  # The patch file is (re)created by the diff tool; we only need its name.
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)
    try:
      os.unlink(patchfile)
    except OSError:
      pass
    if imgdiff:
      # Discard imgdiff's output; close the devnull handle when done
      # instead of leaking one open file descriptor per call.
      with open(os.devnull, "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Best-effort cleanup of all three temp files.
    try:
      os.unlink(srcfile)
      os.unlink(tgtfile)
      os.unlink(patchfile)
    except OSError:
      pass
Dan Albert8b72aef2015-03-23 19:13:21 -070073
class Image(object):
  """Abstract interface implemented by the image objects BlockImageDiff
  operates on (see the attribute/method contract documented below, ahead
  of BlockImageDiff)."""

  def ReadRangeSet(self, ranges):
    """Return the data in `ranges` as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the image's cared-about data."""
    raise NotImplementedError
80
81
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  blocksize = 4096
  total_blocks = 0
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  file_map = {}

  def ReadRangeSet(self, ranges):
    """No blocks exist, so every read yields no data."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Hash of zero bytes of data.

    EmptyImage always carries empty clobbered_blocks, so
    include_clobbered_blocks can be ignored.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
97
98
class DataImage(Image):
  """An image wrapped around a single string of data.

  Args:
    data: the raw image contents.
    trim: if True, drop a trailing partial block instead of failing.
    pad: if True, zero-fill a trailing partial block instead of failing.

  Raises:
    ValueError: if len(data) is not a multiple of the block size and
        neither trim nor pad was requested.
  """

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3 as
    # well ("/" was relying on Python 2 integer division).
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Keep clobbered_blocks as a RangeSet, matching the documented image
    # interface (and EmptyImage). TotalSha1() subtracts it from the care
    # map, which requires a RangeSet rather than a raw list.
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded last block, which goes to
    # __COPY) as all-zero or not; ranges are built as [start, end) pairs.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data for `ranges` as a list of per-range slices."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the data, optionally including the
    clobbered (padded) blocks."""
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # ReadRangeSet() returns a list of pieces; sha1() cannot hash a
      # list directly (the original passed the list and would raise
      # TypeError), so feed the pieces one at a time.
      ctx = sha1()
      for piece in self.ReadRangeSet(ranges):
        ctx.update(piece)
      return ctx.hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700169
Doug Zongkerfc44a512014-08-26 13:10:25 -0700170
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges in the target
  image, optionally reading src_ranges from the source image."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # Usable as imgdiff input only if both range sets are monotonic.
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer; its id is its position in by_id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Blocks newly stashed by this command minus blocks it frees."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Downgrade this transfer to a plain "new" command (no source data)."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        str(self.id), str(self.src_ranges), self.style, str(self.tgt_ranges))
205
206
207# BlockImageDiff works on two image objects. An image object is
208# anything that provides the following attributes:
209#
210# blocksize: the size in bytes of a block, currently must be 4096.
211#
212# total_blocks: the total size of the partition/image, in blocks.
213#
214# care_map: a RangeSet containing which blocks (in the range [0,
215# total_blocks) we actually care about; i.e. which blocks contain
216# data.
217#
218# file_map: a dict that partitions the blocks contained in care_map
219# into smaller domains that are useful for doing diffs on.
220# (Typically a domain is a file, and the key in file_map is the
221# pathname.)
222#
Tao Baoff777812015-05-12 11:42:31 -0700223# clobbered_blocks: a RangeSet containing which blocks contain data
224# but may be altered by the FS. They need to be excluded when
225# verifying the partition integrity.
226#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# ReadRangeSet(): a function that takes a RangeSet and returns the
228# data contained in the image blocks of that RangeSet. The data
229# is returned as a list or tuple of strings; concatenating the
230# elements together should produce the requested data.
231# Implementations are free to break up the data into list/tuple
232# elements in any way that is convenient.
233#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700234# TotalSha1(): a function that returns (as a hex string) the SHA-1
235# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700236# care_map minus clobbered_blocks, or including the clobbered
237# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700238#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700239# When creating a BlockImageDiff, the src image may be None, in which
240# case the list of transfers produced will never read from the
241# original image.
242
243class BlockImageDiff(object):
Tao Baoeba409c2015-10-21 13:30:43 -0700244 def __init__(self, tgt, src=None, threads=None, version=4):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700245 if threads is None:
246 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700247 if threads == 0:
248 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700249 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700250 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700251 self.transfers = []
252 self.src_basenames = {}
253 self.src_numpatterns = {}
Doug Zongker62338182014-09-08 08:29:55 -0700254
Tao Baoeba409c2015-10-21 13:30:43 -0700255 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700256
257 self.tgt = tgt
258 if src is None:
259 src = EmptyImage()
260 self.src = src
261
262 # The updater code that installs the patch always uses 4k blocks.
263 assert tgt.blocksize == 4096
264 assert src.blocksize == 4096
265
266 # The range sets in each filemap should comprise a partition of
267 # the care map.
268 self.AssertPartition(src.care_map, src.file_map.values())
269 self.AssertPartition(tgt.care_map, tgt.file_map.values())
270
  def Compute(self, prefix):
    """Run the whole diff pipeline and write the output files.

    Produces "<prefix>.transfer.list", "<prefix>.new.dat" and
    "<prefix>.patch.dat" (via ComputePatches()/WriteTransfers()).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
305
Dan Albert8b72aef2015-03-23 19:13:21 -0700306 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000307 data = source.ReadRangeSet(ranges)
308 ctx = sha1()
309
310 for p in data:
311 ctx.update(p)
312
313 return ctx.hexdigest()
314
  def WriteTransfers(self, prefix):
    """Serialize the transfer list to "<prefix>.transfer.list".

    Emits one command per line in the on-device format selected by
    self.version, tracking the peak number of stashed blocks as it goes.
    For v2+ with a known cache size, asserts the peak stays under the
    allowed limit.
    """
    out = []

    # Total number of target blocks written, reported in the header.
    total = 0

    # v2: maps stash slot id -> slot id. v3+: also maps stash hash ->
    # refcount, so identical stashes are emitted only once.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # v2 stash slots are recycled via a min-heap of freed ids.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      # Emit the stash commands this transfer requires before it runs.
      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # v3+ stashes are addressed by content hash; dedupe via refcount.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      # "free" commands emitted after this transfer, and how many blocks
      # they release (applied to stashed_blocks below).
      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # Build the <src_str> operand, one of:
        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Remap the stashed range into the transfer-local coordinate
          # space of the concatenated source data.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            # Free the hash-addressed stash only when its last user is done.
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          # All source data comes from stashes.
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # Command syntax reference:
      #
      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move where source and target are identical is a no-op on the
        # device; emit nothing (and count nothing) in that case.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        # Only zero blocks that weren't already zero in the source.
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      # Sanity check: abort if we're going to need more stash space than
      # the allowed size (cache_size * threshold). There are two purposes
      # of having a threshold here. a) Part of the cache may have been
      # occupied by some recovery logs. b) It will buy us some time to deal
      # with the oversize issue.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed = cache_size * stash_threshold
      assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
             'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                 self.tgt.blocksize, max_allowed, cache_size,
                 stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))   # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d  (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d  (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700552
  def ReviseStashSize(self):
    """Rewrite transfers so the peak stash usage fits in the cache limit.

    Simulates stash usage over the transfer sequence; any stash (explicit
    or, for v3+, the implicit stash of overlapping move/diff sources)
    that would push usage past the allowed number of blocks causes the
    consuming command to be converted to a plain "new" command — larger
    output, but no stash requirement.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700625
  def ComputePatches(self, prefix):
    """Compute the patch for every "diff" transfer and write data files.

    Writes raw data for "new" transfers to "<prefix>.new.dat" and the
    concatenated bsdiff/imgdiff patches to "<prefix>.patch.dat", recording
    each transfer's patch_start/patch_len. "diff" transfers whose source
    and target bytes turn out identical are downgraded to "move".
    """
    print("Reticulating splines...")
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate.  (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      # Sort so the largest targets are popped (diff'd) first.
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        # Pull work items until the queue is drained; the lock guards
        # both the queue and the shared patches list.
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    # Concatenate all patches; record each transfer's offset and length.
    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)
730
  def AssertSequenceGood(self):
    """Sanity-check the final transfer order; raises AssertionError on
    any read-after-write or missed/double-written target block."""
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = RangeSet()

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      if self.version >= 2:
        # Blocks read from stashes were captured earlier; only the
        # directly-read source blocks must still be untouched.
        for _, sr in xf.use_stash:
          x = x.subtract(sr)

      assert not touched.overlaps(x)
      # Check that the output blocks for this transfer haven't yet been touched.
      assert not touched.overlaps(xf.tgt_ranges)
      # Touch all the blocks written by this transfer.
      touched = touched.union(xf.tgt_ranges)

    # Check that we've written every target block.
    assert touched == self.tgt.care_map
756
Doug Zongker62338182014-09-08 08:29:55 -0700757 def ImproveVertexSequence(self):
758 print("Improving vertex order...")
759
760 # At this point our digraph is acyclic; we reversed any edges that
761 # were backwards in the heuristically-generated sequence. The
762 # previously-generated order is still acceptable, but we hope to
763 # find a better order that needs less memory for stashed data.
764 # Now we do a topological sort to generate a new vertex order,
765 # using a greedy algorithm to choose which vertex goes next
766 # whenever we have a choice.
767
768 # Make a copy of the edge set; this copy will get destroyed by the
769 # algorithm.
770 for xf in self.transfers:
771 xf.incoming = xf.goes_after.copy()
772 xf.outgoing = xf.goes_before.copy()
773
774 L = [] # the new vertex order
775
776 # S is the set of sources in the remaining graph; we always choose
777 # the one that leaves the least amount of stashed data after it's
778 # executed.
779 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
780 if not u.incoming]
781 heapq.heapify(S)
782
783 while S:
784 _, _, xf = heapq.heappop(S)
785 L.append(xf)
786 for u in xf.outgoing:
787 del u.incoming[xf]
788 if not u.incoming:
789 heapq.heappush(S, (u.NetStashChange(), u.order, u))
790
791 # if this fails then our graph had a cycle.
792 assert len(L) == len(self.transfers)
793
794 self.transfers = L
795 for i, xf in enumerate(L):
796 xf.order = i
797
Doug Zongkerfc44a512014-08-26 13:10:25 -0700798 def RemoveBackwardEdges(self):
799 print("Removing backward edges...")
800 in_order = 0
801 out_of_order = 0
802 lost_source = 0
803
804 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700805 lost = 0
806 size = xf.src_ranges.size()
807 for u in xf.goes_before:
808 # xf should go before u
809 if xf.order < u.order:
810 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700811 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700812 else:
813 # it doesn't, boo. trim the blocks that u writes from xf's
814 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700815 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700816 assert xf.src_ranges.overlaps(u.tgt_ranges)
817 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
818 xf.intact = False
819
820 if xf.style == "diff" and not xf.src_ranges:
821 # nothing left to diff from; treat as new data
822 xf.style = "new"
823
824 lost = size - xf.src_ranges.size()
825 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700826
827 print((" %d/%d dependencies (%.2f%%) were violated; "
828 "%d source blocks removed.") %
829 (out_of_order, in_order + out_of_order,
830 (out_of_order * 100.0 / (in_order + out_of_order))
831 if (in_order + out_of_order) else 0.0,
832 lost_source))
833
Doug Zongker62338182014-09-08 08:29:55 -0700834 def ReverseBackwardEdges(self):
835 print("Reversing backward edges...")
836 in_order = 0
837 out_of_order = 0
838 stashes = 0
839 stash_size = 0
840
841 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700842 for u in xf.goes_before.copy():
843 # xf should go before u
844 if xf.order < u.order:
845 # it does, hurray!
846 in_order += 1
847 else:
848 # it doesn't, boo. modify u to stash the blocks that it
849 # writes that xf wants to read, and then require u to go
850 # before xf.
851 out_of_order += 1
852
853 overlap = xf.src_ranges.intersect(u.tgt_ranges)
854 assert overlap
855
856 u.stash_before.append((stashes, overlap))
857 xf.use_stash.append((stashes, overlap))
858 stashes += 1
859 stash_size += overlap.size()
860
861 # reverse the edge direction; now xf must go after u
862 del xf.goes_before[u]
863 del u.goes_after[xf]
864 xf.goes_after[u] = None # value doesn't matter
865 u.goes_before[xf] = None
866
867 print((" %d/%d dependencies (%.2f%%) were violated; "
868 "%d source blocks stashed.") %
869 (out_of_order, in_order + out_of_order,
870 (out_of_order * 100.0 / (in_order + out_of_order))
871 if (in_order + out_of_order) else 0.0,
872 stash_size))
873
  def FindVertexSequence(self):
    """Choose an initial transfer order that minimizes lost source blocks.

    Produces a linear order of self.transfers (recorded both in each
    transfer's 'order' attribute and in the list order of
    self.transfers) such that the total weight of backward edges --
    dependencies the order violates -- is heuristically minimized.
    Backward edges are dealt with afterwards by RemoveBackwardEdges()
    or ReverseBackwardEdges().
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    while G:

      # Put all sinks at the end of the sequence.
      while True:
        sinks = [u for u in G if not u.outgoing]
        if not sinks:
          break
        for u in sinks:
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            del iu.outgoing[u]

      # Put all the sources at the beginning of the sequence.
      while True:
        sources = [u for u in G if not u.incoming]
        if not sources:
          break
        for u in sources:
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            del iu.incoming[u]

      # Stripping sinks/sources may have emptied the graph entirely.
      if not G:
        break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      max_d = None
      best_u = None
      for u in G:
        # d > 0 means placing u on the left keeps more blocks than it
        # loses; we pick the vertex with the largest such net gain.
        d = sum(u.outgoing.values()) - sum(u.incoming.values())
        if best_u is None or d > max_d:
          max_d = d
          best_u = u

      u = best_u
      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        del iu.incoming[u]
      for iu in u.incoming:
        del iu.outgoing[u]

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      # The scratch edge copies are no longer needed.
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
960
961 def GenerateDigraph(self):
962 print("Generating digraph...")
963 for a in self.transfers:
964 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700965 if a is b:
966 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967
968 # If the blocks written by A are read by B, then B needs to go before A.
969 i = a.tgt_ranges.intersect(b.src_ranges)
970 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700971 if b.src_name == "__ZERO":
972 # the cost of removing source blocks for the __ZERO domain
973 # is (nearly) zero.
974 size = 0
975 else:
976 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700977 b.goes_before[a] = size
978 a.goes_after[b] = size
979
  def FindTransfers(self):
    """Parse the target's file_map and create all the Transfer objects.

    Each target file/domain is matched against the source image (by
    exact path, then basename, then digit-collapsed basename) and a
    transfer of the appropriate style ("zero", "new", or "diff") is
    registered in self.transfers via the Transfer() constructor.
    """

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer().

      For BBOTA v3, we need to stash source blocks for the resumable
      feature.  However, with the growth of file size and the shrink of
      the cache partition, source blocks are too large to be stashed.
      If a file occupies too many blocks, we split it into smaller
      pieces by creating multiple Transfer()s.

      The downside is that after splitting, we may increase the package
      size since the split pieces don't align well.  According to our
      experiments, 1/8 of the cache size as the per-piece limit appears
      to be optimal.  Compared to the fixed 1024-block limit, it reduces
      the overall package size by 30% for volantis, and 20% for angler
      and bullhead."""

      # Splitting only applies to diff transfers; everything else (and
      # callers that didn't opt in) goes straight through.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      pieces = 0
      cache_size = common.OPTIONS.cache_size
      # Per-piece limit: 1/8 of the cache, expressed in blocks.
      split_threshold = 0.125
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Peel off fixed-size chunks from the front of both range sets
      # while both sides are still over the limit.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
                 by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
                 by_id)

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros.  We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#").  (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      # No plausible source match: ship the target data as new.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001082
1083 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001084 for k in self.src.file_map.keys():
1085 b = os.path.basename(k)
1086 self.src_basenames[b] = k
1087 b = re.sub("[0-9]+", "#", b)
1088 self.src_numpatterns[b] = k
1089
1090 @staticmethod
1091 def AssertPartition(total, seq):
1092 """Assert that all the RangeSets in 'seq' form a partition of the
1093 'total' RangeSet (ie, they are nonintersecting and their union
1094 equals 'total')."""
1095 so_far = RangeSet()
1096 for i in seq:
1097 assert not so_far.overlaps(i)
1098 so_far = so_far.union(i)
1099 assert so_far == total