blob: ded34b9795db512ce8ac903cdd9bb35c8bd76306 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
Doug Zongkerfc44a512014-08-26 13:10:25 -070038def compute_patch(src, tgt, imgdiff=False):
39 srcfd, srcfile = tempfile.mkstemp(prefix="src-")
40 tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
41 patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
42 os.close(patchfd)
43
44 try:
45 with os.fdopen(srcfd, "wb") as f_src:
46 for p in src:
47 f_src.write(p)
48
49 with os.fdopen(tgtfd, "wb") as f_tgt:
50 for p in tgt:
51 f_tgt.write(p)
52 try:
53 os.unlink(patchfile)
54 except OSError:
55 pass
56 if imgdiff:
57 p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
58 stdout=open("/dev/null", "a"),
59 stderr=subprocess.STDOUT)
60 else:
61 p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])
62
63 if p:
64 raise ValueError("diff failed: " + str(p))
65
66 with open(patchfile, "rb") as f:
67 return f.read()
68 finally:
69 try:
70 os.unlink(srcfile)
71 os.unlink(tgtfile)
72 os.unlink(patchfile)
73 except OSError:
74 pass
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class Image(object):
  """Abstract interface for partition images used by BlockImageDiff."""

  def ReadRangeSet(self, ranges):
    """Return the data for the given block ranges; subclasses override."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the image's data; subclasses override."""
    raise NotImplementedError
84
class EmptyImage(Image):
  """A zero-length image: no blocks, no data, empty file map."""
  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is no data; every range reads back empty."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes.

    EmptyImage always carries empty clobbered_blocks, so the
    include_clobbered_blocks flag makes no difference and is ignored.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
Dan Albert8b72aef2015-03-23 19:13:21 -0700102class DataImage(Image):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700103 """An image wrapped around a single string of data."""
104
105 def __init__(self, data, trim=False, pad=False):
106 self.data = data
107 self.blocksize = 4096
108
109 assert not (trim and pad)
110
111 partial = len(self.data) % self.blocksize
Tao Bao7589e962015-09-05 20:35:32 -0700112 padded = False
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700113 if partial > 0:
114 if trim:
115 self.data = self.data[:-partial]
116 elif pad:
117 self.data += '\0' * (self.blocksize - partial)
Tao Bao7589e962015-09-05 20:35:32 -0700118 padded = True
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700119 else:
120 raise ValueError(("data for DataImage must be multiple of %d bytes "
121 "unless trim or pad is specified") %
122 (self.blocksize,))
123
124 assert len(self.data) % self.blocksize == 0
125
126 self.total_blocks = len(self.data) / self.blocksize
127 self.care_map = RangeSet(data=(0, self.total_blocks))
Tao Bao7589e962015-09-05 20:35:32 -0700128 # When the last block is padded, we always write the whole block even for
129 # incremental OTAs. Because otherwise the last block may get skipped if
130 # unchanged for an incremental, but would fail the post-install
131 # verification if it has non-zero contents in the padding bytes.
132 # Bug: 23828506
133 if padded:
Tao Bao42206c32015-09-08 13:39:40 -0700134 clobbered_blocks = [self.total_blocks-1, self.total_blocks]
Tao Bao7589e962015-09-05 20:35:32 -0700135 else:
Tao Bao42206c32015-09-08 13:39:40 -0700136 clobbered_blocks = []
137 self.clobbered_blocks = clobbered_blocks
Tao Baoe9b61912015-07-09 17:37:49 -0700138 self.extended = RangeSet()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700139
140 zero_blocks = []
141 nonzero_blocks = []
142 reference = '\0' * self.blocksize
143
Tao Bao7589e962015-09-05 20:35:32 -0700144 for i in range(self.total_blocks-1 if padded else self.total_blocks):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700145 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
146 if d == reference:
147 zero_blocks.append(i)
148 zero_blocks.append(i+1)
149 else:
150 nonzero_blocks.append(i)
151 nonzero_blocks.append(i+1)
152
Tao Bao42206c32015-09-08 13:39:40 -0700153 assert zero_blocks or nonzero_blocks or clobbered_blocks
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700154
Tao Bao42206c32015-09-08 13:39:40 -0700155 self.file_map = dict()
156 if zero_blocks:
157 self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
158 if nonzero_blocks:
159 self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
160 if clobbered_blocks:
161 self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
Tao Bao7589e962015-09-05 20:35:32 -0700162
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700163 def ReadRangeSet(self, ranges):
164 return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]
165
Tao Bao68658c02015-06-01 13:40:49 -0700166 def TotalSha1(self, include_clobbered_blocks=False):
Tao Bao7589e962015-09-05 20:35:32 -0700167 if not include_clobbered_blocks:
168 ranges = self.care_map.subtract(self.clobbered_blocks)
169 return sha1(self.ReadRangeSet(ranges)).hexdigest()
170 else:
171 return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # "Intact" means both range sets are monotonic (in-order on disk).
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return blocks this command stashes minus blocks it unstashes."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrite this command as a "new" command that reads no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%d: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
Doug Zongker6ab2a502016-02-09 08:28:09 -0800210@functools.total_ordering
211class HeapItem(object):
212 def __init__(self, item):
213 self.item = item
214 # Negate the score since python's heap is a min-heap and we want
215 # the maximum score.
216 self.score = -item.score
217 def clear(self):
218 self.item = None
219 def __bool__(self):
220 return self.item is None
221 def __eq__(self, other):
222 return self.score == other.score
223 def __le__(self, other):
224 return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
# TotalSha1(): a function that returns (as a hex string) the SHA-1
# hash of all the data in the image (i.e., all the blocks in the
# care_map minus clobbered_blocks, or including the clobbered
# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700264 def __init__(self, tgt, src=None, threads=None, version=4,
265 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700266 if threads is None:
267 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700268 if threads == 0:
269 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700270 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700271 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700272 self.transfers = []
273 self.src_basenames = {}
274 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800275 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700276 self.touched_src_ranges = RangeSet()
277 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700278 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700279
Tao Baoeba409c2015-10-21 13:30:43 -0700280 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281
282 self.tgt = tgt
283 if src is None:
284 src = EmptyImage()
285 self.src = src
286
287 # The updater code that installs the patch always uses 4k blocks.
288 assert tgt.blocksize == 4096
289 assert src.blocksize == 4096
290
291 # The range sets in each filemap should comprise a partition of
292 # the care map.
293 self.AssertPartition(src.care_map, src.file_map.values())
294 self.AssertPartition(tgt.care_map, tgt.file_map.values())
295
Tao Baob4cfca52016-02-04 14:26:02 -0800296 @property
297 def max_stashed_size(self):
298 return self._max_stashed_size
299
Doug Zongkerfc44a512014-08-26 13:10:25 -0700300 def Compute(self, prefix):
301 # When looking for a source file to use as the diff input for a
302 # target file, we try:
303 # 1) an exact path match if available, otherwise
304 # 2) a exact basename match if available, otherwise
305 # 3) a basename match after all runs of digits are replaced by
306 # "#" if available, otherwise
307 # 4) we have no source for this target.
308 self.AbbreviateSourceNames()
309 self.FindTransfers()
310
311 # Find the ordering dependencies among transfers (this is O(n^2)
312 # in the number of transfers).
313 self.GenerateDigraph()
314 # Find a sequence of transfers that satisfies as many ordering
315 # dependencies as possible (heuristically).
316 self.FindVertexSequence()
317 # Fix up the ordering dependencies that the sequence didn't
318 # satisfy.
Doug Zongker62338182014-09-08 08:29:55 -0700319 if self.version == 1:
320 self.RemoveBackwardEdges()
321 else:
322 self.ReverseBackwardEdges()
323 self.ImproveVertexSequence()
324
Tao Bao82c47982015-08-17 09:45:13 -0700325 # Ensure the runtime stash size is under the limit.
326 if self.version >= 2 and common.OPTIONS.cache_size is not None:
327 self.ReviseStashSize()
328
Doug Zongkerfc44a512014-08-26 13:10:25 -0700329 # Double-check our work.
330 self.AssertSequenceGood()
331
332 self.ComputePatches(prefix)
333 self.WriteTransfers(prefix)
334
Dan Albert8b72aef2015-03-23 19:13:21 -0700335 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000336 data = source.ReadRangeSet(ranges)
337 ctx = sha1()
338
339 for p in data:
340 ctx.update(p)
341
342 return ctx.hexdigest()
343
Doug Zongkerfc44a512014-08-26 13:10:25 -0700344 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700345 def WriteSplitTransfers(out, style, target_blocks):
346 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700347
348 This prevents the target size of one command from being too large; and
349 might help to avoid fsync errors on some devices."""
350
Tianjie Xu37e29302016-06-23 16:10:35 -0700351 assert (style == "new" or style == "zero")
352 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700353 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700354 while target_blocks:
355 blocks_to_write = target_blocks.first(blocks_limit)
356 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
357 total += blocks_to_write.size()
358 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700359 return total
360
Doug Zongkerfc44a512014-08-26 13:10:25 -0700361 out = []
362
Doug Zongkerfc44a512014-08-26 13:10:25 -0700363 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700364
Doug Zongker62338182014-09-08 08:29:55 -0700365 stashes = {}
366 stashed_blocks = 0
367 max_stashed_blocks = 0
368
369 free_stash_ids = []
370 next_stash_id = 0
371
Doug Zongkerfc44a512014-08-26 13:10:25 -0700372 for xf in self.transfers:
373
Doug Zongker62338182014-09-08 08:29:55 -0700374 if self.version < 2:
375 assert not xf.stash_before
376 assert not xf.use_stash
377
378 for s, sr in xf.stash_before:
379 assert s not in stashes
380 if free_stash_ids:
381 sid = heapq.heappop(free_stash_ids)
382 else:
383 sid = next_stash_id
384 next_stash_id += 1
385 stashes[s] = sid
Sami Tolvanendd67a292014-12-09 16:40:34 +0000386 if self.version == 2:
caozhiyuan21b37d82015-10-21 15:14:03 +0800387 stashed_blocks += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000388 out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
389 else:
390 sh = self.HashBlocks(self.src, sr)
391 if sh in stashes:
392 stashes[sh] += 1
393 else:
394 stashes[sh] = 1
caozhiyuan21b37d82015-10-21 15:14:03 +0800395 stashed_blocks += sr.size()
Tao Baod522bdc2016-04-12 15:53:16 -0700396 self.touched_src_ranges = self.touched_src_ranges.union(sr)
Sami Tolvanendd67a292014-12-09 16:40:34 +0000397 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700398
399 if stashed_blocks > max_stashed_blocks:
400 max_stashed_blocks = stashed_blocks
401
Jesse Zhao7b985f62015-03-02 16:53:08 -0800402 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800403 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800404
Doug Zongker62338182014-09-08 08:29:55 -0700405 if self.version == 1:
Tao Bao4fcb77e2015-10-21 13:36:01 -0700406 src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
Sami Tolvanendd67a292014-12-09 16:40:34 +0000407 elif self.version >= 2:
Doug Zongker62338182014-09-08 08:29:55 -0700408
409 # <# blocks> <src ranges>
410 # OR
411 # <# blocks> <src ranges> <src locs> <stash refs...>
412 # OR
413 # <# blocks> - <stash refs...>
414
415 size = xf.src_ranges.size()
Dan Albert8b72aef2015-03-23 19:13:21 -0700416 src_str = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700417
418 unstashed_src_ranges = xf.src_ranges
419 mapped_stashes = []
420 for s, sr in xf.use_stash:
Tao Baoe27acfd2016-12-16 11:13:55 -0800421 # TODO: We don't need 'sid' (nor free_stash_ids) in BBOTA v3+.
Doug Zongker62338182014-09-08 08:29:55 -0700422 sid = stashes.pop(s)
Doug Zongker62338182014-09-08 08:29:55 -0700423 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
Sami Tolvanendd67a292014-12-09 16:40:34 +0000424 sh = self.HashBlocks(self.src, sr)
Doug Zongker62338182014-09-08 08:29:55 -0700425 sr = xf.src_ranges.map_within(sr)
426 mapped_stashes.append(sr)
Sami Tolvanendd67a292014-12-09 16:40:34 +0000427 if self.version == 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700428 src_str.append("%d:%s" % (sid, sr.to_string_raw()))
Tao Baobb625d22015-08-13 14:44:15 -0700429 # A stash will be used only once. We need to free the stash
430 # immediately after the use, instead of waiting for the automatic
431 # clean-up at the end. Because otherwise it may take up extra space
432 # and lead to OTA failures.
433 # Bug: 23119955
434 free_string.append("free %d\n" % (sid,))
caozhiyuan21b37d82015-10-21 15:14:03 +0800435 free_size += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000436 else:
437 assert sh in stashes
Dan Albert8b72aef2015-03-23 19:13:21 -0700438 src_str.append("%s:%s" % (sh, sr.to_string_raw()))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000439 stashes[sh] -= 1
440 if stashes[sh] == 0:
caozhiyuan21b37d82015-10-21 15:14:03 +0800441 free_size += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800442 free_string.append("free %s\n" % (sh,))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000443 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700444 heapq.heappush(free_stash_ids, sid)
445
446 if unstashed_src_ranges:
Dan Albert8b72aef2015-03-23 19:13:21 -0700447 src_str.insert(1, unstashed_src_ranges.to_string_raw())
Doug Zongker62338182014-09-08 08:29:55 -0700448 if xf.use_stash:
449 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Dan Albert8b72aef2015-03-23 19:13:21 -0700450 src_str.insert(2, mapped_unstashed.to_string_raw())
Doug Zongker62338182014-09-08 08:29:55 -0700451 mapped_stashes.append(mapped_unstashed)
452 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
453 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700454 src_str.insert(1, "-")
Doug Zongker62338182014-09-08 08:29:55 -0700455 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
456
Dan Albert8b72aef2015-03-23 19:13:21 -0700457 src_str = " ".join(src_str)
Doug Zongker62338182014-09-08 08:29:55 -0700458
Sami Tolvanendd67a292014-12-09 16:40:34 +0000459 # all versions:
Doug Zongker62338182014-09-08 08:29:55 -0700460 # zero <rangeset>
461 # new <rangeset>
462 # erase <rangeset>
463 #
464 # version 1:
465 # bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
466 # imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
467 # move <src rangeset> <tgt rangeset>
468 #
469 # version 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700470 # bsdiff patchstart patchlen <tgt rangeset> <src_str>
471 # imgdiff patchstart patchlen <tgt rangeset> <src_str>
472 # move <tgt rangeset> <src_str>
Sami Tolvanendd67a292014-12-09 16:40:34 +0000473 #
474 # version 3:
Dan Albert8b72aef2015-03-23 19:13:21 -0700475 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
476 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
477 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700478
479 tgt_size = xf.tgt_ranges.size()
480
481 if xf.style == "new":
482 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700483 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700484 total += tgt_size
485 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700486 assert xf.tgt_ranges
487 assert xf.src_ranges.size() == tgt_size
488 if xf.src_ranges != xf.tgt_ranges:
Doug Zongker62338182014-09-08 08:29:55 -0700489 if self.version == 1:
490 out.append("%s %s %s\n" % (
491 xf.style,
492 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
493 elif self.version == 2:
494 out.append("%s %s %s\n" % (
495 xf.style,
Dan Albert8b72aef2015-03-23 19:13:21 -0700496 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000497 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100498 # take into account automatic stashing of overlapping blocks
499 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700500 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100501 if temp_stash_usage > max_stashed_blocks:
502 max_stashed_blocks = temp_stash_usage
503
Tao Baod522bdc2016-04-12 15:53:16 -0700504 self.touched_src_ranges = self.touched_src_ranges.union(
505 xf.src_ranges)
506
Sami Tolvanendd67a292014-12-09 16:40:34 +0000507 out.append("%s %s %s %s\n" % (
508 xf.style,
509 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700510 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700511 total += tgt_size
512 elif xf.style in ("bsdiff", "imgdiff"):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700513 assert xf.tgt_ranges
514 assert xf.src_ranges
Doug Zongker62338182014-09-08 08:29:55 -0700515 if self.version == 1:
516 out.append("%s %d %d %s %s\n" % (
517 xf.style, xf.patch_start, xf.patch_len,
518 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
519 elif self.version == 2:
520 out.append("%s %d %d %s %s\n" % (
521 xf.style, xf.patch_start, xf.patch_len,
Dan Albert8b72aef2015-03-23 19:13:21 -0700522 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000523 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100524 # take into account automatic stashing of overlapping blocks
525 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700526 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100527 if temp_stash_usage > max_stashed_blocks:
528 max_stashed_blocks = temp_stash_usage
529
Tao Baod522bdc2016-04-12 15:53:16 -0700530 self.touched_src_ranges = self.touched_src_ranges.union(
531 xf.src_ranges)
532
Sami Tolvanendd67a292014-12-09 16:40:34 +0000533 out.append("%s %d %d %s %s %s %s\n" % (
534 xf.style,
535 xf.patch_start, xf.patch_len,
536 self.HashBlocks(self.src, xf.src_ranges),
537 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700538 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700539 total += tgt_size
540 elif xf.style == "zero":
541 assert xf.tgt_ranges
542 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700543 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700544 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700545 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700546 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700547
Sami Tolvanendd67a292014-12-09 16:40:34 +0000548 if free_string:
549 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800550 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000551
Tao Bao575d68a2015-08-07 19:49:45 -0700552 if self.version >= 2 and common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700553 # Sanity check: abort if we're going to need more stash space than
554 # the allowed size (cache_size * threshold). There are two purposes
555 # of having a threshold here. a) Part of the cache may have been
556 # occupied by some recovery logs. b) It will buy us some time to deal
557 # with the oversize issue.
558 cache_size = common.OPTIONS.cache_size
559 stash_threshold = common.OPTIONS.stash_threshold
560 max_allowed = cache_size * stash_threshold
561 assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
562 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
563 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
564 self.tgt.blocksize, max_allowed, cache_size,
565 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700566
Tao Baod522bdc2016-04-12 15:53:16 -0700567 if self.version >= 3:
568 self.touched_src_sha1 = self.HashBlocks(
569 self.src, self.touched_src_ranges)
570
Tao Baoe9b61912015-07-09 17:37:49 -0700571 # Zero out extended blocks as a workaround for bug 20881595.
572 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700573 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700574 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700575 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700576
577 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700578 # data in the new image; b) will not be touched by dm-verity. Out of those
579 # blocks, we erase the ones that won't be used in this update at the
580 # beginning of an update. The rest would be erased at the end. This is to
581 # work around the eMMC issue observed on some devices, which may otherwise
582 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700583 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700584 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
585 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700586
587 erase_first = new_dontcare.subtract(self.touched_src_ranges)
588 if erase_first:
589 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
590
591 erase_last = new_dontcare.subtract(erase_first)
592 if erase_last:
593 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700594
595 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700596 out.insert(1, "%d\n" % (total,))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700597 if self.version >= 2:
598 # version 2 only: after the total block count, we give the number
599 # of stash slots needed, and the maximum size needed (in blocks)
600 out.insert(2, str(next_stash_id) + "\n")
601 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700602
603 with open(prefix + ".transfer.list", "wb") as f:
604 for i in out:
605 f.write(i)
606
Doug Zongker62338182014-09-08 08:29:55 -0700607 if self.version >= 2:
Tao Baob4cfca52016-02-04 14:26:02 -0800608 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
Tao Bao575d68a2015-08-07 19:49:45 -0700609 OPTIONS = common.OPTIONS
610 if OPTIONS.cache_size is not None:
611 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
612 print("max stashed blocks: %d (%d bytes), "
613 "limit: %d bytes (%.2f%%)\n" % (
Tao Baob4cfca52016-02-04 14:26:02 -0800614 max_stashed_blocks, self._max_stashed_size, max_allowed,
615 self._max_stashed_size * 100.0 / max_allowed))
Tao Bao575d68a2015-08-07 19:49:45 -0700616 else:
617 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
Tao Baob4cfca52016-02-04 14:26:02 -0800618 max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700619
Tao Bao82c47982015-08-17 09:45:13 -0700620 def ReviseStashSize(self):
621 print("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800622 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700623
624 # Create the map between a stash and its def/use points. For example, for a
625 # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
626 for xf in self.transfers:
627 # Command xf defines (stores) all the stashes in stash_before.
628 for idx, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800629 stash_map[idx] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700630
631 # Record all the stashes command xf uses.
632 for idx, _ in xf.use_stash:
Tao Baoe27acfd2016-12-16 11:13:55 -0800633 stash_map[idx] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700634
635 # Compute the maximum blocks available for stash based on /cache size and
636 # the threshold.
637 cache_size = common.OPTIONS.cache_size
638 stash_threshold = common.OPTIONS.stash_threshold
639 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
640
Tao Baoe27acfd2016-12-16 11:13:55 -0800641 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700642 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700643 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700644
Tao Baoe27acfd2016-12-16 11:13:55 -0800645 free_stash_ids = []
646 next_stash_id = 0
647
Tao Bao82c47982015-08-17 09:45:13 -0700648 # Now go through all the commands. Compute the required stash size on the
649 # fly. If a command requires excess stash than available, it deletes the
650 # stash by replacing the command that uses the stash with a "new" command
651 # instead.
652 for xf in self.transfers:
653 replaced_cmds = []
654
655 # xf.stash_before generates explicit stash commands.
656 for idx, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800657 assert idx not in stashes
658 if free_stash_ids:
659 sid = heapq.heappop(free_stash_ids)
660 else:
661 sid = next_stash_id
662 next_stash_id += 1
663 stashes[idx] = sid
664
665 # Check the post-command stashed_blocks.
666 stashed_blocks_after = stashed_blocks
667 if self.version == 2:
668 stashed_blocks_after += sr.size()
669 else:
670 sh = self.HashBlocks(self.src, sr)
671 if sh in stashes:
672 stashes[sh] += 1
673 else:
674 stashes[sh] = 1
675 stashed_blocks_after += sr.size()
676
677 if stashed_blocks_after > max_allowed:
Tao Bao82c47982015-08-17 09:45:13 -0700678 # We cannot stash this one for a later command. Find out the command
679 # that will use this stash and replace the command with "new".
Tao Baoe27acfd2016-12-16 11:13:55 -0800680 use_cmd = stash_map[idx][2]
Tao Bao82c47982015-08-17 09:45:13 -0700681 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700682 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700683 else:
Tao Baoe27acfd2016-12-16 11:13:55 -0800684 stashed_blocks = stashed_blocks_after
Tao Bao82c47982015-08-17 09:45:13 -0700685
686 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
687 # ComputePatches(), they both have the style of "diff".
688 if xf.style == "diff" and self.version >= 3:
689 assert xf.tgt_ranges and xf.src_ranges
690 if xf.src_ranges.overlaps(xf.tgt_ranges):
691 if stashed_blocks + xf.src_ranges.size() > max_allowed:
692 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700693 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700694
695 # Replace the commands in replaced_cmds with "new"s.
696 for cmd in replaced_cmds:
697 # It no longer uses any commands in "use_stash". Remove the def points
698 # for all those stashes.
699 for idx, sr in cmd.use_stash:
Tao Baoe27acfd2016-12-16 11:13:55 -0800700 def_cmd = stash_map[idx][1]
Tao Bao82c47982015-08-17 09:45:13 -0700701 assert (idx, sr) in def_cmd.stash_before
702 def_cmd.stash_before.remove((idx, sr))
703
Tianjie Xuebe39a02016-01-14 14:12:26 -0800704 # Add up blocks that violates space limit and print total number to
705 # screen later.
706 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700707 cmd.ConvertToNew()
708
Tao Baoe27acfd2016-12-16 11:13:55 -0800709 # xf.use_stash generates free commands.
710 for idx, sr in xf.use_stash:
711 sid = stashes.pop(idx)
712 if self.version == 2:
713 stashed_blocks -= sr.size()
714 else:
715 sh = self.HashBlocks(self.src, sr)
716 assert sh in stashes
717 stashes[sh] -= 1
718 if stashes[sh] == 0:
719 stashed_blocks -= sr.size()
720 stashes.pop(sh)
721 heapq.heappush(free_stash_ids, sid)
722
Tianjie Xuebe39a02016-01-14 14:12:26 -0800723 num_of_bytes = new_blocks * self.tgt.blocksize
724 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
725 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao304ee272016-12-19 11:01:38 -0800726 return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700727
Doug Zongkerfc44a512014-08-26 13:10:25 -0700728 def ComputePatches(self, prefix):
729 print("Reticulating splines...")
730 diff_q = []
731 patch_num = 0
732 with open(prefix + ".new.dat", "wb") as new_f:
733 for xf in self.transfers:
734 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700735 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
736 print("%10d %10d (%6.2f%%) %7s %s %s" % (
737 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
738 str(xf.tgt_ranges)))
739
Doug Zongkerfc44a512014-08-26 13:10:25 -0700740 elif xf.style == "new":
741 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
742 new_f.write(piece)
Tao Bao08c85832016-09-19 22:26:30 -0700743 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
744 print("%10d %10d (%6.2f%%) %7s %s %s" % (
745 tgt_size, tgt_size, 100.0, xf.style,
746 xf.tgt_name, str(xf.tgt_ranges)))
747
Doug Zongkerfc44a512014-08-26 13:10:25 -0700748 elif xf.style == "diff":
749 src = self.src.ReadRangeSet(xf.src_ranges)
750 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
751
752 # We can't compare src and tgt directly because they may have
753 # the same content but be broken up into blocks differently, eg:
754 #
755 # ["he", "llo"] vs ["h", "ello"]
756 #
757 # We want those to compare equal, ideally without having to
758 # actually concatenate the strings (these may be tens of
759 # megabytes).
760
761 src_sha1 = sha1()
762 for p in src:
763 src_sha1.update(p)
764 tgt_sha1 = sha1()
765 tgt_size = 0
766 for p in tgt:
767 tgt_sha1.update(p)
768 tgt_size += len(p)
769
770 if src_sha1.digest() == tgt_sha1.digest():
771 # These are identical; we don't need to generate a patch,
772 # just issue copy commands on the device.
773 xf.style = "move"
Tao Bao08c85832016-09-19 22:26:30 -0700774 if xf.src_ranges != xf.tgt_ranges:
775 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
776 tgt_size, tgt_size, 100.0, xf.style,
777 xf.tgt_name if xf.tgt_name == xf.src_name else (
778 xf.tgt_name + " (from " + xf.src_name + ")"),
779 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700780 else:
781 # For files in zip format (eg, APKs, JARs, etc.) we would
782 # like to use imgdiff -z if possible (because it usually
783 # produces significantly smaller patches than bsdiff).
784 # This is permissible if:
785 #
Tao Bao293fd132016-06-11 12:19:23 -0700786 # - imgdiff is not disabled, and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700787 # - the source and target files are monotonic (ie, the
788 # data is stored with blocks in increasing order), and
789 # - we haven't removed any blocks from the source set.
790 #
791 # If these conditions are satisfied then appending all the
792 # blocks in the set together in order will produce a valid
793 # zip file (plus possibly extra zeros in the last block),
794 # which is what imgdiff needs to operate. (imgdiff is
795 # fine with extra zeros at the end of the file.)
Tao Bao293fd132016-06-11 12:19:23 -0700796 imgdiff = (not self.disable_imgdiff and xf.intact and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700797 xf.tgt_name.split(".")[-1].lower()
798 in ("apk", "jar", "zip"))
799 xf.style = "imgdiff" if imgdiff else "bsdiff"
800 diff_q.append((tgt_size, src, tgt, xf, patch_num))
801 patch_num += 1
802
803 else:
804 assert False, "unknown style " + xf.style
805
806 if diff_q:
807 if self.threads > 1:
808 print("Computing patches (using %d threads)..." % (self.threads,))
809 else:
810 print("Computing patches...")
811 diff_q.sort()
812
813 patches = [None] * patch_num
814
Dan Albert8b72aef2015-03-23 19:13:21 -0700815 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700816 lock = threading.Lock()
817 def diff_worker():
818 while True:
819 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700820 if not diff_q:
821 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700822 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
823 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
824 size = len(patch)
825 with lock:
826 patches[patchnum] = (patch, xf)
Tao Bao08c85832016-09-19 22:26:30 -0700827 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
Doug Zongkerfc44a512014-08-26 13:10:25 -0700828 size, tgt_size, size * 100.0 / tgt_size, xf.style,
829 xf.tgt_name if xf.tgt_name == xf.src_name else (
Tao Bao08c85832016-09-19 22:26:30 -0700830 xf.tgt_name + " (from " + xf.src_name + ")"),
831 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700832
833 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700834 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700835 for th in threads:
836 th.start()
837 while threads:
838 threads.pop().join()
839 else:
840 patches = []
841
842 p = 0
843 with open(prefix + ".patch.dat", "wb") as patch_f:
844 for patch, xf in patches:
845 xf.patch_start = p
846 xf.patch_len = len(patch)
847 patch_f.write(patch)
848 p += len(patch)
849
850 def AssertSequenceGood(self):
851 # Simulate the sequences of transfers we will output, and check that:
852 # - we never read a block after writing it, and
853 # - we write every block we care about exactly once.
854
855 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800856 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700857
858 # Imagine processing the transfers in order.
859 for xf in self.transfers:
860 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700861
862 x = xf.src_ranges
863 if self.version >= 2:
864 for _, sr in xf.use_stash:
865 x = x.subtract(sr)
866
Doug Zongker6ab2a502016-02-09 08:28:09 -0800867 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800868 # Source image could be larger. Don't check the blocks that are in the
869 # source image only. Since they are not in 'touched', and won't ever
870 # be touched.
871 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800872 assert touched[i] == 0
873
874 # Check that the output blocks for this transfer haven't yet
875 # been touched, and touch all the blocks written by this
876 # transfer.
877 for s, e in xf.tgt_ranges:
878 for i in range(s, e):
879 assert touched[i] == 0
880 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700881
882 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800883 for s, e in self.tgt.care_map:
884 for i in range(s, e):
885 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700886
Doug Zongker62338182014-09-08 08:29:55 -0700887 def ImproveVertexSequence(self):
888 print("Improving vertex order...")
889
890 # At this point our digraph is acyclic; we reversed any edges that
891 # were backwards in the heuristically-generated sequence. The
892 # previously-generated order is still acceptable, but we hope to
893 # find a better order that needs less memory for stashed data.
894 # Now we do a topological sort to generate a new vertex order,
895 # using a greedy algorithm to choose which vertex goes next
896 # whenever we have a choice.
897
898 # Make a copy of the edge set; this copy will get destroyed by the
899 # algorithm.
900 for xf in self.transfers:
901 xf.incoming = xf.goes_after.copy()
902 xf.outgoing = xf.goes_before.copy()
903
904 L = [] # the new vertex order
905
906 # S is the set of sources in the remaining graph; we always choose
907 # the one that leaves the least amount of stashed data after it's
908 # executed.
909 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
910 if not u.incoming]
911 heapq.heapify(S)
912
913 while S:
914 _, _, xf = heapq.heappop(S)
915 L.append(xf)
916 for u in xf.outgoing:
917 del u.incoming[xf]
918 if not u.incoming:
919 heapq.heappush(S, (u.NetStashChange(), u.order, u))
920
921 # if this fails then our graph had a cycle.
922 assert len(L) == len(self.transfers)
923
924 self.transfers = L
925 for i, xf in enumerate(L):
926 xf.order = i
927
Doug Zongkerfc44a512014-08-26 13:10:25 -0700928 def RemoveBackwardEdges(self):
929 print("Removing backward edges...")
930 in_order = 0
931 out_of_order = 0
932 lost_source = 0
933
934 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700935 lost = 0
936 size = xf.src_ranges.size()
937 for u in xf.goes_before:
938 # xf should go before u
939 if xf.order < u.order:
940 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700941 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700942 else:
943 # it doesn't, boo. trim the blocks that u writes from xf's
944 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700945 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700946 assert xf.src_ranges.overlaps(u.tgt_ranges)
947 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
948 xf.intact = False
949
950 if xf.style == "diff" and not xf.src_ranges:
951 # nothing left to diff from; treat as new data
952 xf.style = "new"
953
954 lost = size - xf.src_ranges.size()
955 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700956
957 print((" %d/%d dependencies (%.2f%%) were violated; "
958 "%d source blocks removed.") %
959 (out_of_order, in_order + out_of_order,
960 (out_of_order * 100.0 / (in_order + out_of_order))
961 if (in_order + out_of_order) else 0.0,
962 lost_source))
963
Doug Zongker62338182014-09-08 08:29:55 -0700964 def ReverseBackwardEdges(self):
965 print("Reversing backward edges...")
966 in_order = 0
967 out_of_order = 0
968 stashes = 0
969 stash_size = 0
970
971 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700972 for u in xf.goes_before.copy():
973 # xf should go before u
974 if xf.order < u.order:
975 # it does, hurray!
976 in_order += 1
977 else:
978 # it doesn't, boo. modify u to stash the blocks that it
979 # writes that xf wants to read, and then require u to go
980 # before xf.
981 out_of_order += 1
982
983 overlap = xf.src_ranges.intersect(u.tgt_ranges)
984 assert overlap
985
986 u.stash_before.append((stashes, overlap))
987 xf.use_stash.append((stashes, overlap))
988 stashes += 1
989 stash_size += overlap.size()
990
991 # reverse the edge direction; now xf must go after u
992 del xf.goes_before[u]
993 del u.goes_after[xf]
994 xf.goes_after[u] = None # value doesn't matter
995 u.goes_before[xf] = None
996
997 print((" %d/%d dependencies (%.2f%%) were violated; "
998 "%d source blocks stashed.") %
999 (out_of_order, in_order + out_of_order,
1000 (out_of_order * 100.0 / (in_order + out_of_order))
1001 if (in_order + out_of_order) else 0.0,
1002 stash_size))
1003
Doug Zongkerfc44a512014-08-26 13:10:25 -07001004 def FindVertexSequence(self):
1005 print("Finding vertex sequence...")
1006
1007 # This is based on "A Fast & Effective Heuristic for the Feedback
1008 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1009 # it as starting with the digraph G and moving all the vertices to
1010 # be on a horizontal line in some order, trying to minimize the
1011 # number of edges that end up pointing to the left. Left-pointing
1012 # edges will get removed to turn the digraph into a DAG. In this
1013 # case each edge has a weight which is the number of source blocks
1014 # we'll lose if that edge is removed; we try to minimize the total
1015 # weight rather than just the number of edges.
1016
1017 # Make a copy of the edge set; this copy will get destroyed by the
1018 # algorithm.
1019 for xf in self.transfers:
1020 xf.incoming = xf.goes_after.copy()
1021 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001022 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001023
1024 # We use an OrderedDict instead of just a set so that the output
1025 # is repeatable; otherwise it would depend on the hash values of
1026 # the transfer objects.
1027 G = OrderedDict()
1028 for xf in self.transfers:
1029 G[xf] = None
1030 s1 = deque() # the left side of the sequence, built from left to right
1031 s2 = deque() # the right side of the sequence, built from right to left
1032
Doug Zongker6ab2a502016-02-09 08:28:09 -08001033 heap = []
1034 for xf in self.transfers:
1035 xf.heap_item = HeapItem(xf)
1036 heap.append(xf.heap_item)
1037 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001038
Tao Bao33482282016-10-24 16:49:08 -07001039 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1040 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1041 # { key1: None, key2: None, ... }.
1042 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1043 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001044
1045 def adjust_score(iu, delta):
1046 iu.score += delta
1047 iu.heap_item.clear()
1048 iu.heap_item = HeapItem(iu)
1049 heapq.heappush(heap, iu.heap_item)
1050
1051 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001052 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001053 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001054 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001055 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001056 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001057 s2.appendleft(u)
1058 del G[u]
1059 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001060 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001061 if not iu.outgoing:
1062 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001063 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001064
1065 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001066 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001067 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001068 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001069 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001070 s1.append(u)
1071 del G[u]
1072 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001073 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001074 if not iu.incoming:
1075 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001076 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001077
Doug Zongker6ab2a502016-02-09 08:28:09 -08001078 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001079
1080 # Find the "best" vertex to put next. "Best" is the one that
1081 # maximizes the net difference in source blocks saved we get by
1082 # pretending it's a source rather than a sink.
1083
Doug Zongker6ab2a502016-02-09 08:28:09 -08001084 while True:
1085 u = heapq.heappop(heap)
1086 if u and u.item in G:
1087 u = u.item
1088 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001089
Doug Zongkerfc44a512014-08-26 13:10:25 -07001090 s1.append(u)
1091 del G[u]
1092 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001093 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001094 if not iu.incoming:
1095 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001096
Doug Zongkerfc44a512014-08-26 13:10:25 -07001097 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001098 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001099 if not iu.outgoing:
1100 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001101
1102 # Now record the sequence in the 'order' field of each transfer,
1103 # and by rearranging self.transfers to be in the chosen sequence.
1104
1105 new_transfers = []
1106 for x in itertools.chain(s1, s2):
1107 x.order = len(new_transfers)
1108 new_transfers.append(x)
1109 del x.incoming
1110 del x.outgoing
1111
1112 self.transfers = new_transfers
1113
1114 def GenerateDigraph(self):
1115 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001116
1117 # Each item of source_ranges will be:
1118 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001119 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001120 source_ranges = []
1121 for b in self.transfers:
1122 for s, e in b.src_ranges:
1123 if e > len(source_ranges):
1124 source_ranges.extend([None] * (e-len(source_ranges)))
1125 for i in range(s, e):
1126 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001127 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001128 else:
Tao Bao33482282016-10-24 16:49:08 -07001129 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001130
Doug Zongkerfc44a512014-08-26 13:10:25 -07001131 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001132 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001133 for s, e in a.tgt_ranges:
1134 for i in range(s, e):
1135 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001136 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1137 if source_ranges[i] is not None:
1138 for j in source_ranges[i]:
1139 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001140
1141 for b in intersections:
1142 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001143
1144 # If the blocks written by A are read by B, then B needs to go before A.
1145 i = a.tgt_ranges.intersect(b.src_ranges)
1146 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001147 if b.src_name == "__ZERO":
1148 # the cost of removing source blocks for the __ZERO domain
1149 # is (nearly) zero.
1150 size = 0
1151 else:
1152 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001153 b.goes_before[a] = size
1154 a.goes_after[b] = size
1155
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    For each target file, picks a source to diff against by trying, in
    order: the special __ZERO/__COPY domains, an exact pathname match, an
    exact basename match, and a "number pattern" basename match (digit
    runs replaced by '#', via the indexes built by
    AbbreviateSourceNames()).  Files with no match become "new"
    transfers.  Large files are split into multiple Transfer()s so their
    source blocks fit in the stash.
    """

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
                          style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # Possibly split large files into smaller chunks.
      pieces = 0
      cache_size = common.OPTIONS.cache_size
      split_threshold = 0.125
      # Per-piece limit: 1/8 of the cache, in blocks.
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Peel off max-sized pieces from the front of both range sets until
      # either side fits; split names get a "-<piece index>" suffix.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
                 by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
                 by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare block by block; next_item() presumably yields individual
        # blocks/ranges in order — confirm against rangelib.
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          # Emit the identical blocks as their own (split) transfers...
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          # ...and fall through to diff only the changed remainder.
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    print("Finding transfers...")

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros.  We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#").  (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      # No plausible source found; ship the file as new data.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001322
1323 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001324 for k in self.src.file_map.keys():
1325 b = os.path.basename(k)
1326 self.src_basenames[b] = k
1327 b = re.sub("[0-9]+", "#", b)
1328 self.src_numpatterns[b] = k
1329
1330 @staticmethod
1331 def AssertPartition(total, seq):
1332 """Assert that all the RangeSets in 'seq' form a partition of the
1333 'total' RangeSet (ie, they are nonintersecting and their union
1334 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001335
Doug Zongkerfc44a512014-08-26 13:10:25 -07001336 so_far = RangeSet()
1337 for i in seq:
1338 assert not so_far.overlaps(i)
1339 so_far = so_far.union(i)
1340 assert so_far == total