blob: bec577323f1e0ee3cb2bbd4ad84111c0e7650c08 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Bao8dcf7382015-05-21 14:09:49 -070019import common
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that turns the src data into the tgt data.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data);
        otherwise run "bsdiff".

  Returns:
    The patch contents, as a string of bytes.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  # The patch file is written by the external tool; we only need its name.
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Fix: close the null-device handle when the call returns; the old
      # code passed open("/dev/null", "a") inline and leaked the handle.
      with open(os.devnull, "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Fix: remove each temp file independently. Previously a failure to
    # unlink srcfile aborted the single try block and leaked the rest.
    for name in (srcfile, tgtfile, patchfile):
      try:
        os.unlink(name)
      except OSError:
        pass
72
Dan Albert8b72aef2015-03-23 19:13:21 -070073
class Image(object):
  """Abstract interface for the image objects consumed by BlockImageDiff."""

  def ReadRangeSet(self, ranges):
    """Return the data contained in the given blocks; must be overridden."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image data; must be overridden."""
    raise NotImplementedError
80
81
class EmptyImage(Image):
  """An image of length zero: no blocks, no data."""
  blocksize = 4096
  total_blocks = 0
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is no data to read; always return an empty tuple."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes of data.

    clobbered_blocks is always empty here, so the
    include_clobbered_blocks flag makes no difference.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
97
98
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap a flat string of partition data as an image.

    Args:
      data: the raw partition contents.
      trim: if True, drop any trailing partial block.
      pad: if True, zero-fill any trailing partial block.

    At most one of trim/pad may be set; if neither is set, len(data)
    must already be a multiple of the 4096-byte block size.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Fix: floor division keeps the block count an int on Python 3 (or
    # under "from __future__ import division") as well; identical on 2.x.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Fix: store a RangeSet, not a bare list. EmptyImage exposes a RangeSet
    # here, and TotalSha1() below calls care_map.subtract(clobbered_blocks),
    # which needs a RangeSet operand.
    self.clobbered_blocks = RangeSet(
        data=clobbered_blocks) if clobbered_blocks else RangeSet()
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # The padded last block (if any) is classified separately as __COPY.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data for the given ranges, one string per range."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image contents.

    By default the clobbered (padded) blocks are excluded; pass
    include_clobbered_blocks=True to hash every byte of self.data.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # Fix: feed the pieces to the hash one at a time. The old code
      # passed the list returned by ReadRangeSet() straight to sha1(),
      # which raises TypeError (sha1 accepts only buffer-style data).
      h = sha1()
      for piece in self.ReadRangeSet(ranges):
        h.update(piece)
      return h.hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700169
Doug Zongkerfc44a512014-08-26 13:10:25 -0700170
class Transfer(object):
  """One step of the update: produce tgt_ranges from src_ranges via style."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # "Intact" means both range sets report themselves monotonic (blocks
    # stored in increasing order); objects without the attribute count as
    # not monotonic.
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # OrderedDict rather than dict keeps the output repeatable; plain dict
    # iteration order would depend on the hash values of the Transfer
    # objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Net number of blocks this transfer adds to the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    consumed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - consumed

  def ConvertToNew(self):
    """Degrade this transfer to a plain "new" command with no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
205
206
207# BlockImageDiff works on two image objects. An image object is
208# anything that provides the following attributes:
209#
210# blocksize: the size in bytes of a block, currently must be 4096.
211#
212# total_blocks: the total size of the partition/image, in blocks.
213#
214# care_map: a RangeSet containing which blocks (in the range [0,
215# total_blocks) we actually care about; i.e. which blocks contain
216# data.
217#
218# file_map: a dict that partitions the blocks contained in care_map
219# into smaller domains that are useful for doing diffs on.
220# (Typically a domain is a file, and the key in file_map is the
221# pathname.)
222#
Tao Baoff777812015-05-12 11:42:31 -0700223# clobbered_blocks: a RangeSet containing which blocks contain data
224# but may be altered by the FS. They need to be excluded when
225# verifying the partition integrity.
226#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# ReadRangeSet(): a function that takes a RangeSet and returns the
228# data contained in the image blocks of that RangeSet. The data
229# is returned as a list or tuple of strings; concatenating the
230# elements together should produce the requested data.
231# Implementations are free to break up the data into list/tuple
232# elements in any way that is convenient.
233#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700234# TotalSha1(): a function that returns (as a hex string) the SHA-1
235# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700236# care_map minus clobbered_blocks, or including the clobbered
237# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700238#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700239# When creating a BlockImageDiff, the src image may be None, in which
240# case the list of transfers produced will never read from the
241# original image.
242
243class BlockImageDiff(object):
Sami Tolvanendd67a292014-12-09 16:40:34 +0000244 def __init__(self, tgt, src=None, threads=None, version=3):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700245 if threads is None:
246 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700247 if threads == 0:
248 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700249 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700250 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700251 self.transfers = []
252 self.src_basenames = {}
253 self.src_numpatterns = {}
Doug Zongker62338182014-09-08 08:29:55 -0700254
Sami Tolvanendd67a292014-12-09 16:40:34 +0000255 assert version in (1, 2, 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700256
257 self.tgt = tgt
258 if src is None:
259 src = EmptyImage()
260 self.src = src
261
262 # The updater code that installs the patch always uses 4k blocks.
263 assert tgt.blocksize == 4096
264 assert src.blocksize == 4096
265
266 # The range sets in each filemap should comprise a partition of
267 # the care map.
268 self.AssertPartition(src.care_map, src.file_map.values())
269 self.AssertPartition(tgt.care_map, tgt.file_map.values())
270
  def Compute(self, prefix):
    """Compute the block diff and write all output files.

    Emits "<prefix>.transfer.list" together with the payload files
    "<prefix>.new.dat" and "<prefix>.patch.dat" (written by
    WriteTransfers and ComputePatches respectively).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
305
Dan Albert8b72aef2015-03-23 19:13:21 -0700306 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000307 data = source.ReadRangeSet(ranges)
308 ctx = sha1()
309
310 for p in data:
311 ctx.update(p)
312
313 return ctx.hexdigest()
314
  def WriteTransfers(self, prefix):
    """Render self.transfers into "<prefix>.transfer.list".

    Serializes every transfer into the textual command format of the
    selected version, tracks stash usage while doing so, and prepends
    the header lines: format version, total blocks written, and (v2+)
    stash slot count plus maximum stashed blocks.
    """
    out = []

    # Total number of target blocks actually written by the commands.
    total = 0

    # v2: maps stash slot id -> numeric sid.  v3: additionally maps a
    # stash's content hash -> remaining use count (stashes are addressed
    # by hash, so identical contents share one slot).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Numeric slot ids are recycled through a min-heap so the smallest
    # free id is always reused first.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      # Version 1 has no stash support at all.
      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        stashed_blocks += sr.size()
        if self.version == 2:
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # v3 stashes are keyed by content hash; only emit the stash
          # command the first time this content is seen.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      # Record the high-water mark of stash usage.
      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw()
      elif self.version >= 2:

        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          stashed_blocks -= sr.size()
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed range in coordinates relative to the
          # source ranges of this transfer.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))   # format version number
    out.insert(1, str(total) + "\n")
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700547
  def ReviseStashSize(self):
    """Keep runtime stash usage within the /cache size limit.

    Walks the transfer sequence, tracking how many blocks would be
    stashed at each point.  Any command whose (explicit or v3-implicit)
    stash requirement would push usage past the allowed maximum is
    converted into a plain "new" command instead, and its stash def
    points are removed.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))
          new_blocks += sr.size()

        cmd.ConvertToNew()

    print(" Total %d blocks are packed as new blocks due to insufficient "
          "cache size." % (new_blocks,))
617
  def ComputePatches(self, prefix):
    """Compute the payload for every transfer.

    Writes the raw data of "new" transfers to "<prefix>.new.dat" and the
    bsdiff/imgdiff patches of "diff" transfers to "<prefix>.patch.dat",
    setting xf.patch_start/xf.patch_len for each patched transfer.
    "diff" transfers whose source and target contents turn out to be
    identical are downgraded to "move" commands with no patch.  Patch
    computation is fanned out over self.threads worker threads.
    """
    print("Reticulating splines...")
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate.  (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      # Sort so the largest targets (computed last when popping from the
      # end) are interleaved sensibly across workers.
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        # Workers pull items off diff_q and fill in patches[]; both are
        # shared, so every access happens under the lock.
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    # Concatenate all patches into the .patch.dat file, recording each
    # patch's offset/length on its transfer.
    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)
722
723 def AssertSequenceGood(self):
724 # Simulate the sequences of transfers we will output, and check that:
725 # - we never read a block after writing it, and
726 # - we write every block we care about exactly once.
727
728 # Start with no blocks having been touched yet.
729 touched = RangeSet()
730
731 # Imagine processing the transfers in order.
732 for xf in self.transfers:
733 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700734
735 x = xf.src_ranges
736 if self.version >= 2:
737 for _, sr in xf.use_stash:
738 x = x.subtract(sr)
739
740 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700741 # Check that the output blocks for this transfer haven't yet been touched.
742 assert not touched.overlaps(xf.tgt_ranges)
743 # Touch all the blocks written by this transfer.
744 touched = touched.union(xf.tgt_ranges)
745
746 # Check that we've written every target block.
747 assert touched == self.tgt.care_map
748
  def ImproveVertexSequence(self):
    """Topologically re-sort the transfers to reduce peak stash usage.

    Greedy Kahn-style sort: among the currently ready vertices (no
    remaining incoming edges) always emit the one with the smallest
    NetStashChange(), so stashed data is freed as early as possible.
    Renumbers xf.order to match the new sequence.
    """
    print("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence.  The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []   # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.  (u.order breaks ties deterministically.)
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # if this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i
789
Doug Zongkerfc44a512014-08-26 13:10:25 -0700790 def RemoveBackwardEdges(self):
791 print("Removing backward edges...")
792 in_order = 0
793 out_of_order = 0
794 lost_source = 0
795
796 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700797 lost = 0
798 size = xf.src_ranges.size()
799 for u in xf.goes_before:
800 # xf should go before u
801 if xf.order < u.order:
802 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700803 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700804 else:
805 # it doesn't, boo. trim the blocks that u writes from xf's
806 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700807 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700808 assert xf.src_ranges.overlaps(u.tgt_ranges)
809 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
810 xf.intact = False
811
812 if xf.style == "diff" and not xf.src_ranges:
813 # nothing left to diff from; treat as new data
814 xf.style = "new"
815
816 lost = size - xf.src_ranges.size()
817 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700818
819 print((" %d/%d dependencies (%.2f%%) were violated; "
820 "%d source blocks removed.") %
821 (out_of_order, in_order + out_of_order,
822 (out_of_order * 100.0 / (in_order + out_of_order))
823 if (in_order + out_of_order) else 0.0,
824 lost_source))
825
Doug Zongker62338182014-09-08 08:29:55 -0700826 def ReverseBackwardEdges(self):
827 print("Reversing backward edges...")
828 in_order = 0
829 out_of_order = 0
830 stashes = 0
831 stash_size = 0
832
833 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700834 for u in xf.goes_before.copy():
835 # xf should go before u
836 if xf.order < u.order:
837 # it does, hurray!
838 in_order += 1
839 else:
840 # it doesn't, boo. modify u to stash the blocks that it
841 # writes that xf wants to read, and then require u to go
842 # before xf.
843 out_of_order += 1
844
845 overlap = xf.src_ranges.intersect(u.tgt_ranges)
846 assert overlap
847
848 u.stash_before.append((stashes, overlap))
849 xf.use_stash.append((stashes, overlap))
850 stashes += 1
851 stash_size += overlap.size()
852
853 # reverse the edge direction; now xf must go after u
854 del xf.goes_before[u]
855 del u.goes_after[xf]
856 xf.goes_after[u] = None # value doesn't matter
857 u.goes_before[xf] = None
858
859 print((" %d/%d dependencies (%.2f%%) were violated; "
860 "%d source blocks stashed.") %
861 (out_of_order, in_order + out_of_order,
862 (out_of_order * 100.0 / (in_order + out_of_order))
863 if (in_order + out_of_order) else 0.0,
864 stash_size))
865
Doug Zongkerfc44a512014-08-26 13:10:25 -0700866 def FindVertexSequence(self):
867 print("Finding vertex sequence...")
868
869 # This is based on "A Fast & Effective Heuristic for the Feedback
870 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
871 # it as starting with the digraph G and moving all the vertices to
872 # be on a horizontal line in some order, trying to minimize the
873 # number of edges that end up pointing to the left. Left-pointing
874 # edges will get removed to turn the digraph into a DAG. In this
875 # case each edge has a weight which is the number of source blocks
876 # we'll lose if that edge is removed; we try to minimize the total
877 # weight rather than just the number of edges.
878
879 # Make a copy of the edge set; this copy will get destroyed by the
880 # algorithm.
881 for xf in self.transfers:
882 xf.incoming = xf.goes_after.copy()
883 xf.outgoing = xf.goes_before.copy()
884
885 # We use an OrderedDict instead of just a set so that the output
886 # is repeatable; otherwise it would depend on the hash values of
887 # the transfer objects.
888 G = OrderedDict()
889 for xf in self.transfers:
890 G[xf] = None
891 s1 = deque() # the left side of the sequence, built from left to right
892 s2 = deque() # the right side of the sequence, built from right to left
893
894 while G:
895
896 # Put all sinks at the end of the sequence.
897 while True:
898 sinks = [u for u in G if not u.outgoing]
Dan Albert8b72aef2015-03-23 19:13:21 -0700899 if not sinks:
900 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700901 for u in sinks:
902 s2.appendleft(u)
903 del G[u]
904 for iu in u.incoming:
905 del iu.outgoing[u]
906
907 # Put all the sources at the beginning of the sequence.
908 while True:
909 sources = [u for u in G if not u.incoming]
Dan Albert8b72aef2015-03-23 19:13:21 -0700910 if not sources:
911 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700912 for u in sources:
913 s1.append(u)
914 del G[u]
915 for iu in u.outgoing:
916 del iu.incoming[u]
917
Dan Albert8b72aef2015-03-23 19:13:21 -0700918 if not G:
919 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700920
921 # Find the "best" vertex to put next. "Best" is the one that
922 # maximizes the net difference in source blocks saved we get by
923 # pretending it's a source rather than a sink.
924
925 max_d = None
926 best_u = None
927 for u in G:
928 d = sum(u.outgoing.values()) - sum(u.incoming.values())
929 if best_u is None or d > max_d:
930 max_d = d
931 best_u = u
932
933 u = best_u
934 s1.append(u)
935 del G[u]
936 for iu in u.outgoing:
937 del iu.incoming[u]
938 for iu in u.incoming:
939 del iu.outgoing[u]
940
941 # Now record the sequence in the 'order' field of each transfer,
942 # and by rearranging self.transfers to be in the chosen sequence.
943
944 new_transfers = []
945 for x in itertools.chain(s1, s2):
946 x.order = len(new_transfers)
947 new_transfers.append(x)
948 del x.incoming
949 del x.outgoing
950
951 self.transfers = new_transfers
952
953 def GenerateDigraph(self):
954 print("Generating digraph...")
955 for a in self.transfers:
956 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700957 if a is b:
958 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700959
960 # If the blocks written by A are read by B, then B needs to go before A.
961 i = a.tgt_ranges.intersect(b.src_ranges)
962 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700963 if b.src_name == "__ZERO":
964 # the cost of removing source blocks for the __ZERO domain
965 # is (nearly) zero.
966 size = 0
967 else:
968 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700969 b.goes_before[a] = size
970 a.goes_after[b] = size
971
972 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -0700973 """Parse the file_map to generate all the transfers."""
974
975 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
976 split=False):
977 """Wrapper function for adding a Transfer().
978
979 For BBOTA v3, we need to stash source blocks for resumable feature.
980 However, with the growth of file size and the shrink of the cache
981 partition source blocks are too large to be stashed. If a file occupies
982 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
983 into smaller pieces by getting multiple Transfer()s.
984
985 The downside is that after splitting, we can no longer use imgdiff but
986 only bsdiff."""
987
988 MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
989
990 # We care about diff transfers only.
991 if style != "diff" or not split:
992 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
993 return
994
995 # Change nothing for small files.
996 if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
997 src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
998 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
999 return
1000
1001 pieces = 0
1002 while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
1003 src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
1004 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1005 src_split_name = "%s-%d" % (src_name, pieces)
1006 tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
1007 src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
1008 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1009 by_id)
1010
1011 tgt_ranges = tgt_ranges.subtract(tgt_first)
1012 src_ranges = src_ranges.subtract(src_first)
1013 pieces += 1
1014
1015 # Handle remaining blocks.
1016 if tgt_ranges.size() or src_ranges.size():
1017 # Must be both non-empty.
1018 assert tgt_ranges.size() and src_ranges.size()
1019 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1020 src_split_name = "%s-%d" % (src_name, pieces)
1021 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1022 by_id)
1023
Doug Zongkerfc44a512014-08-26 13:10:25 -07001024 empty = RangeSet()
1025 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1026 if tgt_fn == "__ZERO":
1027 # the special "__ZERO" domain is all the blocks not contained
1028 # in any file and that are filled with zeros. We have a
1029 # special transfer style for zero blocks.
1030 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001031 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1032 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001033 continue
1034
Tao Baoff777812015-05-12 11:42:31 -07001035 elif tgt_fn == "__COPY":
1036 # "__COPY" domain includes all the blocks not contained in any
1037 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001038 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001039 continue
1040
Doug Zongkerfc44a512014-08-26 13:10:25 -07001041 elif tgt_fn in self.src.file_map:
1042 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001043 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1044 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001045 continue
1046
1047 b = os.path.basename(tgt_fn)
1048 if b in self.src_basenames:
1049 # Look for an exact basename match in the source.
1050 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001051 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1052 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001053 continue
1054
1055 b = re.sub("[0-9]+", "#", b)
1056 if b in self.src_numpatterns:
1057 # Look for a 'number pattern' match (a basename match after
1058 # all runs of digits are replaced by "#"). (This is useful
1059 # for .so files that contain version numbers in the filename
1060 # that get bumped.)
1061 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001062 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1063 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001064 continue
1065
Tao Bao9a5caf22015-08-25 15:10:10 -07001066 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001067
1068 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001069 for k in self.src.file_map.keys():
1070 b = os.path.basename(k)
1071 self.src_basenames[b] = k
1072 b = re.sub("[0-9]+", "#", b)
1073 self.src_numpatterns[b] = k
1074
1075 @staticmethod
1076 def AssertPartition(total, seq):
1077 """Assert that all the RangeSets in 'seq' form a partition of the
1078 'total' RangeSet (ie, they are nonintersecting and their union
1079 equals 'total')."""
1080 so_far = RangeSet()
1081 for i in seq:
1082 assert not so_far.overlaps(i)
1083 so_far = so_far.union(i)
1084 assert so_far == total