blob: bb2f16d7c4955cb1a4b2a24dca1222c868b04438 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Baod47d8e12015-05-21 14:09:49 -070019import common
Doug Zongkercf4fda72014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
Doug Zongkerfc44a512014-08-26 13:10:25 -070035def compute_patch(src, tgt, imgdiff=False):
36 srcfd, srcfile = tempfile.mkstemp(prefix="src-")
37 tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
38 patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
39 os.close(patchfd)
40
41 try:
42 with os.fdopen(srcfd, "wb") as f_src:
43 for p in src:
44 f_src.write(p)
45
46 with os.fdopen(tgtfd, "wb") as f_tgt:
47 for p in tgt:
48 f_tgt.write(p)
49 try:
50 os.unlink(patchfile)
51 except OSError:
52 pass
53 if imgdiff:
54 p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
55 stdout=open("/dev/null", "a"),
56 stderr=subprocess.STDOUT)
57 else:
58 p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])
59
60 if p:
61 raise ValueError("diff failed: " + str(p))
62
63 with open(patchfile, "rb") as f:
64 return f.read()
65 finally:
66 try:
67 os.unlink(srcfile)
68 os.unlink(tgtfile)
69 os.unlink(patchfile)
70 except OSError:
71 pass
72
Dan Albert8b72aef2015-03-23 19:13:21 -070073
class Image(object):
  """Abstract interface that BlockImageDiff requires of its image objects."""

  def ReadRangeSet(self, ranges):
    """Return the data in the blocks of `ranges`; subclasses must override."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image data; subclasses must override."""
    raise NotImplementedError
80
81
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    # Nothing to read; every range maps to no data at all.
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so the
    # include_clobbered_blocks flag makes no difference: the result is
    # always the SHA-1 of the empty string.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
97
98
Dan Albert8b72aef2015-03-23 19:13:21 -070099class DataImage(Image):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700100 """An image wrapped around a single string of data."""
101
102 def __init__(self, data, trim=False, pad=False):
103 self.data = data
104 self.blocksize = 4096
105
106 assert not (trim and pad)
107
108 partial = len(self.data) % self.blocksize
Tao Bao28f6f9c2015-09-05 20:35:32 -0700109 padded = False
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700110 if partial > 0:
111 if trim:
112 self.data = self.data[:-partial]
113 elif pad:
114 self.data += '\0' * (self.blocksize - partial)
Tao Bao28f6f9c2015-09-05 20:35:32 -0700115 padded = True
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700116 else:
117 raise ValueError(("data for DataImage must be multiple of %d bytes "
118 "unless trim or pad is specified") %
119 (self.blocksize,))
120
121 assert len(self.data) % self.blocksize == 0
122
123 self.total_blocks = len(self.data) / self.blocksize
124 self.care_map = RangeSet(data=(0, self.total_blocks))
Tao Bao28f6f9c2015-09-05 20:35:32 -0700125 # When the last block is padded, we always write the whole block even for
126 # incremental OTAs. Because otherwise the last block may get skipped if
127 # unchanged for an incremental, but would fail the post-install
128 # verification if it has non-zero contents in the padding bytes.
129 # Bug: 23828506
130 if padded:
131 self.clobbered_blocks = RangeSet(
132 data=(self.total_blocks-1, self.total_blocks))
133 else:
134 self.clobbered_blocks = RangeSet()
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700135 self.extended = RangeSet()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700136
137 zero_blocks = []
138 nonzero_blocks = []
139 reference = '\0' * self.blocksize
140
Tao Bao28f6f9c2015-09-05 20:35:32 -0700141 for i in range(self.total_blocks-1 if padded else self.total_blocks):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700142 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
143 if d == reference:
144 zero_blocks.append(i)
145 zero_blocks.append(i+1)
146 else:
147 nonzero_blocks.append(i)
148 nonzero_blocks.append(i+1)
149
150 self.file_map = {"__ZERO": RangeSet(zero_blocks),
151 "__NONZERO": RangeSet(nonzero_blocks)}
152
Tao Bao28f6f9c2015-09-05 20:35:32 -0700153 if self.clobbered_blocks:
154 self.file_map["__COPY"] = self.clobbered_blocks
155
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700156 def ReadRangeSet(self, ranges):
157 return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]
158
Tao Bao5fcaaef2015-06-01 13:40:49 -0700159 def TotalSha1(self, include_clobbered_blocks=False):
Tao Bao28f6f9c2015-09-05 20:35:32 -0700160 if not include_clobbered_blocks:
161 ranges = self.care_map.subtract(self.clobbered_blocks)
162 return sha1(self.ReadRangeSet(ranges)).hexdigest()
163 else:
164 return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700165
Doug Zongkerfc44a512014-08-26 13:10:25 -0700166
class Transfer(object):
  """One transfer command: produce tgt_ranges in the target image, possibly
  reading src_ranges from the source image."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style

    # "Intact" means both range sets store their data in increasing block
    # order, which is a precondition for using imgdiff later on.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register ourselves in the global list; our index there is our id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return the net number of blocks this command adds to (+) or removes
    from (-) the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrite this command as a "new" transfer that reads no source data
    and uses no stash."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
201
202
203# BlockImageDiff works on two image objects. An image object is
204# anything that provides the following attributes:
205#
206# blocksize: the size in bytes of a block, currently must be 4096.
207#
208# total_blocks: the total size of the partition/image, in blocks.
209#
210# care_map: a RangeSet containing which blocks (in the range [0,
211# total_blocks) we actually care about; i.e. which blocks contain
212# data.
213#
214# file_map: a dict that partitions the blocks contained in care_map
215# into smaller domains that are useful for doing diffs on.
216# (Typically a domain is a file, and the key in file_map is the
217# pathname.)
218#
Tao Bao5ece99d2015-05-12 11:42:31 -0700219# clobbered_blocks: a RangeSet containing which blocks contain data
220# but may be altered by the FS. They need to be excluded when
221# verifying the partition integrity.
222#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700223# ReadRangeSet(): a function that takes a RangeSet and returns the
224# data contained in the image blocks of that RangeSet. The data
225# is returned as a list or tuple of strings; concatenating the
226# elements together should produce the requested data.
227# Implementations are free to break up the data into list/tuple
228# elements in any way that is convenient.
229#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700230# TotalSha1(): a function that returns (as a hex string) the SHA-1
231# hash of all the data in the image (ie, all the blocks in the
Tao Bao5fcaaef2015-06-01 13:40:49 -0700232# care_map minus clobbered_blocks, or including the clobbered
233# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700234#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700235# When creating a BlockImageDiff, the src image may be None, in which
236# case the list of transfers produced will never read from the
237# original image.
238
239class BlockImageDiff(object):
Sami Tolvanencac671a2014-12-09 16:40:34 +0000240 def __init__(self, tgt, src=None, threads=None, version=3):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700241 if threads is None:
242 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700243 if threads == 0:
244 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700245 self.threads = threads
Doug Zongkercf4fda72014-09-08 08:29:55 -0700246 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700247 self.transfers = []
248 self.src_basenames = {}
249 self.src_numpatterns = {}
Doug Zongkercf4fda72014-09-08 08:29:55 -0700250
Sami Tolvanencac671a2014-12-09 16:40:34 +0000251 assert version in (1, 2, 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700252
253 self.tgt = tgt
254 if src is None:
255 src = EmptyImage()
256 self.src = src
257
258 # The updater code that installs the patch always uses 4k blocks.
259 assert tgt.blocksize == 4096
260 assert src.blocksize == 4096
261
262 # The range sets in each filemap should comprise a partition of
263 # the care map.
264 self.AssertPartition(src.care_map, src.file_map.values())
265 self.AssertPartition(tgt.care_map, tgt.file_map.values())
266
  def Compute(self, prefix):
    """Run the whole diff pipeline, writing <prefix>.transfer.list,
    <prefix>.new.dat and <prefix>.patch.dat via the methods it calls."""
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) a exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
301
Dan Albert8b72aef2015-03-23 19:13:21 -0700302 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanencac671a2014-12-09 16:40:34 +0000303 data = source.ReadRangeSet(ranges)
304 ctx = sha1()
305
306 for p in data:
307 ctx.update(p)
308
309 return ctx.hexdigest()
310
Doug Zongkerfc44a512014-08-26 13:10:25 -0700311 def WriteTransfers(self, prefix):
312 out = []
313
Doug Zongkerfc44a512014-08-26 13:10:25 -0700314 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700315
Doug Zongkercf4fda72014-09-08 08:29:55 -0700316 stashes = {}
317 stashed_blocks = 0
318 max_stashed_blocks = 0
319
320 free_stash_ids = []
321 next_stash_id = 0
322
Doug Zongkerfc44a512014-08-26 13:10:25 -0700323 for xf in self.transfers:
324
Doug Zongkercf4fda72014-09-08 08:29:55 -0700325 if self.version < 2:
326 assert not xf.stash_before
327 assert not xf.use_stash
328
329 for s, sr in xf.stash_before:
330 assert s not in stashes
331 if free_stash_ids:
332 sid = heapq.heappop(free_stash_ids)
333 else:
334 sid = next_stash_id
335 next_stash_id += 1
336 stashes[s] = sid
337 stashed_blocks += sr.size()
Sami Tolvanencac671a2014-12-09 16:40:34 +0000338 if self.version == 2:
339 out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
340 else:
341 sh = self.HashBlocks(self.src, sr)
342 if sh in stashes:
343 stashes[sh] += 1
344 else:
345 stashes[sh] = 1
346 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongkercf4fda72014-09-08 08:29:55 -0700347
348 if stashed_blocks > max_stashed_blocks:
349 max_stashed_blocks = stashed_blocks
350
Jesse Zhao7ca20d12015-03-02 16:53:08 -0800351 free_string = []
352
Doug Zongkercf4fda72014-09-08 08:29:55 -0700353 if self.version == 1:
Dan Albert8b72aef2015-03-23 19:13:21 -0700354 src_str = xf.src_ranges.to_string_raw()
Sami Tolvanencac671a2014-12-09 16:40:34 +0000355 elif self.version >= 2:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700356
357 # <# blocks> <src ranges>
358 # OR
359 # <# blocks> <src ranges> <src locs> <stash refs...>
360 # OR
361 # <# blocks> - <stash refs...>
362
363 size = xf.src_ranges.size()
Dan Albert8b72aef2015-03-23 19:13:21 -0700364 src_str = [str(size)]
Doug Zongkercf4fda72014-09-08 08:29:55 -0700365
366 unstashed_src_ranges = xf.src_ranges
367 mapped_stashes = []
368 for s, sr in xf.use_stash:
369 sid = stashes.pop(s)
370 stashed_blocks -= sr.size()
371 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
Sami Tolvanencac671a2014-12-09 16:40:34 +0000372 sh = self.HashBlocks(self.src, sr)
Doug Zongkercf4fda72014-09-08 08:29:55 -0700373 sr = xf.src_ranges.map_within(sr)
374 mapped_stashes.append(sr)
Sami Tolvanencac671a2014-12-09 16:40:34 +0000375 if self.version == 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700376 src_str.append("%d:%s" % (sid, sr.to_string_raw()))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000377 else:
378 assert sh in stashes
Dan Albert8b72aef2015-03-23 19:13:21 -0700379 src_str.append("%s:%s" % (sh, sr.to_string_raw()))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000380 stashes[sh] -= 1
381 if stashes[sh] == 0:
Anthony Kingc713d762015-11-03 00:23:11 +0000382 free_string.append("free %s\n" % sh)
Sami Tolvanencac671a2014-12-09 16:40:34 +0000383 stashes.pop(sh)
Doug Zongkercf4fda72014-09-08 08:29:55 -0700384 heapq.heappush(free_stash_ids, sid)
385
386 if unstashed_src_ranges:
Dan Albert8b72aef2015-03-23 19:13:21 -0700387 src_str.insert(1, unstashed_src_ranges.to_string_raw())
Doug Zongkercf4fda72014-09-08 08:29:55 -0700388 if xf.use_stash:
389 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Dan Albert8b72aef2015-03-23 19:13:21 -0700390 src_str.insert(2, mapped_unstashed.to_string_raw())
Doug Zongkercf4fda72014-09-08 08:29:55 -0700391 mapped_stashes.append(mapped_unstashed)
392 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
393 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700394 src_str.insert(1, "-")
Doug Zongkercf4fda72014-09-08 08:29:55 -0700395 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
396
Dan Albert8b72aef2015-03-23 19:13:21 -0700397 src_str = " ".join(src_str)
Doug Zongkercf4fda72014-09-08 08:29:55 -0700398
Sami Tolvanencac671a2014-12-09 16:40:34 +0000399 # all versions:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700400 # zero <rangeset>
401 # new <rangeset>
402 # erase <rangeset>
403 #
404 # version 1:
405 # bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
406 # imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
407 # move <src rangeset> <tgt rangeset>
408 #
409 # version 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700410 # bsdiff patchstart patchlen <tgt rangeset> <src_str>
411 # imgdiff patchstart patchlen <tgt rangeset> <src_str>
412 # move <tgt rangeset> <src_str>
Sami Tolvanencac671a2014-12-09 16:40:34 +0000413 #
414 # version 3:
Dan Albert8b72aef2015-03-23 19:13:21 -0700415 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
416 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
417 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700418
419 tgt_size = xf.tgt_ranges.size()
420
421 if xf.style == "new":
422 assert xf.tgt_ranges
423 out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
424 total += tgt_size
425 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700426 assert xf.tgt_ranges
427 assert xf.src_ranges.size() == tgt_size
428 if xf.src_ranges != xf.tgt_ranges:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700429 if self.version == 1:
430 out.append("%s %s %s\n" % (
431 xf.style,
432 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
433 elif self.version == 2:
434 out.append("%s %s %s\n" % (
435 xf.style,
Dan Albert8b72aef2015-03-23 19:13:21 -0700436 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000437 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100438 # take into account automatic stashing of overlapping blocks
439 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700440 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100441 if temp_stash_usage > max_stashed_blocks:
442 max_stashed_blocks = temp_stash_usage
443
Sami Tolvanencac671a2014-12-09 16:40:34 +0000444 out.append("%s %s %s %s\n" % (
445 xf.style,
446 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700447 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700448 total += tgt_size
449 elif xf.style in ("bsdiff", "imgdiff"):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700450 assert xf.tgt_ranges
451 assert xf.src_ranges
Doug Zongkercf4fda72014-09-08 08:29:55 -0700452 if self.version == 1:
453 out.append("%s %d %d %s %s\n" % (
454 xf.style, xf.patch_start, xf.patch_len,
455 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
456 elif self.version == 2:
457 out.append("%s %d %d %s %s\n" % (
458 xf.style, xf.patch_start, xf.patch_len,
Dan Albert8b72aef2015-03-23 19:13:21 -0700459 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanencac671a2014-12-09 16:40:34 +0000460 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100461 # take into account automatic stashing of overlapping blocks
462 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700463 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100464 if temp_stash_usage > max_stashed_blocks:
465 max_stashed_blocks = temp_stash_usage
466
Sami Tolvanencac671a2014-12-09 16:40:34 +0000467 out.append("%s %d %d %s %s %s %s\n" % (
468 xf.style,
469 xf.patch_start, xf.patch_len,
470 self.HashBlocks(self.src, xf.src_ranges),
471 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700472 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700473 total += tgt_size
474 elif xf.style == "zero":
475 assert xf.tgt_ranges
476 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
477 if to_zero:
478 out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
479 total += to_zero.size()
480 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700481 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700482
Dan Albertee8323b2015-03-27 16:49:01 -0700483 if free_string:
484 out.append("".join(free_string))
Doug Zongkercf4fda72014-09-08 08:29:55 -0700485
Tao Baoe9647142015-08-07 19:49:45 -0700486 if self.version >= 2 and common.OPTIONS.cache_size is not None:
Tao Baod47d8e12015-05-21 14:09:49 -0700487 # Sanity check: abort if we're going to need more stash space than
488 # the allowed size (cache_size * threshold). There are two purposes
489 # of having a threshold here. a) Part of the cache may have been
490 # occupied by some recovery logs. b) It will buy us some time to deal
491 # with the oversize issue.
492 cache_size = common.OPTIONS.cache_size
493 stash_threshold = common.OPTIONS.stash_threshold
494 max_allowed = cache_size * stash_threshold
495 assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
496 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
497 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
498 self.tgt.blocksize, max_allowed, cache_size,
499 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700500
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700501 # Zero out extended blocks as a workaround for bug 20881595.
502 if self.tgt.extended:
503 out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
504
505 # We erase all the blocks on the partition that a) don't contain useful
506 # data in the new image and b) will not be touched by dm-verity.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700507 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700508 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
509 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
510 if new_dontcare:
511 out.append("erase %s\n" % (new_dontcare.to_string_raw(),))
Doug Zongker7b0ddf52014-09-09 12:38:47 -0700512
513 out.insert(0, "%d\n" % (self.version,)) # format version number
514 out.insert(1, str(total) + "\n")
515 if self.version >= 2:
516 # version 2 only: after the total block count, we give the number
517 # of stash slots needed, and the maximum size needed (in blocks)
518 out.insert(2, str(next_stash_id) + "\n")
519 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700520
521 with open(prefix + ".transfer.list", "wb") as f:
522 for i in out:
523 f.write(i)
524
Doug Zongkercf4fda72014-09-08 08:29:55 -0700525 if self.version >= 2:
Tao Baod47d8e12015-05-21 14:09:49 -0700526 max_stashed_size = max_stashed_blocks * self.tgt.blocksize
Tao Baoe9647142015-08-07 19:49:45 -0700527 OPTIONS = common.OPTIONS
528 if OPTIONS.cache_size is not None:
529 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
530 print("max stashed blocks: %d (%d bytes), "
531 "limit: %d bytes (%.2f%%)\n" % (
532 max_stashed_blocks, max_stashed_size, max_allowed,
533 max_stashed_size * 100.0 / max_allowed))
534 else:
535 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
536 max_stashed_blocks, max_stashed_size))
Doug Zongkercf4fda72014-09-08 08:29:55 -0700537
Tao Bao1fc67632015-08-17 09:45:13 -0700538 def ReviseStashSize(self):
539 print("Revising stash size...")
540 stashes = {}
541
542 # Create the map between a stash and its def/use points. For example, for a
543 # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
544 for xf in self.transfers:
545 # Command xf defines (stores) all the stashes in stash_before.
546 for idx, sr in xf.stash_before:
547 stashes[idx] = (sr, xf)
548
549 # Record all the stashes command xf uses.
550 for idx, _ in xf.use_stash:
551 stashes[idx] += (xf,)
552
553 # Compute the maximum blocks available for stash based on /cache size and
554 # the threshold.
555 cache_size = common.OPTIONS.cache_size
556 stash_threshold = common.OPTIONS.stash_threshold
557 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
558
559 stashed_blocks = 0
Tao Bao937847a2015-08-25 15:10:10 -0700560 new_blocks = 0
Tao Bao1fc67632015-08-17 09:45:13 -0700561
562 # Now go through all the commands. Compute the required stash size on the
563 # fly. If a command requires excess stash than available, it deletes the
564 # stash by replacing the command that uses the stash with a "new" command
565 # instead.
566 for xf in self.transfers:
567 replaced_cmds = []
568
569 # xf.stash_before generates explicit stash commands.
570 for idx, sr in xf.stash_before:
571 if stashed_blocks + sr.size() > max_allowed:
572 # We cannot stash this one for a later command. Find out the command
573 # that will use this stash and replace the command with "new".
574 use_cmd = stashes[idx][2]
575 replaced_cmds.append(use_cmd)
Tao Bao937847a2015-08-25 15:10:10 -0700576 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao1fc67632015-08-17 09:45:13 -0700577 else:
578 stashed_blocks += sr.size()
579
580 # xf.use_stash generates free commands.
581 for _, sr in xf.use_stash:
582 stashed_blocks -= sr.size()
583
584 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
585 # ComputePatches(), they both have the style of "diff".
586 if xf.style == "diff" and self.version >= 3:
587 assert xf.tgt_ranges and xf.src_ranges
588 if xf.src_ranges.overlaps(xf.tgt_ranges):
589 if stashed_blocks + xf.src_ranges.size() > max_allowed:
590 replaced_cmds.append(xf)
Tao Bao937847a2015-08-25 15:10:10 -0700591 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao1fc67632015-08-17 09:45:13 -0700592
593 # Replace the commands in replaced_cmds with "new"s.
594 for cmd in replaced_cmds:
595 # It no longer uses any commands in "use_stash". Remove the def points
596 # for all those stashes.
597 for idx, sr in cmd.use_stash:
598 def_cmd = stashes[idx][1]
599 assert (idx, sr) in def_cmd.stash_before
600 def_cmd.stash_before.remove((idx, sr))
Tao Bao937847a2015-08-25 15:10:10 -0700601 new_blocks += sr.size()
Tao Bao1fc67632015-08-17 09:45:13 -0700602
603 cmd.ConvertToNew()
604
Tao Bao937847a2015-08-25 15:10:10 -0700605 print(" Total %d blocks are packed as new blocks due to insufficient "
606 "cache size." % (new_blocks,))
607
Doug Zongkerfc44a512014-08-26 13:10:25 -0700608 def ComputePatches(self, prefix):
609 print("Reticulating splines...")
610 diff_q = []
611 patch_num = 0
612 with open(prefix + ".new.dat", "wb") as new_f:
613 for xf in self.transfers:
614 if xf.style == "zero":
615 pass
616 elif xf.style == "new":
617 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
618 new_f.write(piece)
619 elif xf.style == "diff":
620 src = self.src.ReadRangeSet(xf.src_ranges)
621 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
622
623 # We can't compare src and tgt directly because they may have
624 # the same content but be broken up into blocks differently, eg:
625 #
626 # ["he", "llo"] vs ["h", "ello"]
627 #
628 # We want those to compare equal, ideally without having to
629 # actually concatenate the strings (these may be tens of
630 # megabytes).
631
632 src_sha1 = sha1()
633 for p in src:
634 src_sha1.update(p)
635 tgt_sha1 = sha1()
636 tgt_size = 0
637 for p in tgt:
638 tgt_sha1.update(p)
639 tgt_size += len(p)
640
641 if src_sha1.digest() == tgt_sha1.digest():
642 # These are identical; we don't need to generate a patch,
643 # just issue copy commands on the device.
644 xf.style = "move"
645 else:
646 # For files in zip format (eg, APKs, JARs, etc.) we would
647 # like to use imgdiff -z if possible (because it usually
648 # produces significantly smaller patches than bsdiff).
649 # This is permissible if:
650 #
651 # - the source and target files are monotonic (ie, the
652 # data is stored with blocks in increasing order), and
653 # - we haven't removed any blocks from the source set.
654 #
655 # If these conditions are satisfied then appending all the
656 # blocks in the set together in order will produce a valid
657 # zip file (plus possibly extra zeros in the last block),
658 # which is what imgdiff needs to operate. (imgdiff is
659 # fine with extra zeros at the end of the file.)
660 imgdiff = (xf.intact and
661 xf.tgt_name.split(".")[-1].lower()
662 in ("apk", "jar", "zip"))
663 xf.style = "imgdiff" if imgdiff else "bsdiff"
664 diff_q.append((tgt_size, src, tgt, xf, patch_num))
665 patch_num += 1
666
667 else:
668 assert False, "unknown style " + xf.style
669
670 if diff_q:
671 if self.threads > 1:
672 print("Computing patches (using %d threads)..." % (self.threads,))
673 else:
674 print("Computing patches...")
675 diff_q.sort()
676
677 patches = [None] * patch_num
678
Dan Albert8b72aef2015-03-23 19:13:21 -0700679 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700680 lock = threading.Lock()
681 def diff_worker():
682 while True:
683 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700684 if not diff_q:
685 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700686 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
687 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
688 size = len(patch)
689 with lock:
690 patches[patchnum] = (patch, xf)
691 print("%10d %10d (%6.2f%%) %7s %s" % (
692 size, tgt_size, size * 100.0 / tgt_size, xf.style,
693 xf.tgt_name if xf.tgt_name == xf.src_name else (
694 xf.tgt_name + " (from " + xf.src_name + ")")))
695
696 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700697 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700698 for th in threads:
699 th.start()
700 while threads:
701 threads.pop().join()
702 else:
703 patches = []
704
705 p = 0
706 with open(prefix + ".patch.dat", "wb") as patch_f:
707 for patch, xf in patches:
708 xf.patch_start = p
709 xf.patch_len = len(patch)
710 patch_f.write(patch)
711 p += len(patch)
712
713 def AssertSequenceGood(self):
714 # Simulate the sequences of transfers we will output, and check that:
715 # - we never read a block after writing it, and
716 # - we write every block we care about exactly once.
717
718 # Start with no blocks having been touched yet.
719 touched = RangeSet()
720
721 # Imagine processing the transfers in order.
722 for xf in self.transfers:
723 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongkercf4fda72014-09-08 08:29:55 -0700724
725 x = xf.src_ranges
726 if self.version >= 2:
727 for _, sr in xf.use_stash:
728 x = x.subtract(sr)
729
730 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700731 # Check that the output blocks for this transfer haven't yet been touched.
732 assert not touched.overlaps(xf.tgt_ranges)
733 # Touch all the blocks written by this transfer.
734 touched = touched.union(xf.tgt_ranges)
735
736 # Check that we've written every target block.
737 assert touched == self.tgt.care_map
738
Doug Zongkercf4fda72014-09-08 08:29:55 -0700739 def ImproveVertexSequence(self):
740 print("Improving vertex order...")
741
742 # At this point our digraph is acyclic; we reversed any edges that
743 # were backwards in the heuristically-generated sequence. The
744 # previously-generated order is still acceptable, but we hope to
745 # find a better order that needs less memory for stashed data.
746 # Now we do a topological sort to generate a new vertex order,
747 # using a greedy algorithm to choose which vertex goes next
748 # whenever we have a choice.
749
750 # Make a copy of the edge set; this copy will get destroyed by the
751 # algorithm.
752 for xf in self.transfers:
753 xf.incoming = xf.goes_after.copy()
754 xf.outgoing = xf.goes_before.copy()
755
756 L = [] # the new vertex order
757
758 # S is the set of sources in the remaining graph; we always choose
759 # the one that leaves the least amount of stashed data after it's
760 # executed.
761 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
762 if not u.incoming]
763 heapq.heapify(S)
764
765 while S:
766 _, _, xf = heapq.heappop(S)
767 L.append(xf)
768 for u in xf.outgoing:
769 del u.incoming[xf]
770 if not u.incoming:
771 heapq.heappush(S, (u.NetStashChange(), u.order, u))
772
773 # if this fails then our graph had a cycle.
774 assert len(L) == len(self.transfers)
775
776 self.transfers = L
777 for i, xf in enumerate(L):
778 xf.order = i
779
Doug Zongkerfc44a512014-08-26 13:10:25 -0700780 def RemoveBackwardEdges(self):
781 print("Removing backward edges...")
782 in_order = 0
783 out_of_order = 0
784 lost_source = 0
785
786 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700787 lost = 0
788 size = xf.src_ranges.size()
789 for u in xf.goes_before:
790 # xf should go before u
791 if xf.order < u.order:
792 # it does, hurray!
Doug Zongkercf4fda72014-09-08 08:29:55 -0700793 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700794 else:
795 # it doesn't, boo. trim the blocks that u writes from xf's
796 # source, so that xf can go after u.
Doug Zongkercf4fda72014-09-08 08:29:55 -0700797 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700798 assert xf.src_ranges.overlaps(u.tgt_ranges)
799 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
800 xf.intact = False
801
802 if xf.style == "diff" and not xf.src_ranges:
803 # nothing left to diff from; treat as new data
804 xf.style = "new"
805
806 lost = size - xf.src_ranges.size()
807 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700808
809 print((" %d/%d dependencies (%.2f%%) were violated; "
810 "%d source blocks removed.") %
811 (out_of_order, in_order + out_of_order,
812 (out_of_order * 100.0 / (in_order + out_of_order))
813 if (in_order + out_of_order) else 0.0,
814 lost_source))
815
Doug Zongkercf4fda72014-09-08 08:29:55 -0700816 def ReverseBackwardEdges(self):
817 print("Reversing backward edges...")
818 in_order = 0
819 out_of_order = 0
820 stashes = 0
821 stash_size = 0
822
823 for xf in self.transfers:
Doug Zongkercf4fda72014-09-08 08:29:55 -0700824 for u in xf.goes_before.copy():
825 # xf should go before u
826 if xf.order < u.order:
827 # it does, hurray!
828 in_order += 1
829 else:
830 # it doesn't, boo. modify u to stash the blocks that it
831 # writes that xf wants to read, and then require u to go
832 # before xf.
833 out_of_order += 1
834
835 overlap = xf.src_ranges.intersect(u.tgt_ranges)
836 assert overlap
837
838 u.stash_before.append((stashes, overlap))
839 xf.use_stash.append((stashes, overlap))
840 stashes += 1
841 stash_size += overlap.size()
842
843 # reverse the edge direction; now xf must go after u
844 del xf.goes_before[u]
845 del u.goes_after[xf]
846 xf.goes_after[u] = None # value doesn't matter
847 u.goes_before[xf] = None
848
849 print((" %d/%d dependencies (%.2f%%) were violated; "
850 "%d source blocks stashed.") %
851 (out_of_order, in_order + out_of_order,
852 (out_of_order * 100.0 / (in_order + out_of_order))
853 if (in_order + out_of_order) else 0.0,
854 stash_size))
855
  def FindVertexSequence(self):
    """Order the transfers so few/cheap edges point backwards.

    Assigns each transfer an 'order' index and rearranges
    self.transfers to match.  The resulting sequence tries to minimize
    the total weight of left-pointing (backward) edges, which a later
    pass will remove or reverse.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    while G:

      # Put all sinks at the end of the sequence.
      # (Repeats because removing a sink may create new sinks.)
      while True:
        sinks = [u for u in G if not u.outgoing]
        if not sinks:
          break
        for u in sinks:
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            del iu.outgoing[u]

      # Put all the sources at the beginning of the sequence.
      # (Likewise repeated: removing a source may expose new sources.)
      while True:
        sources = [u for u in G if not u.incoming]
        if not sources:
          break
        for u in sources:
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            del iu.incoming[u]

      if not G:
        break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      max_d = None
      best_u = None
      for u in G:
        d = sum(u.outgoing.values()) - sum(u.incoming.values())
        if best_u is None or d > max_d:
          max_d = d
          best_u = u

      # Remove the chosen vertex and all edges touching it, then loop
      # back to peel off any sinks/sources this removal exposed.
      u = best_u
      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        del iu.incoming[u]
      for iu in u.incoming:
        del iu.outgoing[u]

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      # The scratch edge copies are no longer needed.
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
942
943 def GenerateDigraph(self):
944 print("Generating digraph...")
945 for a in self.transfers:
946 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700947 if a is b:
948 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700949
950 # If the blocks written by A are read by B, then B needs to go before A.
951 i = a.tgt_ranges.intersect(b.src_ranges)
952 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700953 if b.src_name == "__ZERO":
954 # the cost of removing source blocks for the __ZERO domain
955 # is (nearly) zero.
956 size = 0
957 else:
958 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700959 b.goes_before[a] = size
960 a.goes_after[b] = size
961
962 def FindTransfers(self):
Tao Bao937847a2015-08-25 15:10:10 -0700963 """Parse the file_map to generate all the transfers."""
964
965 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
966 split=False):
967 """Wrapper function for adding a Transfer().
968
969 For BBOTA v3, we need to stash source blocks for resumable feature.
970 However, with the growth of file size and the shrink of the cache
971 partition source blocks are too large to be stashed. If a file occupies
972 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
973 into smaller pieces by getting multiple Transfer()s.
974
975 The downside is that after splitting, we can no longer use imgdiff but
976 only bsdiff."""
977
978 MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
979
980 # We care about diff transfers only.
981 if style != "diff" or not split:
982 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
983 return
984
985 # Change nothing for small files.
986 if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
987 src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
988 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
989 return
990
991 pieces = 0
992 while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
993 src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
994 tgt_split_name = "%s-%d" % (tgt_name, pieces)
995 src_split_name = "%s-%d" % (src_name, pieces)
996 tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
997 src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
998 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
999 by_id)
1000
1001 tgt_ranges = tgt_ranges.subtract(tgt_first)
1002 src_ranges = src_ranges.subtract(src_first)
1003 pieces += 1
1004
1005 # Handle remaining blocks.
1006 if tgt_ranges.size() or src_ranges.size():
1007 # Must be both non-empty.
1008 assert tgt_ranges.size() and src_ranges.size()
1009 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1010 src_split_name = "%s-%d" % (src_name, pieces)
1011 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1012 by_id)
1013
Doug Zongkerfc44a512014-08-26 13:10:25 -07001014 empty = RangeSet()
1015 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1016 if tgt_fn == "__ZERO":
1017 # the special "__ZERO" domain is all the blocks not contained
1018 # in any file and that are filled with zeros. We have a
1019 # special transfer style for zero blocks.
1020 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao937847a2015-08-25 15:10:10 -07001021 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1022 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001023 continue
1024
Tao Bao5ece99d2015-05-12 11:42:31 -07001025 elif tgt_fn == "__COPY":
1026 # "__COPY" domain includes all the blocks not contained in any
1027 # file and that need to be copied unconditionally to the target.
Tao Bao937847a2015-08-25 15:10:10 -07001028 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Bao5ece99d2015-05-12 11:42:31 -07001029 continue
1030
Doug Zongkerfc44a512014-08-26 13:10:25 -07001031 elif tgt_fn in self.src.file_map:
1032 # Look for an exact pathname match in the source.
Tao Bao937847a2015-08-25 15:10:10 -07001033 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1034 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001035 continue
1036
1037 b = os.path.basename(tgt_fn)
1038 if b in self.src_basenames:
1039 # Look for an exact basename match in the source.
1040 src_fn = self.src_basenames[b]
Tao Bao937847a2015-08-25 15:10:10 -07001041 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1042 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001043 continue
1044
1045 b = re.sub("[0-9]+", "#", b)
1046 if b in self.src_numpatterns:
1047 # Look for a 'number pattern' match (a basename match after
1048 # all runs of digits are replaced by "#"). (This is useful
1049 # for .so files that contain version numbers in the filename
1050 # that get bumped.)
1051 src_fn = self.src_numpatterns[b]
Tao Bao937847a2015-08-25 15:10:10 -07001052 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1053 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001054 continue
1055
Tao Bao937847a2015-08-25 15:10:10 -07001056 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001057
1058 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001059 for k in self.src.file_map.keys():
1060 b = os.path.basename(k)
1061 self.src_basenames[b] = k
1062 b = re.sub("[0-9]+", "#", b)
1063 self.src_numpatterns[b] = k
1064
1065 @staticmethod
1066 def AssertPartition(total, seq):
1067 """Assert that all the RangeSets in 'seq' form a partition of the
1068 'total' RangeSet (ie, they are nonintersecting and their union
1069 equals 'total')."""
1070 so_far = RangeSet()
1071 for i in seq:
1072 assert not so_far.overlaps(i)
1073 so_far = so_far.union(i)
1074 assert so_far == total