blob: c7a9e2b5991298707ca86e891f098376cce393b8 [file] [log] [blame]
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Bao8dcf7382015-05-21 14:09:49 -070019import common
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms src into tgt.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data);
        otherwise run "bsdiff".

  Returns:
    The patch contents, as a string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  # We only need the patch file's name (the tool writes it); close the
  # descriptor now so it isn't leaked.
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)
    try:
      os.unlink(patchfile)
    except OSError:
      pass
    if imgdiff:
      # Discard imgdiff's chatter; use a context manager so the
      # /dev/null handle is closed instead of leaked on every call.
      with open("/dev/null", "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Best-effort cleanup of all three temp files.
    try:
      os.unlink(srcfile)
      os.unlink(tgtfile)
      os.unlink(patchfile)
    except OSError:
      pass
Dan Albert8b72aef2015-03-23 19:13:21 -070073
74class Image(object):
75 def ReadRangeSet(self, ranges):
76 raise NotImplementedError
77
Tao Bao68658c02015-06-01 13:40:49 -070078 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070079 raise NotImplementedError
80
81
82class EmptyImage(Image):
Doug Zongkerfc44a512014-08-26 13:10:25 -070083 """A zero-length image."""
84 blocksize = 4096
85 care_map = RangeSet()
Tao Baoff777812015-05-12 11:42:31 -070086 clobbered_blocks = RangeSet()
Tao Baoe9b61912015-07-09 17:37:49 -070087 extended = RangeSet()
Doug Zongkerfc44a512014-08-26 13:10:25 -070088 total_blocks = 0
89 file_map = {}
90 def ReadRangeSet(self, ranges):
91 return ()
Tao Bao68658c02015-06-01 13:40:49 -070092 def TotalSha1(self, include_clobbered_blocks=False):
93 # EmptyImage always carries empty clobbered_blocks, so
94 # include_clobbered_blocks can be ignored.
95 assert self.clobbered_blocks.size() == 0
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070096 return sha1().hexdigest()
97
98
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: the raw image contents, as a string.
      trim: if True, drop any trailing partial block.
      pad: if True, zero-fill any trailing partial block.
          At most one of trim/pad may be set; if neither is, len(data)
          must already be a multiple of the block size.

    Raises:
      ValueError: if data is not block-aligned and neither trim nor pad
          was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3 as
    # well ("/" would produce a float there); identical result in Python 2.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    # Partition the blocks into all-zero vs. nonzero; each list holds
    # (start, end) pairs flattened, which is the form RangeSet(data) takes.
    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    self.file_map = {"__ZERO": RangeSet(zero_blocks),
                     "__NONZERO": RangeSet(nonzero_blocks)}

  def ReadRangeSet(self, ranges):
    """Return the data for the given blocks, one string per range."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the whole image.

    DataImage always carries empty clobbered_blocks, so
    include_clobbered_blocks can be ignored.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700150
Doug Zongkerfc44a512014-08-26 13:10:25 -0700151
class Transfer(object):
  """One update step: produce tgt_ranges (from src_ranges) via 'style'."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style

    # A transfer is "intact" only when both of its range sets are monotonic.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # OrderedDict (not dict) keeps the output repeatable; with a plain
    # dict it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register ourselves: our id is our index in the by_id list.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Blocks newly stashed by this command, minus the blocks it frees."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrite this transfer as a "new" command reading no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
186
187
188# BlockImageDiff works on two image objects. An image object is
189# anything that provides the following attributes:
190#
191# blocksize: the size in bytes of a block, currently must be 4096.
192#
193# total_blocks: the total size of the partition/image, in blocks.
194#
195# care_map: a RangeSet containing which blocks (in the range [0,
196# total_blocks) we actually care about; i.e. which blocks contain
197# data.
198#
199# file_map: a dict that partitions the blocks contained in care_map
200# into smaller domains that are useful for doing diffs on.
201# (Typically a domain is a file, and the key in file_map is the
202# pathname.)
203#
Tao Baoff777812015-05-12 11:42:31 -0700204# clobbered_blocks: a RangeSet containing which blocks contain data
205# but may be altered by the FS. They need to be excluded when
206# verifying the partition integrity.
207#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700208# ReadRangeSet(): a function that takes a RangeSet and returns the
209# data contained in the image blocks of that RangeSet. The data
210# is returned as a list or tuple of strings; concatenating the
211# elements together should produce the requested data.
212# Implementations are free to break up the data into list/tuple
213# elements in any way that is convenient.
214#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700215# TotalSha1(): a function that returns (as a hex string) the SHA-1
216# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700217# care_map minus clobbered_blocks, or including the clobbered
218# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700219#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700220# When creating a BlockImageDiff, the src image may be None, in which
221# case the list of transfers produced will never read from the
222# original image.
223
224class BlockImageDiff(object):
Sami Tolvanendd67a292014-12-09 16:40:34 +0000225 def __init__(self, tgt, src=None, threads=None, version=3):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700226 if threads is None:
227 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700228 if threads == 0:
229 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700230 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700231 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700232 self.transfers = []
233 self.src_basenames = {}
234 self.src_numpatterns = {}
Doug Zongker62338182014-09-08 08:29:55 -0700235
Sami Tolvanendd67a292014-12-09 16:40:34 +0000236 assert version in (1, 2, 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700237
238 self.tgt = tgt
239 if src is None:
240 src = EmptyImage()
241 self.src = src
242
243 # The updater code that installs the patch always uses 4k blocks.
244 assert tgt.blocksize == 4096
245 assert src.blocksize == 4096
246
247 # The range sets in each filemap should comprise a partition of
248 # the care map.
249 self.AssertPartition(src.care_map, src.file_map.values())
250 self.AssertPartition(tgt.care_map, tgt.file_map.values())
251
  def Compute(self, prefix):
    """Run the whole diff pipeline and write the output files.

    Produces <prefix>.transfer.list, <prefix>.new.dat and
    <prefix>.patch.dat via the steps below.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
286
Dan Albert8b72aef2015-03-23 19:13:21 -0700287 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000288 data = source.ReadRangeSet(ranges)
289 ctx = sha1()
290
291 for p in data:
292 ctx.update(p)
293
294 return ctx.hexdigest()
295
  def WriteTransfers(self, prefix):
    """Serialize self.transfers to <prefix>.transfer.list.

    Emits the versioned block-image-update command stream (including
    stash/free bookkeeping), prepends the header (format version, total
    blocks written, and -- for version >= 2 -- the number of stash slots
    and the maximum stash size in blocks), and appends trailing
    zero/erase commands for blocks outside the target care map.
    """
    out = []

    # Total number of target blocks written by new/move/diff/zero commands.
    total = 0

    # Maps a stash key to its slot id (v2) or tracks refcounts per
    # stash hash (v3).  stashed_blocks tracks the running stash usage;
    # max_stashed_blocks records its high-water mark.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # v2 stash slot numbers are recycled via this min-heap.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        stashed_blocks += sr.size()
        if self.version == 2:
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # v3 keys stashes by content hash; identical ranges share one
          # stash entry via a refcount and are only emitted once.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw()
      elif self.version >= 2:

        # Build the source spec, one of:
        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          stashed_blocks -= sr.size()
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed range in coordinates relative to the
          # full source range of this transfer.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            # Drop one reference; free the stash once nobody else uses it.
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op on the device.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        # Only zero the target blocks not already zero in the source.
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))   # format version number
    out.insert(1, str(total) + "\n")
    if self.version >= 2:
      # version 2+ only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      # Report peak stash usage against the configured limit (if any).
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700528
  def ReviseStashSize(self):
    """Keep runtime stash usage under the cache limit.

    Simulates stash usage across the transfer sequence; whenever
    stashing a range (explicitly via stash_before, or implicitly for
    overlapping v3 "diff" commands) would exceed the allowed fraction
    of /cache, the command that consumes the stash is converted into a
    "new" command instead.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))
          new_blocks += sr.size()

        cmd.ConvertToNew()

    print(" Total %d blocks are packed as new blocks due to insufficient "
          "cache size." % (new_blocks,))
598
  def ComputePatches(self, prefix):
    """Compute patches; write <prefix>.new.dat and <prefix>.patch.dat.

    "diff" transfers whose source already equals their target become
    "move" commands; the rest become "bsdiff" or "imgdiff" and have
    their patches computed (in parallel across self.threads workers)
    and concatenated into the patch file, recording each transfer's
    patch_start/patch_len.  "new" transfers have their target data
    written to the new-data file.
    """
    print("Reticulating splines...")
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate.  (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      # Sort so the largest targets are popped (and reported) last.
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        # Worker loop: pop one queued diff at a time under the lock,
        # compute the patch outside the lock, store the result under it.
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    # Concatenate the patches in queue order, recording each transfer's
    # offset and length within the combined patch file.
    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)
703
704 def AssertSequenceGood(self):
705 # Simulate the sequences of transfers we will output, and check that:
706 # - we never read a block after writing it, and
707 # - we write every block we care about exactly once.
708
709 # Start with no blocks having been touched yet.
710 touched = RangeSet()
711
712 # Imagine processing the transfers in order.
713 for xf in self.transfers:
714 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700715
716 x = xf.src_ranges
717 if self.version >= 2:
718 for _, sr in xf.use_stash:
719 x = x.subtract(sr)
720
721 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700722 # Check that the output blocks for this transfer haven't yet been touched.
723 assert not touched.overlaps(xf.tgt_ranges)
724 # Touch all the blocks written by this transfer.
725 touched = touched.union(xf.tgt_ranges)
726
727 # Check that we've written every target block.
728 assert touched == self.tgt.care_map
729
Doug Zongker62338182014-09-08 08:29:55 -0700730 def ImproveVertexSequence(self):
731 print("Improving vertex order...")
732
733 # At this point our digraph is acyclic; we reversed any edges that
734 # were backwards in the heuristically-generated sequence. The
735 # previously-generated order is still acceptable, but we hope to
736 # find a better order that needs less memory for stashed data.
737 # Now we do a topological sort to generate a new vertex order,
738 # using a greedy algorithm to choose which vertex goes next
739 # whenever we have a choice.
740
741 # Make a copy of the edge set; this copy will get destroyed by the
742 # algorithm.
743 for xf in self.transfers:
744 xf.incoming = xf.goes_after.copy()
745 xf.outgoing = xf.goes_before.copy()
746
747 L = [] # the new vertex order
748
749 # S is the set of sources in the remaining graph; we always choose
750 # the one that leaves the least amount of stashed data after it's
751 # executed.
752 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
753 if not u.incoming]
754 heapq.heapify(S)
755
756 while S:
757 _, _, xf = heapq.heappop(S)
758 L.append(xf)
759 for u in xf.outgoing:
760 del u.incoming[xf]
761 if not u.incoming:
762 heapq.heappush(S, (u.NetStashChange(), u.order, u))
763
764 # if this fails then our graph had a cycle.
765 assert len(L) == len(self.transfers)
766
767 self.transfers = L
768 for i, xf in enumerate(L):
769 xf.order = i
770
Doug Zongkerfc44a512014-08-26 13:10:25 -0700771 def RemoveBackwardEdges(self):
772 print("Removing backward edges...")
773 in_order = 0
774 out_of_order = 0
775 lost_source = 0
776
777 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700778 lost = 0
779 size = xf.src_ranges.size()
780 for u in xf.goes_before:
781 # xf should go before u
782 if xf.order < u.order:
783 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700784 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700785 else:
786 # it doesn't, boo. trim the blocks that u writes from xf's
787 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700788 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700789 assert xf.src_ranges.overlaps(u.tgt_ranges)
790 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
791 xf.intact = False
792
793 if xf.style == "diff" and not xf.src_ranges:
794 # nothing left to diff from; treat as new data
795 xf.style = "new"
796
797 lost = size - xf.src_ranges.size()
798 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700799
800 print((" %d/%d dependencies (%.2f%%) were violated; "
801 "%d source blocks removed.") %
802 (out_of_order, in_order + out_of_order,
803 (out_of_order * 100.0 / (in_order + out_of_order))
804 if (in_order + out_of_order) else 0.0,
805 lost_source))
806
Doug Zongker62338182014-09-08 08:29:55 -0700807 def ReverseBackwardEdges(self):
808 print("Reversing backward edges...")
809 in_order = 0
810 out_of_order = 0
811 stashes = 0
812 stash_size = 0
813
814 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700815 for u in xf.goes_before.copy():
816 # xf should go before u
817 if xf.order < u.order:
818 # it does, hurray!
819 in_order += 1
820 else:
821 # it doesn't, boo. modify u to stash the blocks that it
822 # writes that xf wants to read, and then require u to go
823 # before xf.
824 out_of_order += 1
825
826 overlap = xf.src_ranges.intersect(u.tgt_ranges)
827 assert overlap
828
829 u.stash_before.append((stashes, overlap))
830 xf.use_stash.append((stashes, overlap))
831 stashes += 1
832 stash_size += overlap.size()
833
834 # reverse the edge direction; now xf must go after u
835 del xf.goes_before[u]
836 del u.goes_after[xf]
837 xf.goes_after[u] = None # value doesn't matter
838 u.goes_before[xf] = None
839
840 print((" %d/%d dependencies (%.2f%%) were violated; "
841 "%d source blocks stashed.") %
842 (out_of_order, in_order + out_of_order,
843 (out_of_order * 100.0 / (in_order + out_of_order))
844 if (in_order + out_of_order) else 0.0,
845 stash_size))
846
  def FindVertexSequence(self):
    """Order self.transfers so few/cheap dependency edges point backwards.

    On return, self.transfers is reordered, each transfer's .order field
    holds its new position, and the temporary .incoming/.outgoing edge
    copies have been deleted.  Edges that still point "left" in this
    order are resolved later (by RemoveBackwardEdges or
    ReverseBackwardEdges).
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    while G:

      # Put all sinks at the end of the sequence.  Removing a sink can
      # expose new sinks, so repeat until none remain.
      while True:
        sinks = [u for u in G if not u.outgoing]
        if not sinks:
          break
        for u in sinks:
          s2.appendleft(u)
          del G[u]
          # Detach u from every remaining vertex that pointed at it.
          for iu in u.incoming:
            del iu.outgoing[u]

      # Put all the sources at the beginning of the sequence.  Same
      # fixed-point iteration as for sinks above.
      while True:
        sources = [u for u in G if not u.incoming]
        if not sources:
          break
        for u in sources:
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            del iu.incoming[u]

      # Every vertex was classified as a source or sink; we're done.
      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.  Ties keep the
      # first vertex seen in G, so the output stays deterministic.

      max_d = None
      best_u = None
      for u in G:
        d = sum(u.outgoing.values()) - sum(u.incoming.values())
        if best_u is None or d > max_d:
          max_d = d
          best_u = u

      # Emit the chosen vertex on the left side and fully detach it.
      u = best_u
      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        del iu.incoming[u]
      for iu in u.incoming:
        del iu.outgoing[u]

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      # The scratch edge copies are no longer needed.
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
933
934 def GenerateDigraph(self):
935 print("Generating digraph...")
936 for a in self.transfers:
937 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700938 if a is b:
939 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700940
941 # If the blocks written by A are read by B, then B needs to go before A.
942 i = a.tgt_ranges.intersect(b.src_ranges)
943 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700944 if b.src_name == "__ZERO":
945 # the cost of removing source blocks for the __ZERO domain
946 # is (nearly) zero.
947 size = 0
948 else:
949 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700950 b.goes_before[a] = size
951 a.goes_after[b] = size
952
953 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -0700954 """Parse the file_map to generate all the transfers."""
955
956 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
957 split=False):
958 """Wrapper function for adding a Transfer().
959
960 For BBOTA v3, we need to stash source blocks for resumable feature.
961 However, with the growth of file size and the shrink of the cache
962 partition source blocks are too large to be stashed. If a file occupies
963 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
964 into smaller pieces by getting multiple Transfer()s.
965
966 The downside is that after splitting, we can no longer use imgdiff but
967 only bsdiff."""
968
969 MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
970
971 # We care about diff transfers only.
972 if style != "diff" or not split:
973 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
974 return
975
976 # Change nothing for small files.
977 if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
978 src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
979 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
980 return
981
982 pieces = 0
983 while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
984 src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
985 tgt_split_name = "%s-%d" % (tgt_name, pieces)
986 src_split_name = "%s-%d" % (src_name, pieces)
987 tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
988 src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
989 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
990 by_id)
991
992 tgt_ranges = tgt_ranges.subtract(tgt_first)
993 src_ranges = src_ranges.subtract(src_first)
994 pieces += 1
995
996 # Handle remaining blocks.
997 if tgt_ranges.size() or src_ranges.size():
998 # Must be both non-empty.
999 assert tgt_ranges.size() and src_ranges.size()
1000 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1001 src_split_name = "%s-%d" % (src_name, pieces)
1002 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1003 by_id)
1004
Doug Zongkerfc44a512014-08-26 13:10:25 -07001005 empty = RangeSet()
1006 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1007 if tgt_fn == "__ZERO":
1008 # the special "__ZERO" domain is all the blocks not contained
1009 # in any file and that are filled with zeros. We have a
1010 # special transfer style for zero blocks.
1011 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001012 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1013 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001014 continue
1015
Tao Baoff777812015-05-12 11:42:31 -07001016 elif tgt_fn == "__COPY":
1017 # "__COPY" domain includes all the blocks not contained in any
1018 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001019 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001020 continue
1021
Doug Zongkerfc44a512014-08-26 13:10:25 -07001022 elif tgt_fn in self.src.file_map:
1023 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001024 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1025 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001026 continue
1027
1028 b = os.path.basename(tgt_fn)
1029 if b in self.src_basenames:
1030 # Look for an exact basename match in the source.
1031 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001032 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1033 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001034 continue
1035
1036 b = re.sub("[0-9]+", "#", b)
1037 if b in self.src_numpatterns:
1038 # Look for a 'number pattern' match (a basename match after
1039 # all runs of digits are replaced by "#"). (This is useful
1040 # for .so files that contain version numbers in the filename
1041 # that get bumped.)
1042 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001043 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1044 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001045 continue
1046
Tao Bao9a5caf22015-08-25 15:10:10 -07001047 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001048
1049 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001050 for k in self.src.file_map.keys():
1051 b = os.path.basename(k)
1052 self.src_basenames[b] = k
1053 b = re.sub("[0-9]+", "#", b)
1054 self.src_numpatterns[b] = k
1055
1056 @staticmethod
1057 def AssertPartition(total, seq):
1058 """Assert that all the RangeSets in 'seq' form a partition of the
1059 'total' RangeSet (ie, they are nonintersecting and their union
1060 equals 'total')."""
1061 so_far = RangeSet()
1062 for i in seq:
1063 assert not so_far.overlaps(i)
1064 so_far = so_far.union(i)
1065 assert so_far == total