Blame - tools/releasetools/blockimgdiff.py - platform_build_make

blob: 199783d92d3b862ddda50b687f03f0f5dd0cc664 [file] [log] [blame]

Doug Zongker	424296a	2014-09-02 08:53:09 -0700	[diff] [blame]	1	# Copyright (C) 2014 The Android Open Source Project
				2	#
				3	# Licensed under the Apache License, Version 2.0 (the "License");
				4	# you may not use this file except in compliance with the License.
				5	# You may obtain a copy of the License at
				6	#
				7	# http://www.apache.org/licenses/LICENSE-2.0
				8	#
				9	# Unless required by applicable law or agreed to in writing, software
				10	# distributed under the License is distributed on an "AS IS" BASIS,
				11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	# See the License for the specific language governing permissions and
				13	# limitations under the License.
				14
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	15	from __future__ import print_function
				16
				17	from collections import deque, OrderedDict
				18	from hashlib import sha1
Tao Bao	8dcf738	2015-05-21 14:09:49 -0700	[diff] [blame]	19	import common
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	20	import heapq
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	21	import itertools
				22	import multiprocessing
				23	import os
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	24	import re
				25	import subprocess
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	26	import threading
				27	import tempfile
				28
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	29	from rangelib import RangeSet
				30
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	31
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	32	__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
				33
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	34
def compute_patch(src, tgt, imgdiff=False):
  """Run an external diff tool on two byte streams and return the patch.

  Args:
    src: iterable of byte strings; written back to back, they form the
      source file handed to the diff tool.
    tgt: iterable of byte strings forming the target file.
    imgdiff: if true, run "imgdiff -z" (its output is discarded); otherwise
      run "bsdiff".

  Returns:
    The contents of the patch file produced by the tool.

  Raises:
    ValueError: if the diff tool exits with a non-zero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    # Wrap both descriptors up front so that a failure while writing either
    # file still closes the other one.
    with os.fdopen(srcfd, "wb") as f_src, os.fdopen(tgtfd, "wb") as f_tgt:
      for p in src:
        f_src.write(p)
      for p in tgt:
        f_tgt.write(p)

    # mkstemp() was only used to reserve a unique name; remove the empty
    # placeholder before the tool runs (presumably so the tool creates the
    # output file itself).
    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Close the null device handle once the tool has finished instead of
      # leaving it for the garbage collector.
      with open(os.devnull, "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Best-effort cleanup. Each file is removed independently so that one
    # missing file does not leave the others behind.
    for name in (srcfile, tgtfile, patchfile):
      try:
        os.unlink(name)
      except OSError:
        pass
				72
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	73
				74	class Image(object):
				75	def ReadRangeSet(self, ranges):
				76	raise NotImplementedError
				77
Tao Bao	68658c0	2015-06-01 13:40:49 -0700	[diff] [blame]	78	def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	79	raise NotImplementedError
				80
				81
				82	class EmptyImage(Image):
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	83	"""A zero-length image."""
				84	blocksize = 4096
				85	care_map = RangeSet()
Tao Bao	ff77781	2015-05-12 11:42:31 -0700	[diff] [blame]	86	clobbered_blocks = RangeSet()
Tao Bao	e9b6191	2015-07-09 17:37:49 -0700	[diff] [blame]	87	extended = RangeSet()
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	88	total_blocks = 0
				89	file_map = {}
				90	def ReadRangeSet(self, ranges):
				91	return ()
Tao Bao	68658c0	2015-06-01 13:40:49 -0700	[diff] [blame]	92	def TotalSha1(self, include_clobbered_blocks=False):
				93	# EmptyImage always carries empty clobbered_blocks, so
				94	# include_clobbered_blocks can be ignored.
				95	assert self.clobbered_blocks.size() == 0
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	96	return sha1().hexdigest()
				97
				98
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap `data` as an image made of 4096-byte blocks.

    Args:
      data: the raw image contents (a byte string).
      trim: if true, drop a trailing partial block.
      pad: if true, extend a trailing partial block with zero bytes.

    Raises:
      ValueError: if the data is not a whole number of blocks and neither
        trim nor pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += b'\0' * (self.blocksize - partial)
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Floor division keeps the block count an integer, which range() below
    # requires.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    # Each block contributes an (i, i+1) pair to one of the two lists, which
    # are then handed to RangeSet as flat sequences of range endpoints.
    zero_blocks = []
    nonzero_blocks = []
    reference = b'\0' * self.blocksize

    for i in range(self.total_blocks):
      d = self.data[i * self.blocksize : (i + 1) * self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i + 1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i + 1)

    self.file_map = {"__ZERO": RangeSet(zero_blocks),
                     "__NONZERO": RangeSet(nonzero_blocks)}

  def ReadRangeSet(self, ranges):
    """Return one slice of the data per (start, end) block pair in `ranges`."""
    return [self.data[s * self.blocksize:e * self.blocksize]
            for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 hex digest of the whole data string."""
    # DataImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1(self.data).hexdigest()
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	150
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	151
				152	class Transfer(object):
				153	def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
				154	self.tgt_name = tgt_name
				155	self.src_name = src_name
				156	self.tgt_ranges = tgt_ranges
				157	self.src_ranges = src_ranges
				158	self.style = style
				159	self.intact = (getattr(tgt_ranges, "monotonic", False) and
				160	getattr(src_ranges, "monotonic", False))
Tao Bao	b8c8717	2015-03-19 19:42:12 -0700	[diff] [blame]	161
				162	# We use OrderedDict rather than dict so that the output is repeatable;
				163	# otherwise it would depend on the hash values of the Transfer objects.
				164	self.goes_before = OrderedDict()
				165	self.goes_after = OrderedDict()
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	166
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	167	self.stash_before = []
				168	self.use_stash = []
				169
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	170	self.id = len(by_id)
				171	by_id.append(self)
				172
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	173	def NetStashChange(self):
				174	return (sum(sr.size() for (_, sr) in self.stash_before) -
				175	sum(sr.size() for (_, sr) in self.use_stash))
				176
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame^]	177	def ConvertToNew(self):
				178	assert self.style != "new"
				179	self.use_stash = []
				180	self.style = "new"
				181	self.src_ranges = RangeSet()
				182
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	183	def __str__(self):
				184	return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
				185	" to " + str(self.tgt_ranges) + ">")
				186
				187
				188	# BlockImageDiff works on two image objects. An image object is
				189	# anything that provides the following attributes:
				190	#
				191	# blocksize: the size in bytes of a block, currently must be 4096.
				192	#
				193	# total_blocks: the total size of the partition/image, in blocks.
				194	#
				195	# care_map: a RangeSet containing which blocks (in the range [0,
				196	# total_blocks) we actually care about; i.e. which blocks contain
				197	# data.
				198	#
				199	# file_map: a dict that partitions the blocks contained in care_map
				200	# into smaller domains that are useful for doing diffs on.
				201	# (Typically a domain is a file, and the key in file_map is the
				202	# pathname.)
				203	#
Tao Bao	ff77781	2015-05-12 11:42:31 -0700	[diff] [blame]	204	# clobbered_blocks: a RangeSet containing which blocks contain data
				205	# but may be altered by the FS. They need to be excluded when
				206	# verifying the partition integrity.
				207	#
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	208	# ReadRangeSet(): a function that takes a RangeSet and returns the
				209	# data contained in the image blocks of that RangeSet. The data
				210	# is returned as a list or tuple of strings; concatenating the
				211	# elements together should produce the requested data.
				212	# Implementations are free to break up the data into list/tuple
				213	# elements in any way that is convenient.
				214	#
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	215	# TotalSha1(): a function that returns (as a hex string) the SHA-1
				216	# hash of all the data in the image (ie, all the blocks in the
Tao Bao	68658c0	2015-06-01 13:40:49 -0700	[diff] [blame]	217	# care_map minus clobbered_blocks, or including the clobbered
				218	# blocks if include_clobbered_blocks is True).
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	219	#
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	220	# When creating a BlockImageDiff, the src image may be None, in which
				221	# case the list of transfers produced will never read from the
				222	# original image.
				223
				224	class BlockImageDiff(object):
def __init__(self, tgt, src=None, threads=None, version=3):
  """Set up a diff from image `src` (or an empty image) to image `tgt`.

  Args:
    tgt: the target image object.
    src: the source image object; None means an EmptyImage is used.
    threads: worker thread count; None means half the CPU count, but at
      least one.
    version: transfer list format version; must be 1, 2 or 3.
  """
  if threads is None:
    # Half the CPUs, but never fewer than one worker.
    threads = max(1, multiprocessing.cpu_count() // 2)

  self.threads = threads
  self.version = version
  self.transfers = []
  self.src_basenames = {}
  self.src_numpatterns = {}

  assert version in (1, 2, 3)

  self.tgt = tgt
  self.src = EmptyImage() if src is None else src

  # The updater code that installs the patch always uses 4k blocks.
  for image in (self.tgt, self.src):
    assert image.blocksize == 4096

  # The range sets in each filemap should comprise a partition of
  # the care map.
  for image in (self.src, self.tgt):
    self.AssertPartition(image.care_map, image.file_map.values())
				251
				252	def Compute(self, prefix):
				253	# When looking for a source file to use as the diff input for a
				254	# target file, we try:
				255	# 1) an exact path match if available, otherwise
				256	# 2) a exact basename match if available, otherwise
				257	# 3) a basename match after all runs of digits are replaced by
				258	# "#" if available, otherwise
				259	# 4) we have no source for this target.
				260	self.AbbreviateSourceNames()
				261	self.FindTransfers()
				262
				263	# Find the ordering dependencies among transfers (this is O(n^2)
				264	# in the number of transfers).
				265	self.GenerateDigraph()
				266	# Find a sequence of transfers that satisfies as many ordering
				267	# dependencies as possible (heuristically).
				268	self.FindVertexSequence()
				269	# Fix up the ordering dependencies that the sequence didn't
				270	# satisfy.
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	271	if self.version == 1:
				272	self.RemoveBackwardEdges()
				273	else:
				274	self.ReverseBackwardEdges()
				275	self.ImproveVertexSequence()
				276
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame^]	277	# Ensure the runtime stash size is under the limit.
				278	if self.version >= 2 and common.OPTIONS.cache_size is not None:
				279	self.ReviseStashSize()
				280
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	281	# Double-check our work.
				282	self.AssertSequenceGood()
				283
				284	self.ComputePatches(prefix)
				285	self.WriteTransfers(prefix)
				286
def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
  """Return the SHA-1 hex digest of the data `source` holds for `ranges`.

  The pieces returned by source.ReadRangeSet(ranges) are fed to the hash
  one after another, in order.
  """
  digest = sha1()
  for piece in source.ReadRangeSet(ranges):
    digest.update(piece)
  return digest.hexdigest()
				295
def WriteTransfers(self, prefix):
  """Write the command list for self.transfers to "<prefix>.transfer.list".

  The file starts with the format version and the total number of blocks
  written; from version 2 on these are followed by the number of stash slots
  and the peak number of stashed blocks. The stash, transfer, free, zero and
  erase commands follow, in order.

  Also prints the peak stash usage (version 2 and later), and asserts that
  it stays below cache_size * stash_threshold when a cache size is set.

  Raises:
    ValueError: if a transfer has an unknown style.
  """
  out = []

  total = 0

  # Maps a stash index to the slot id it was given. In version 3 it also
  # maps the SHA-1 of stashed data to the number of uses still outstanding.
  stashes = {}
  stashed_blocks = 0
  max_stashed_blocks = 0

  # Min-heap of slot ids that have been released and can be handed out again.
  free_stash_ids = []
  next_stash_id = 0

  for xf in self.transfers:

    if self.version < 2:
      assert not xf.stash_before
      assert not xf.use_stash

    for s, sr in xf.stash_before:
      assert s not in stashes
      if free_stash_ids:
        sid = heapq.heappop(free_stash_ids)
      else:
        sid = next_stash_id
        next_stash_id += 1
      stashes[s] = sid
      stashed_blocks += sr.size()
      if self.version == 2:
        out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
      else:
        sh = self.HashBlocks(self.src, sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

    max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

    free_string = []

    if self.version == 1:
      src_str = xf.src_ranges.to_string_raw()
    elif self.version >= 2:

      # <# blocks> <src ranges>
      # OR
      # <# blocks> <src ranges> <src locs> <stash refs...>
      # OR
      # <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for s, sr in xf.use_stash:
        sid = stashes.pop(s)
        stashed_blocks -= sr.size()
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        mapped = xf.src_ranges.map_within(sr)
        mapped_stashes.append(mapped)
        if self.version == 2:
          src_str.append("%d:%s" % (sid, mapped.to_string_raw()))
          # A stash will be used only once. We need to free the stash
          # immediately after the use, instead of waiting for the automatic
          # clean-up at the end. Because otherwise it may take up extra space
          # and lead to OTA failures.
          # Bug: 23119955
          free_string.append("free %d\n" % (sid,))
        else:
          # Only version 3 refers to stashes by content hash, so the data is
          # hashed in this branch only.
          sh = self.HashBlocks(self.src, sr)
          assert sh in stashes
          src_str.append("%s:%s" % (sh, mapped.to_string_raw()))
          stashes[sh] -= 1
          if stashes[sh] == 0:
            free_string.append("free %s\n" % (sh,))
            stashes.pop(sh)
        heapq.heappush(free_stash_ids, sid)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

    # all versions:
    #   zero <rangeset>
    #   new <rangeset>
    #   erase <rangeset>
    #
    # version 1:
    #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
    #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
    #   move <src rangeset> <tgt rangeset>
    #
    # version 2:
    #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
    #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
    #   move <tgt rangeset> <src_str>
    #
    # version 3:
    #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    #   move hash <tgt rangeset> <src_str>

    tgt_size = xf.tgt_ranges.size()

    if xf.style == "new":
      assert xf.tgt_ranges
      out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
      total += tgt_size
    elif xf.style == "move":
      assert xf.tgt_ranges
      assert xf.src_ranges.size() == tgt_size
      if xf.src_ranges != xf.tgt_ranges:
        if self.version == 1:
          out.append("%s %s %s\n" % (
              xf.style,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %s %s\n" % (
              xf.style,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            max_stashed_blocks = max(
                max_stashed_blocks, stashed_blocks + xf.src_ranges.size())

          out.append("%s %s %s %s\n" % (
              xf.style,
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
    elif xf.style in ("bsdiff", "imgdiff"):
      assert xf.tgt_ranges
      assert xf.src_ranges
      if self.version == 1:
        out.append("%s %d %d %s %s\n" % (
            xf.style, xf.patch_start, xf.patch_len,
            xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
      elif self.version == 2:
        out.append("%s %d %d %s %s\n" % (
            xf.style, xf.patch_start, xf.patch_len,
            xf.tgt_ranges.to_string_raw(), src_str))
      elif self.version >= 3:
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          max_stashed_blocks = max(
              max_stashed_blocks, stashed_blocks + xf.src_ranges.size())

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            self.HashBlocks(self.src, xf.src_ranges),
            self.HashBlocks(self.tgt, xf.tgt_ranges),
            xf.tgt_ranges.to_string_raw(), src_str))
      total += tgt_size
    elif xf.style == "zero":
      assert xf.tgt_ranges
      to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
      if to_zero:
        out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
        total += to_zero.size()
    else:
      raise ValueError("unknown transfer style '%s'\n" % xf.style)

    # The stashes this transfer consumed are released right after it.
    if free_string:
      out.append("".join(free_string))

    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      # Sanity check: abort if we're going to need more stash space than
      # the allowed size (cache_size * threshold). There are two purposes
      # of having a threshold here. a) Part of the cache may have been
      # occupied by some recovery logs. b) It will buy us some time to deal
      # with the oversize issue.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed = cache_size * stash_threshold
      assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
          'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
              max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
              self.tgt.blocksize, max_allowed, cache_size,
              stash_threshold)

  # Zero out extended blocks as a workaround for bug 20881595.
  if self.tgt.extended:
    out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))

  # We erase all the blocks on the partition that a) don't contain useful
  # data in the new image and b) will not be touched by dm-verity.
  all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
  all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
  new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
  if new_dontcare:
    out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

  out.insert(0, "%d\n" % (self.version,)) # format version number
  out.insert(1, str(total) + "\n")
  if self.version >= 2:
    # version 2 and later: after the total block count, we give the number
    # of stash slots needed, and the maximum size needed (in blocks)
    out.insert(2, str(next_stash_id) + "\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

  with open(prefix + ".transfer.list", "wb") as f:
    for i in out:
      f.write(i)

  if self.version >= 2:
    max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, max_stashed_size, max_allowed,
                max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, max_stashed_size))
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	531
def ReviseStashSize(self):
  """Convert transfers to "new" wherever the stash would exceed the limit.

  The limit, in blocks, is cache_size * stash_threshold / blocksize, read
  from common.OPTIONS and the target image. Transfers are walked in order
  while the number of stashed blocks is tracked. A transfer whose stash
  cannot be accommodated is converted with ConvertToNew(), and the stash
  definitions it relied on are removed from the commands that made them.
  """
  print("Revising stash size...")

  # For a stash (idx, sr): stash_info[idx] = (sr, def_cmd, use_cmd). The
  # defining command is recorded first; the using command is appended when
  # it is met.
  stash_info = {}
  for xf in self.transfers:
    # Command xf defines (stores) all the stashes in stash_before.
    for idx, sr in xf.stash_before:
      stash_info[idx] = (sr, xf)

    # Record all the stashes command xf uses.
    for idx, _ in xf.use_stash:
      stash_info[idx] += (xf,)

  # Maximum number of blocks available for stashing, based on the /cache
  # size and the threshold.
  cache_size = common.OPTIONS.cache_size
  stash_threshold = common.OPTIONS.stash_threshold
  max_allowed = cache_size * stash_threshold / self.tgt.blocksize

  in_use = 0

  # Walk the commands, tracking the stash size as it would evolve. Whenever
  # a stash does not fit, the command that would have used it is queued for
  # replacement by a "new" command.
  for xf in self.transfers:
    to_replace = []

    # xf.stash_before generates explicit stash commands.
    for idx, sr in xf.stash_before:
      if in_use + sr.size() <= max_allowed:
        in_use += sr.size()
        continue

      # We cannot stash this one for a later command. Find out the command
      # that will use this stash and replace the command with "new".
      user = stash_info[idx][2]
      to_replace.append(user)
      print(" %s replaced due to an explicit stash of %d blocks." % (
          user, sr.size()))

    # xf.use_stash generates free commands.
    for _, sr in xf.use_stash:
      in_use -= sr.size()

    # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
    # ComputePatches(), they both have the style of "diff".
    if xf.style == "diff" and self.version >= 3:
      assert xf.tgt_ranges and xf.src_ranges
      if (xf.src_ranges.overlaps(xf.tgt_ranges) and
          in_use + xf.src_ranges.size() > max_allowed):
        to_replace.append(xf)
        print(" %s replaced due to an implicit stash of %d blocks." % (
            xf, xf.src_ranges.size()))

    # Replace the queued commands with "new"s.
    for cmd in to_replace:
      # It no longer uses any of the stashes in "use_stash". Remove the def
      # points for all those stashes.
      for idx, sr in cmd.use_stash:
        definer = stash_info[idx][1]
        assert (idx, sr) in definer.stash_before
        definer.stash_before.remove((idx, sr))

      cmd.ConvertToNew()
				598
def ComputePatches(self, prefix):
  """Write "<prefix>.new.dat" and "<prefix>.patch.dat" for self.transfers.

  Data for "new" transfers is appended to the .new.dat file. Each "diff"
  transfer becomes a "move" when its source and target data hash equal;
  otherwise it becomes "imgdiff" or "bsdiff" and is queued for patching.
  Queued patches are computed on self.threads worker threads and written
  back to back into the .patch.dat file, with each transfer's patch_start
  and patch_len recording where its patch landed.

  Raises:
    ValueError: if a worker thread failed to produce its patch.
  """
  print("Reticulating splines...")
  diff_q = []
  patch_num = 0
  with open(prefix + ".new.dat", "wb") as new_f:
    for xf in self.transfers:
      if xf.style == "zero":
        pass
      elif xf.style == "new":
        for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
          new_f.write(piece)
      elif xf.style == "diff":
        src = self.src.ReadRangeSet(xf.src_ranges)
        tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

        # We can't compare src and tgt directly because they may have
        # the same content but be broken up into blocks differently, eg:
        #
        #   ["he", "llo"] vs ["h", "ello"]
        #
        # We want those to compare equal, ideally without having to
        # actually concatenate the strings (these may be tens of
        # megabytes).

        src_sha1 = sha1()
        for p in src:
          src_sha1.update(p)
        tgt_sha1 = sha1()
        tgt_size = 0
        for p in tgt:
          tgt_sha1.update(p)
          tgt_size += len(p)

        if src_sha1.digest() == tgt_sha1.digest():
          # These are identical; we don't need to generate a patch,
          # just issue copy commands on the device.
          xf.style = "move"
        else:
          # For files in zip format (eg, APKs, JARs, etc.) we would
          # like to use imgdiff -z if possible (because it usually
          # produces significantly smaller patches than bsdiff).
          # This is permissible if:
          #
          #  - the source and target files are monotonic (ie, the
          #    data is stored with blocks in increasing order), and
          #  - we haven't removed any blocks from the source set.
          #
          # If these conditions are satisfied then appending all the
          # blocks in the set together in order will produce a valid
          # zip file (plus possibly extra zeros in the last block),
          # which is what imgdiff needs to operate. (imgdiff is
          # fine with extra zeros at the end of the file.)
          imgdiff = (xf.intact and
                     xf.tgt_name.split(".")[-1].lower()
                     in ("apk", "jar", "zip"))
          xf.style = "imgdiff" if imgdiff else "bsdiff"
          diff_q.append((tgt_size, src, tgt, xf, patch_num))
          patch_num += 1

      else:
        assert False, "unknown style " + xf.style

  if diff_q:
    if self.threads > 1:
      print("Computing patches (using %d threads)..." % (self.threads,))
    else:
      print("Computing patches...")

    # Workers pop from the end of the queue, so the largest targets are
    # handled first. Order on the size alone: comparing whole tuples would
    # fall through to the data lists and Transfer objects when sizes tie.
    diff_q.sort(key=lambda item: item[0])

    patches = [None] * patch_num

    # TODO: Rewrite with multiprocessing.ThreadPool?
    lock = threading.Lock()
    def diff_worker():
      while True:
        with lock:
          if not diff_q:
            return
          tgt_size, src, tgt, xf, patchnum = diff_q.pop()
        # The diff itself runs outside the lock so workers overlap.
        patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
        size = len(patch)
        with lock:
          patches[patchnum] = (patch, xf)
          print("%10d %10d (%6.2f%%) %7s %s" % (
              size, tgt_size, size * 100.0 / tgt_size, xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")")))

    threads = [threading.Thread(target=diff_worker)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    # A worker that died on an exception leaves its slot empty; report that
    # here rather than failing obscurely when the slot is unpacked below.
    if any(entry is None for entry in patches):
      raise ValueError("failed to compute one or more patches")
  else:
    patches = []

  p = 0
  with open(prefix + ".patch.dat", "wb") as patch_f:
    for patch, xf in patches:
      xf.patch_start = p
      xf.patch_len = len(patch)
      patch_f.write(patch)
      p += len(patch)
				703
				704	def AssertSequenceGood(self):
				705	# Simulate the sequences of transfers we will output, and check that:
				706	# - we never read a block after writing it, and
				707	# - we write every block we care about exactly once.
				708
				709	# Start with no blocks having been touched yet.
				710	touched = RangeSet()
				711
				712	# Imagine processing the transfers in order.
				713	for xf in self.transfers:
				714	# Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	715
				716	x = xf.src_ranges
				717	if self.version >= 2:
				718	for _, sr in xf.use_stash:
				719	x = x.subtract(sr)
				720
				721	assert not touched.overlaps(x)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	722	# Check that the output blocks for this transfer haven't yet been touched.
				723	assert not touched.overlaps(xf.tgt_ranges)
				724	# Touch all the blocks written by this transfer.
				725	touched = touched.union(xf.tgt_ranges)
				726
				727	# Check that we've written every target block.
				728	assert touched == self.tgt.care_map
				729
def ImproveVertexSequence(self):
    """Re-order self.transfers with a greedy topological sort.

    The dependency graph is expected to be acyclic by now (backward
    edges of the heuristic sequence have been reversed).  The existing
    order is valid, but a different topological order may need less
    stash memory.  Whenever several vertices are ready, the one with the
    smallest NetStashChange() is emitted first; ties are broken by the
    vertex's previous 'order' value.

    On return self.transfers holds the new sequence and every transfer's
    'order' attribute is its index in that sequence.

    Raises:
      AssertionError: if not every transfer could be scheduled, which
        means the graph contained a cycle.
    """
    print("Improving vertex order...")

    # The sort consumes its edge maps, so operate on scratch copies.
    for xf in self.transfers:
        xf.incoming = xf.goes_after.copy()
        xf.outgoing = xf.goes_before.copy()

    def heap_entry(vertex):
        # Sort key first, then the old position as a tie-breaker; the
        # vertex itself rides along as payload.
        return (vertex.NetStashChange(), vertex.order, vertex)

    # Priority queue of vertices that have no unscheduled predecessors.
    ready = []
    for vertex in self.transfers:
        if not vertex.incoming:
            ready.append(heap_entry(vertex))
    heapq.heapify(ready)

    scheduled = []  # the new vertex order
    while ready:
        xf = heapq.heappop(ready)[2]
        scheduled.append(xf)
        # Scheduling xf may release some of its successors.
        for successor in xf.outgoing:
            del successor.incoming[xf]
            if not successor.incoming:
                heapq.heappush(ready, heap_entry(successor))

    # A shortfall here means some vertices never became ready: a cycle.
    assert len(scheduled) == len(self.transfers)

    self.transfers = scheduled
    for position, xf in enumerate(scheduled):
        xf.order = position
				770
def RemoveBackwardEdges(self):
    """Break ordering violations by discarding source blocks.

    For every edge "xf goes before u" that the current order violates,
    the blocks u writes are subtracted from xf's source ranges so that
    xf may run after u, and xf.intact is cleared.  A "diff" transfer
    left with no source blocks is turned into a "new" transfer.

    Prints a summary of how many dependencies were violated and how many
    source blocks were dropped.
    """
    print("Removing backward edges...")
    in_order = 0
    out_of_order = 0
    lost_source = 0

    for xf in self.transfers:
        blocks_before = xf.src_ranges.size()

        for u in xf.goes_before:
            # The edge says xf should run before u.
            if xf.order < u.order:
                # Already satisfied.
                in_order += 1
                continue

            # Violated: stop reading the blocks u writes, so xf no
            # longer has to precede u.
            out_of_order += 1
            assert xf.src_ranges.overlaps(u.tgt_ranges)
            xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
            xf.intact = False

        # With nothing left to diff against, ship the data as new.
        if xf.style == "diff" and not xf.src_ranges:
            xf.style = "new"

        lost_source += blocks_before - xf.src_ranges.size()

    total = in_order + out_of_order
    percent = (out_of_order * 100.0 / total) if total else 0.0
    print((" %d/%d dependencies (%.2f%%) were violated; "
           "%d source blocks removed.") %
          (out_of_order, total, percent, lost_source))
				806
def ReverseBackwardEdges(self):
    """Fix ordering violations by stashing instead of dropping blocks.

    For every edge "xf goes before u" that the current order violates,
    the blocks that u writes and xf reads are recorded as a stash: u
    gets a (stash id, ranges) entry in stash_before and xf gets the same
    entry in use_stash.  The edge is then flipped so that xf is required
    to run after u.

    Prints a summary of how many dependencies were violated and how many
    blocks were stashed.
    """
    print("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stashes = 0      # doubles as the id handed to the next stash
    stash_size = 0

    for xf in self.transfers:
        # Snapshot the successors: goes_before is edited inside the loop.
        for u in list(xf.goes_before):
            # The edge says xf should run before u.
            if xf.order < u.order:
                # Already satisfied.
                in_order += 1
                continue

            # Violated: have u stash the blocks it is about to overwrite
            # that xf still wants to read, then make u precede xf.
            out_of_order += 1

            overlap = xf.src_ranges.intersect(u.tgt_ranges)
            assert overlap

            stash_entry = (stashes, overlap)
            u.stash_before.append(stash_entry)
            xf.use_stash.append(stash_entry)
            stashes += 1
            stash_size += overlap.size()

            # Flip the edge: xf now goes after u.
            del xf.goes_before[u]
            del u.goes_after[xf]
            xf.goes_after[u] = None  # value doesn't matter
            u.goes_before[xf] = None

    total = in_order + out_of_order
    percent = (out_of_order * 100.0 / total) if total else 0.0
    print((" %d/%d dependencies (%.2f%%) were violated; "
           "%d source blocks stashed.") %
          (out_of_order, total, percent, stash_size))
				846
def FindVertexSequence(self):
    """Choose a linear order for self.transfers that violates few edges.

    This follows "A Fast & Effective Heuristic for the Feedback Arc Set
    Problem" by P. Eades, X. Lin, and W.F. Smyth.  Picture the vertices
    of the digraph laid out on a horizontal line: the aim is to keep the
    edges that end up pointing left to a minimum, because those edges
    are the ones that later have to be removed to obtain a DAG.  Edges
    carry a weight (the number of source blocks lost if the edge is
    removed), and it is the total weight, not the edge count, that the
    heuristic tries to keep small.

    On return self.transfers is in the chosen sequence and each
    transfer's 'order' attribute is its index in that sequence.
    """
    print("Finding vertex sequence...")

    # The algorithm eats its edge maps, so give it scratch copies.
    for xf in self.transfers:
        xf.incoming = xf.goes_after.copy()
        xf.outgoing = xf.goes_before.copy()

    # An OrderedDict rather than a set keeps the result repeatable;
    # with a set it would depend on the hash values of the transfers.
    remaining = OrderedDict.fromkeys(self.transfers)
    left = deque()   # front of the sequence, grown left to right
    right = deque()  # back of the sequence, grown right to left

    def net_gain(vertex):
        # Weight kept by treating the vertex as a source, minus the
        # weight kept by treating it as a sink.
        return sum(vertex.outgoing.values()) - sum(vertex.incoming.values())

    while remaining:
        # Peel off sinks, round after round, onto the back.
        sinks = [v for v in remaining if not v.outgoing]
        while sinks:
            for v in sinks:
                right.appendleft(v)
                del remaining[v]
                for predecessor in v.incoming:
                    del predecessor.outgoing[v]
            sinks = [v for v in remaining if not v.outgoing]

        # Peel off sources, round after round, onto the front.
        sources = [v for v in remaining if not v.incoming]
        while sources:
            for v in sources:
                left.append(v)
                del remaining[v]
                for successor in v.outgoing:
                    del successor.incoming[v]
            sources = [v for v in remaining if not v.incoming]

        if not remaining:
            break

        # Only vertices on cycles are left.  Place next the one with the
        # largest net gain; max() keeps the first of several equal
        # candidates, i.e. the earliest one in 'remaining'.
        chosen = max(remaining, key=net_gain)
        left.append(chosen)
        del remaining[chosen]
        for successor in chosen.outgoing:
            del successor.incoming[chosen]
        for predecessor in chosen.incoming:
            del predecessor.outgoing[chosen]

    # Publish the result: stamp each transfer with its position, drop
    # the scratch edge maps and replace self.transfers.
    sequence = list(itertools.chain(left, right))
    for position, xf in enumerate(sequence):
        xf.order = position
        del xf.incoming
        del xf.outgoing

    self.transfers = sequence
				933
				934	def GenerateDigraph(self):
				935	print("Generating digraph...")
				936	for a in self.transfers:
				937	for b in self.transfers:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	938	if a is b:
				939	continue
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	940
				941	# If the blocks written by A are read by B, then B needs to go before A.
				942	i = a.tgt_ranges.intersect(b.src_ranges)
				943	if i:
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	944	if b.src_name == "__ZERO":
				945	# the cost of removing source blocks for the __ZERO domain
				946	# is (nearly) zero.
				947	size = 0
				948	else:
				949	size = i.size()
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	950	b.goes_before[a] = size
				951	a.goes_after[b] = size
				952
				953	def FindTransfers(self):
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	954	empty = RangeSet()
				955	for tgt_fn, tgt_ranges in self.tgt.file_map.items():
				956	if tgt_fn == "__ZERO":
				957	# the special "__ZERO" domain is all the blocks not contained
				958	# in any file and that are filled with zeros. We have a
				959	# special transfer style for zero blocks.
				960	src_ranges = self.src.file_map.get("__ZERO", empty)
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	961	Transfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
				962	"zero", self.transfers)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	963	continue
				964
Tao Bao	ff77781	2015-05-12 11:42:31 -0700	[diff] [blame]	965	elif tgt_fn == "__COPY":
				966	# "__COPY" domain includes all the blocks not contained in any
				967	# file and that need to be copied unconditionally to the target.
				968	Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
				969	continue
				970
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	971	elif tgt_fn in self.src.file_map:
				972	# Look for an exact pathname match in the source.
				973	Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
				974	"diff", self.transfers)
				975	continue
				976
				977	b = os.path.basename(tgt_fn)
				978	if b in self.src_basenames:
				979	# Look for an exact basename match in the source.
				980	src_fn = self.src_basenames[b]
				981	Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
				982	"diff", self.transfers)
				983	continue
				984
				985	b = re.sub("[0-9]+", "#", b)
				986	if b in self.src_numpatterns:
				987	# Look for a 'number pattern' match (a basename match after
				988	# all runs of digits are replaced by "#"). (This is useful
				989	# for .so files that contain version numbers in the filename
				990	# that get bumped.)
				991	src_fn = self.src_numpatterns[b]
				992	Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
				993	"diff", self.transfers)
				994	continue
				995
				996	Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
				997
				998	def AbbreviateSourceNames(self):
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	999	for k in self.src.file_map.keys():
				1000	b = os.path.basename(k)
				1001	self.src_basenames[b] = k
				1002	b = re.sub("[0-9]+", "#", b)
				1003	self.src_numpatterns[b] = k
				1004
				1005	@staticmethod
				1006	def AssertPartition(total, seq):
				1007	"""Assert that all the RangeSets in 'seq' form a partition of the
				1008	'total' RangeSet (ie, they are nonintersecting and their union
				1009	equals 'total')."""
				1010	so_far = RangeSet()
				1011	for i in seq:
				1012	assert not so_far.overlaps(i)
				1013	so_far = so_far.union(i)
				1014	assert so_far == total