blob: d8fcc411b87848bcae8c50216f00b00057334a16 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070029
Tao Bao3a2e3502016-12-28 09:14:39 -080030from collections import deque, OrderedDict
31from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Returns the patch (as bytes) that transforms srcfile into tgtfile.

  Runs the external "imgdiff -z" tool when imgdiff is True (for zip-format
  files), otherwise "bsdiff". The patch is written to a temp file created
  via common.MakeTempFile() and read back in.

  Raises:
    ValueError: if the external diff tool exits with a non-zero status.
  """
  patchfile = common.MakeTempFile(prefix="patch-")

  if imgdiff:
    cmd = ["imgdiff", "-z", srcfile, tgtfile, patchfile]
  else:
    cmd = ["bsdiff", srcfile, tgtfile, patchfile]

  # Use a context manager so the devnull handle is always closed (the
  # previous code opened os.devnull without ever closing it).
  with open(os.devnull, 'w') as devnull:
    p = subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)

  if p:
    raise ValueError("diff failed: " + str(p))

  with open(patchfile, "rb") as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070057
Dan Albert8b72aef2015-03-23 19:13:21 -070058
class Image(object):
  """Abstract interface for the image objects consumed by BlockImageDiff.

  See the comment block above BlockImageDiff for the full set of attributes
  (blocksize, total_blocks, care_map, clobbered_blocks, file_map, ...) that
  concrete implementations must also provide.
  """

  def RangeSha1(self, ranges):
    """Returns (as a hex string) the SHA-1 of the data in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data in 'ranges' as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns (as a hex string) the SHA-1 of all the data in the image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in 'ranges' to the given file-like object."""
    raise NotImplementedError
71
Dan Albert8b72aef2015-03-23 19:13:21 -070072
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.file_map = {}
    # A zero-length image has nothing to care about, clobber, or extend.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

  def RangeSha1(self, ranges):
    # There is no data, so every range hashes to the empty SHA-1.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
98
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070099
class DataImage(Image):
  """An image wrapped around a single string of data.

  The data length must be a multiple of the 4096-byte block size, unless
  'trim' (drop the partial tail block) or 'pad' (zero-fill the tail block)
  is specified. 'trim' and 'pad' are mutually exclusive.
  """

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int: plain '/' would
    # produce a float under Python 3 and break the range()/RangeSet math
    # below. (Identical result under Python 2's integer division.)
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    # Partition the blocks into all-zero and non-zero ranges, recorded as
    # flat [start, end, start, end, ...] lists for RangeSet construction.
    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Skip the final (padded) block: it is tracked via clobbered_blocks.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the data slice for each (start, end) block pair in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns (as a hex string) the SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    # Wrapped in a list so callers get "an iterable of strings" whose
    # concatenation is the requested data (here: a single generator).
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)
183
Doug Zongkerfc44a512014-08-26 13:10:25 -0700184
class Transfer(object):
  """One update command plus its ordering and stash bookkeeping."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # A transfer is "intact" only when both range sets are monotonic
    # (laid out in order on disk).
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    # Lists of (id, RangeSet) pairs: stashes this command defines and
    # stashes it consumes, respectively.
    self.stash_before = []
    self.use_stash = []

    # Register ourselves; our position in the registry is our id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Returns the net change in stashed blocks caused by this command."""
    added = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return added - freed

  def ConvertToNew(self):
    """Rewrites this command as a "new" command reading no source data."""
    assert self.style != "new"
    self.style = "new"
    self.use_stash = []
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
222
223
@functools.total_ordering
class HeapItem(object):
  """Orders items with a 'score' attribute in Python's min-heap.

  The score is negated so that heapq (a min-heap) pops the item with the
  maximum score first. clear() marks an entry as stale; a stale entry is
  falsy so consumers can skip it when popped.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    # Drop the wrapped item; the entry stays in the heap but tests falsy.
    self.item = None

  def __bool__(self):
    # Bug fix: a live (uncleared) entry must be truthy. The previous code
    # returned 'self.item is None', inverting the check so cleared entries
    # looked live and live ones looked cleared.
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing, while Python 3 uses
  # __bool__; alias it so this file's Python 2 dialect works too.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
239
240
Doug Zongkerfc44a512014-08-26 13:10:25 -0700241# BlockImageDiff works on two image objects. An image object is
242# anything that provides the following attributes:
243#
244# blocksize: the size in bytes of a block, currently must be 4096.
245#
246# total_blocks: the total size of the partition/image, in blocks.
247#
248# care_map: a RangeSet containing which blocks (in the range [0,
249# total_blocks) we actually care about; i.e. which blocks contain
250# data.
251#
252# file_map: a dict that partitions the blocks contained in care_map
253# into smaller domains that are useful for doing diffs on.
254# (Typically a domain is a file, and the key in file_map is the
255# pathname.)
256#
Tao Baoff777812015-05-12 11:42:31 -0700257# clobbered_blocks: a RangeSet containing which blocks contain data
258# but may be altered by the FS. They need to be excluded when
259# verifying the partition integrity.
260#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700261# ReadRangeSet(): a function that takes a RangeSet and returns the
262# data contained in the image blocks of that RangeSet. The data
263# is returned as a list or tuple of strings; concatenating the
264# elements together should produce the requested data.
265# Implementations are free to break up the data into list/tuple
266# elements in any way that is convenient.
267#
Tao Bao183e56e2017-03-05 17:05:09 -0800268# RangeSha1(): a function that returns (as a hex string) the SHA-1
269# hash of all the data in the specified range.
270#
# TotalSha1(): a function that returns (as a hex string) the SHA-1
#    hash of all the data in the image (i.e., all the blocks in the
#    care_map minus clobbered_blocks, or including the clobbered
#    blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700275#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700276# When creating a BlockImageDiff, the src image may be None, in which
277# case the list of transfers produced will never read from the
278# original image.
279
280class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700281 def __init__(self, tgt, src=None, threads=None, version=4,
282 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700283 if threads is None:
284 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700285 if threads == 0:
286 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700287 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700288 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700289 self.transfers = []
290 self.src_basenames = {}
291 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800292 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700293 self.touched_src_ranges = RangeSet()
294 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700295 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700296
Tao Bao8fad03e2017-03-01 14:36:26 -0800297 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700298
299 self.tgt = tgt
300 if src is None:
301 src = EmptyImage()
302 self.src = src
303
304 # The updater code that installs the patch always uses 4k blocks.
305 assert tgt.blocksize == 4096
306 assert src.blocksize == 4096
307
308 # The range sets in each filemap should comprise a partition of
309 # the care map.
310 self.AssertPartition(src.care_map, src.file_map.values())
311 self.AssertPartition(tgt.care_map, tgt.file_map.values())
312
  @property
  def max_stashed_size(self):
    """Peak number of bytes stashed at once; set by WriteTransfers()."""
    return self._max_stashed_size
316
  def Compute(self, prefix):
    """Runs the full diff pipeline and writes the output files.

    Finds and orders the transfers, revises them to fit the stash-size
    limit, then emits the patch/new-data files and the transfer list
    (all named with the given 'prefix').
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
348
  def WriteTransfers(self, prefix):
    """Writes the ordered transfers to '<prefix>.transfer.list'.

    Emits one command per transfer (stash / free / move / bsdiff / imgdiff /
    new / zero, plus erase commands for unused blocks), while simulating the
    stash so the peak number of simultaneously stashed blocks can be recorded
    in the header and checked against the cache-size limit.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit a "stash" command for each stash this transfer defines; a
      # stash that is already live (same hash) only gets its refcount bumped.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Build the source-description string, in one of three forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: schedule a "free" command after this transfer.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+ command formats:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      # Emit the "free" commands accumulated above, after the consumer.
      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Four header lines: version, total new blocks, stash slots (unused
    # in v3+), and the peak stash size in blocks.
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700555
  def ReviseStashSize(self):
    """Converts transfers that would exceed the stash limit into "new"s.

    Simulates the stash usage of the command sequence. Whenever a stash
    (explicit via stash_before, or implicit from overlapping move/diff
    ranges) would push usage past the /cache-based limit, the command that
    consumes it is rewritten as a "new" command (full data, no source).
    Returns the total number of blocks converted to new.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        # Only a not-yet-live hash adds to the stash footprint (duplicate
        # hashes share one slot and just bump the refcount).
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700647
  def ComputePatches(self, prefix):
    """Compute patches for all diff-style transfers and emit data files.

    Writes two outputs under the given path prefix:
      - "<prefix>.new.dat": the concatenated raw data of every "new"
        transfer, in transfer order;
      - "<prefix>.patch.dat": the concatenated patch blobs of all
        bsdiff/imgdiff transfers; each transfer's patch_start/patch_len
        fields are updated to record its slice of that file.

    "diff" transfers whose source and target SHA-1s already match are
    downgraded to "move" (no patch needed); the rest become "imgdiff"
    or "bsdiff" and are diffed on a pool of self.threads worker threads.

    Args:
      prefix: path prefix for the emitted ".new.dat" / ".patch.dat" files.
    """
    print("Reticulating splines...")
    # Work items for the diff workers:
    # (index into self.transfers, whether to use imgdiff, slot in 'patches').
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          # ["he", "llo"] vs ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            # - imgdiff is not disabled, and
            # - the source and target files are monotonic (ie, the
            #   data is stored with blocks in increasing order), and
            # - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate. (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (not self.disable_imgdiff and xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      # Results land in fixed slots ('patch_index'), so the order in which
      # workers drain the queue doesn't matter.
      patches = [None] * diff_total
      if sys.stdout.isatty():
        # Module-level progress counter shared by all worker threads; only
        # used for the tty progress display below.
        global diff_done
        diff_done = 0

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        while True:
          with lock:
            if not diff_queue:
              return
            # pop() takes from the tail (LIFO); fine here since each result
            # is written to its own preassigned slot in 'patches'.
            xf_index, imgdiff, patch_index = diff_queue.pop()

          xf = self.transfers[xf_index]
          src_ranges = xf.src_ranges
          tgt_ranges = xf.tgt_ranges

          # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
          with lock:
            src_file = common.MakeTempFile(prefix="src-")
            with open(src_file, "wb") as fd:
              self.src.WriteRangeDataToFd(src_ranges, fd)

            tgt_file = common.MakeTempFile(prefix="tgt-")
            with open(tgt_file, "wb") as fd:
              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

          try:
            patch = compute_patch(src_file, tgt_file, imgdiff)
          except ValueError as e:
            # NOTE(review): e.message is Python 2 only; use str(e) if this
            # ever moves to Python 3.
            raise ValueError(
                "Failed to generate diff for %s: src=%s, tgt=%s: %s" % (
                    xf.tgt_name, xf.src_ranges, xf.tgt_ranges, e.message))

          with lock:
            patches[patch_index] = (xf_index, patch)
            if sys.stdout.isatty():
              global diff_done
              diff_done += 1
              progress = diff_done * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')
    else:
      patches = []

    # Concatenate all patches into <prefix>.patch.dat, recording each
    # transfer's offset/length into that file.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700800
801 def AssertSequenceGood(self):
802 # Simulate the sequences of transfers we will output, and check that:
803 # - we never read a block after writing it, and
804 # - we write every block we care about exactly once.
805
806 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800807 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700808
809 # Imagine processing the transfers in order.
810 for xf in self.transfers:
811 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700812
813 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800814 for _, sr in xf.use_stash:
815 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700816
Doug Zongker6ab2a502016-02-09 08:28:09 -0800817 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800818 # Source image could be larger. Don't check the blocks that are in the
819 # source image only. Since they are not in 'touched', and won't ever
820 # be touched.
821 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800822 assert touched[i] == 0
823
824 # Check that the output blocks for this transfer haven't yet
825 # been touched, and touch all the blocks written by this
826 # transfer.
827 for s, e in xf.tgt_ranges:
828 for i in range(s, e):
829 assert touched[i] == 0
830 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700831
832 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800833 for s, e in self.tgt.care_map:
834 for i in range(s, e):
835 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700836
Doug Zongker62338182014-09-08 08:29:55 -0700837 def ImproveVertexSequence(self):
838 print("Improving vertex order...")
839
840 # At this point our digraph is acyclic; we reversed any edges that
841 # were backwards in the heuristically-generated sequence. The
842 # previously-generated order is still acceptable, but we hope to
843 # find a better order that needs less memory for stashed data.
844 # Now we do a topological sort to generate a new vertex order,
845 # using a greedy algorithm to choose which vertex goes next
846 # whenever we have a choice.
847
848 # Make a copy of the edge set; this copy will get destroyed by the
849 # algorithm.
850 for xf in self.transfers:
851 xf.incoming = xf.goes_after.copy()
852 xf.outgoing = xf.goes_before.copy()
853
854 L = [] # the new vertex order
855
856 # S is the set of sources in the remaining graph; we always choose
857 # the one that leaves the least amount of stashed data after it's
858 # executed.
859 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
860 if not u.incoming]
861 heapq.heapify(S)
862
863 while S:
864 _, _, xf = heapq.heappop(S)
865 L.append(xf)
866 for u in xf.outgoing:
867 del u.incoming[xf]
868 if not u.incoming:
869 heapq.heappush(S, (u.NetStashChange(), u.order, u))
870
871 # if this fails then our graph had a cycle.
872 assert len(L) == len(self.transfers)
873
874 self.transfers = L
875 for i, xf in enumerate(L):
876 xf.order = i
877
Doug Zongkerfc44a512014-08-26 13:10:25 -0700878 def RemoveBackwardEdges(self):
879 print("Removing backward edges...")
880 in_order = 0
881 out_of_order = 0
882 lost_source = 0
883
884 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700885 lost = 0
886 size = xf.src_ranges.size()
887 for u in xf.goes_before:
888 # xf should go before u
889 if xf.order < u.order:
890 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700891 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700892 else:
893 # it doesn't, boo. trim the blocks that u writes from xf's
894 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700895 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700896 assert xf.src_ranges.overlaps(u.tgt_ranges)
897 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
898 xf.intact = False
899
900 if xf.style == "diff" and not xf.src_ranges:
901 # nothing left to diff from; treat as new data
902 xf.style = "new"
903
904 lost = size - xf.src_ranges.size()
905 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700906
907 print((" %d/%d dependencies (%.2f%%) were violated; "
908 "%d source blocks removed.") %
909 (out_of_order, in_order + out_of_order,
910 (out_of_order * 100.0 / (in_order + out_of_order))
911 if (in_order + out_of_order) else 0.0,
912 lost_source))
913
Doug Zongker62338182014-09-08 08:29:55 -0700914 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800915 """Reverse unsatisfying edges and compute pairs of stashed blocks.
916
917 For each transfer, make sure it properly stashes the blocks it touches and
918 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
919 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
920 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
921 it is possible to have multiple pairs with different 'stash_raw_id's. Each
922 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
923 blocks will be written to the same stash slot in WriteTransfers().
924 """
925
Doug Zongker62338182014-09-08 08:29:55 -0700926 print("Reversing backward edges...")
927 in_order = 0
928 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -0800929 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -0700930 stash_size = 0
931
932 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700933 for u in xf.goes_before.copy():
934 # xf should go before u
935 if xf.order < u.order:
936 # it does, hurray!
937 in_order += 1
938 else:
939 # it doesn't, boo. modify u to stash the blocks that it
940 # writes that xf wants to read, and then require u to go
941 # before xf.
942 out_of_order += 1
943
944 overlap = xf.src_ranges.intersect(u.tgt_ranges)
945 assert overlap
946
Tao Bao3a2e3502016-12-28 09:14:39 -0800947 u.stash_before.append((stash_raw_id, overlap))
948 xf.use_stash.append((stash_raw_id, overlap))
949 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -0700950 stash_size += overlap.size()
951
952 # reverse the edge direction; now xf must go after u
953 del xf.goes_before[u]
954 del u.goes_after[xf]
955 xf.goes_after[u] = None # value doesn't matter
956 u.goes_before[xf] = None
957
958 print((" %d/%d dependencies (%.2f%%) were violated; "
959 "%d source blocks stashed.") %
960 (out_of_order, in_order + out_of_order,
961 (out_of_order * 100.0 / (in_order + out_of_order))
962 if (in_order + out_of_order) else 0.0,
963 stash_size))
964
  def FindVertexSequence(self):
    """Compute a linear order of the transfers minimizing violated edges.

    Records the chosen order in each transfer's 'order' field and
    rewrites self.transfers in that order. Edges that end up pointing
    backwards will later be removed (RemoveBackwardEdges) or reversed
    with stashes (ReverseBackwardEdges).
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight saved by treating xf as a source)
      #         - (weight lost by not treating it as a sink).
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-heap (via HeapItem ordering) of vertices keyed by score.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Lazy deletion: clear() invalidates the stale heap entry (it
      # becomes falsy), and a fresh item with the updated score is
      # pushed. Stale entries are skipped at pop time below.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we find a live entry (cleared items are falsy, and an
      # item may also refer to a vertex already removed from G).
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1074
1075 def GenerateDigraph(self):
1076 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001077
1078 # Each item of source_ranges will be:
1079 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001080 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001081 source_ranges = []
1082 for b in self.transfers:
1083 for s, e in b.src_ranges:
1084 if e > len(source_ranges):
1085 source_ranges.extend([None] * (e-len(source_ranges)))
1086 for i in range(s, e):
1087 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001088 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001089 else:
Tao Bao33482282016-10-24 16:49:08 -07001090 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001091
Doug Zongkerfc44a512014-08-26 13:10:25 -07001092 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001093 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001094 for s, e in a.tgt_ranges:
1095 for i in range(s, e):
1096 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001097 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1098 if source_ranges[i] is not None:
1099 for j in source_ranges[i]:
1100 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001101
1102 for b in intersections:
1103 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001104
1105 # If the blocks written by A are read by B, then B needs to go before A.
1106 i = a.tgt_ranges.intersect(b.src_ranges)
1107 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001108 if b.src_name == "__ZERO":
1109 # the cost of removing source blocks for the __ZERO domain
1110 # is (nearly) zero.
1111 size = 0
1112 else:
1113 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001114 b.goes_before[a] = size
1115 a.goes_after[b] = size
1116
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    For each target file, find the best matching source: "diff" style
    when an exact pathname, exact basename, or digit-collapsed basename
    match exists in the source image; otherwise "new". The resulting
    Transfer() objects register themselves in self.transfers. Oversized
    files are split into multiple transfers so that their source blocks
    fit within the stash-limited cache.
    """

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
                          style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # Possibly split large files into smaller chunks.
      pieces = 0
      cache_size = common.OPTIONS.cache_size
      split_threshold = 0.125
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Peel off max_blocks_per_transfer-sized pieces, naming them
      # "<name>-0", "<name>-1", ... until either side runs short.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare the two files block by block (same size, so the
        # per-block items pair up one-to-one).
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          # Identical blocks become a plain "diff" (effectively "move")
          # transfer; only the changed blocks below get patched.
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    print("Finding transfers...")

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros. We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#"). (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      # No plausible source; emit the file's blocks as new data.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001289
1290 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001291 for k in self.src.file_map.keys():
1292 b = os.path.basename(k)
1293 self.src_basenames[b] = k
1294 b = re.sub("[0-9]+", "#", b)
1295 self.src_numpatterns[b] = k
1296
1297 @staticmethod
1298 def AssertPartition(total, seq):
1299 """Assert that all the RangeSets in 'seq' form a partition of the
1300 'total' RangeSet (ie, they are nonintersecting and their union
1301 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001302
Doug Zongkerfc44a512014-08-26 13:10:25 -07001303 so_far = RangeSet()
1304 for i in seq:
1305 assert not so_far.overlaps(i)
1306 so_far = so_far.union(i)
1307 assert so_far == total