import bisect
import os
import sys
import struct
import pprint
from hashlib import sha1

from rangelib import *
9
class SparseImage(object):
  """Wraps a sparse image file (and optional file map) into an image
  object suitable for passing to BlockImageDiff."""

  def __init__(self, simg_fn, file_map_fn=None):
    """Parse the sparse image header and build the chunk index.

    Args:
      simg_fn: path to the sparse image file; it is kept open for the
        lifetime of this object so data can be read on demand.
      file_map_fn: optional path to a block map file.  When given, the
        care map is divided per-file via LoadFileBlockMap(); otherwise
        the whole care map is exposed as a single "__DATA" entry.

    Raises:
      ValueError: if the header is malformed, or if the image contains
        a chunk type this class does not support (fill, CRC32).
    """
    self.simg_f = f = open(simg_fn, "rb")

    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)

    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    total_chunks = header[7]
    # header[8] is the image checksum; it is read but not verified here.
    image_checksum = header[8]

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    print("Total of %u %u-byte output blocks in %u input chunks."
          % (total_blks, blk_sz, total_chunks))

    pos = 0   # in blocks
    care_data = []
    self.offset_map = offset_map = []

    for i in range(total_chunks):
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      # header[1] is a reserved field.
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12   # payload size excludes the 12-byte chunk header

      if chunk_type == 0xCAC1:          # raw chunk
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          # Remember where this chunk's data lives so _GetRangeData can
          # seek straight to it later.
          offset_map.append((pos, chunk_sz, f.tell()))
          pos += chunk_sz
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:        # fill chunk
        raise ValueError("Fill chunks are not supported")

      elif chunk_type == 0xCAC3:        # don't-care chunk
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz))
        else:
          pos += chunk_sz

      elif chunk_type == 0xCAC4:        # CRC32 chunk
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.care_map = RangeSet(care_data)
    # Sorted list of chunk start blocks, used to bisect into offset_map.
    self.offset_index = [i[0] for i in offset_map]

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn)
    else:
      self.file_map = {"__DATA": self.care_map}

  def ReadRangeSet(self, ranges):
    """Return a list of data pieces covering the blocks in 'ranges'."""
    return list(self._GetRangeData(ranges))

  def TotalSha1(self):
    """Return the SHA-1 hash of all data in the 'care' regions of this image."""
    h = sha1()
    for d in self._GetRangeData(self.care_map):
      h.update(d)
    return h.hexdigest()

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'.  The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges'.

    This generator is stateful -- it depends on the open file object
    contained in this SparseImage, so you should not try to run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    for s, e in ranges:
      to_read = e - s
      # Locate the chunk containing block s.
      idx = bisect.bisect_right(self.offset_index, s) - 1
      chunk_start, chunk_len, filepos = self.offset_map[idx]

      # For the first chunk we may be starting partway through it.
      p = filepos + ((s - chunk_start) * self.blocksize)
      remain = chunk_len - (s - chunk_start)

      f.seek(p, os.SEEK_SET)
      this_read = min(remain, to_read)
      yield f.read(this_read * self.blocksize)
      to_read -= this_read

      while to_read > 0:
        # Continue with following chunks if this range spans multiple chunks.
        idx += 1
        chunk_start, chunk_len, filepos = self.offset_map[idx]
        f.seek(filepos, os.SEEK_SET)
        this_read = min(chunk_len, to_read)
        yield f.read(this_read * self.blocksize)
        to_read -= this_read

  def LoadFileBlockMap(self, fn):
    """Load the block map file 'fn' and build self.file_map from it.

    Each line of the map file is "<filename> <rangeset>".  Any care-map
    blocks not claimed by a file are classified into "__ZERO" (all-NUL
    blocks) and "__NONZERO" entries.
    """
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        # Renamed from 'fn' to avoid shadowing the method parameter.
        map_fn, ranges = line.split(None, 1)
        ranges = RangeSet.parse(ranges)
        out[map_fn] = ranges
        # Each file's blocks must lie entirely within the as-yet
        # unclaimed care map (i.e. files must not overlap).  NOTE:
        # assert is stripped under -O; kept as-is so callers that
        # expect AssertionError still work.
        assert ranges.size() == ranges.intersect(remaining).size()
        remaining = remaining.subtract(ranges)

    # For all the remaining blocks in the care_map (ie, those that
    # aren't part of the data for any file), divide them into blocks
    # that are all zero and blocks that aren't.  (Zero blocks are
    # handled specially because (1) there are usually a lot of them
    # and (2) bsdiff handles files with long sequences of repeated
    # bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    # BUGFIX: must be bytes, not str.  The image is opened in "rb" mode,
    # so under Python 3 f.read() returns bytes and a str of NULs would
    # never compare equal, silently classifying every block as nonzero.
    # (b'\0' == '\0' under Python 2, so this is a no-op there.)
    reference = b'\0' * self.blocksize

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, chunk_len, filepos = self.offset_map[idx]
        filepos += (b - chunk_start) * self.blocksize
        f.seek(filepos, os.SEEK_SET)
        data = f.read(self.blocksize)

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b + 1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b + 1)

    out["__ZERO"] = RangeSet(data=zero_blocks)
    out["__NONZERO"] = RangeSet(data=nonzero_blocks)

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}