blob: 6b70fe16c91a831a1d030ac1ecf53b93d4cf009c [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015import bisect
16import os
17import sys
18import struct
19import pprint
20from hashlib import sha1
21
22from rangelib import *
23
class SparseImage(object):
  """Wraps a sparse image file (and optional file map) into an image
  object suitable for passing to BlockImageDiff.

  A sparse image is a 28-byte file header followed by a sequence of
  chunks, each with a 12-byte chunk header.  Only raw (0xCAC1) and
  don't-care (0xCAC3) chunks are supported; fill (0xCAC2) and CRC32
  (0xCAC4) chunks raise ValueError.
  """

  def __init__(self, simg_fn, file_map_fn=None):
    """Parse the sparse image and build the care map and chunk index.

    Args:
      simg_fn: path to the sparse image.  The file object is kept open
        for the lifetime of this object, since ReadRangeSet/TotalSha1
        read data from it lazily.
      file_map_fn: optional path to a block map file.  If given, the
        care blocks are partitioned per file (plus the "__ZERO" and
        "__NONZERO" pseudo-files); otherwise the entire care map is
        exposed as the single pseudo-file "__DATA".

    Raises:
      ValueError: if the magic number, version, header sizes, or chunk
        layout are not what this parser supports.
    """
    self.simg_f = f = open(simg_fn, "rb")

    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)

    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    total_chunks = header[7]
    # header[8] is the image checksum; this parser does not verify it.

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    print("Total of %u %u-byte output blocks in %u input chunks."
          % (total_blks, blk_sz, total_chunks))

    pos = 0  # cursor position, in output blocks
    care_data = []
    # Each entry is (start output block, length in blocks, file offset
    # of the chunk's raw data), in ascending output-block order.
    self.offset_map = offset_map = []

    for _ in range(total_chunks):
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      # header[1] is a reserved field and is ignored.
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12  # payload size = total minus chunk header

      if chunk_type == 0xCAC1:  # raw chunk
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, f.tell()))
          pos += chunk_sz
          # Skip over the payload; data is read on demand later.
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:  # fill chunk
        raise ValueError("Fill chunks are not supported")

      elif chunk_type == 0xCAC3:  # don't-care chunk: advances pos only
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz))
        else:
          pos += chunk_sz

      elif chunk_type == 0xCAC4:  # CRC32 chunk
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.care_map = RangeSet(care_data)
    # Parallel list of chunk start blocks, for bisecting into offset_map.
    self.offset_index = [i[0] for i in offset_map]

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn)
    else:
      self.file_map = {"__DATA": self.care_map}

  def ReadRangeSet(self, ranges):
    """Return a list of data pieces covering the given block ranges."""
    return list(self._GetRangeData(ranges))

  def TotalSha1(self):
    """Return the SHA-1 hash of all data in the 'care' regions of this image."""
    h = sha1()
    for d in self._GetRangeData(self.care_map):
      h.update(d)
    return h.hexdigest()

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'. The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges'.

    This generator is stateful -- it depends on the open file object
    contained in this SparseImage, so you should not try to run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    for s, e in ranges:
      to_read = e-s
      # Find the chunk containing block s: the rightmost chunk whose
      # start block is <= s.
      idx = bisect.bisect_right(self.offset_index, s) - 1
      chunk_start, chunk_len, filepos = self.offset_map[idx]

      # For the first chunk we may be starting partway through it.
      p = filepos + ((s - chunk_start) * self.blocksize)
      remain = chunk_len - (s - chunk_start)

      f.seek(p, os.SEEK_SET)
      this_read = min(remain, to_read)
      yield f.read(this_read * self.blocksize)
      to_read -= this_read

      while to_read > 0:
        # Continue with following chunks if this range spans multiple
        # chunks; subsequent reads start at each chunk's beginning.
        idx += 1
        chunk_start, chunk_len, filepos = self.offset_map[idx]
        f.seek(filepos, os.SEEK_SET)
        this_read = min(chunk_len, to_read)
        yield f.read(this_read * self.blocksize)
        to_read -= this_read

  def LoadFileBlockMap(self, fn):
    """Load the block map file 'fn' and build self.file_map.

    Each line of the map file is "<filename> <rangeset>".  Blocks in
    the care map claimed by no file are split into the "__ZERO" and
    "__NONZERO" pseudo-files.
    """
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        fname, ranges = line.split(None, 1)
        ranges = RangeSet.parse(ranges)
        out[fname] = ranges
        # Every file's blocks must lie within the still-unclaimed part
        # of the care map, i.e. no block may be claimed twice.
        assert ranges.size() == ranges.intersect(remaining).size()
        remaining = remaining.subtract(ranges)

    # For all the remaining blocks in the care_map (ie, those that
    # aren't part of the data for any file), divide them into blocks
    # that are all zero and blocks that aren't. (Zero blocks are
    # handled specially because (1) there are usually a lot of them
    # and (2) bsdiff handles files with long sequences of repeated
    # bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    # Must be bytes: the image is opened in binary mode, so comparing
    # against a str reference would never match under Python 3.
    reference = b'\0' * self.blocksize

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, chunk_len, filepos = self.offset_map[idx]
        filepos += (b-chunk_start) * self.blocksize
        f.seek(filepos, os.SEEK_SET)
        data = f.read(self.blocksize)

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b+1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b+1)

    out["__ZERO"] = RangeSet(data=zero_blocks)
    out["__NONZERO"] = RangeSet(data=nonzero_blocks)

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}