blob: f0b5feaeefaf5bfe504db1e400822651fa5a1a84 [file] [log] [blame]
Doug Zongker02d444b2009-05-27 18:24:03 -07001/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * This program constructs binary patches for images -- such as boot.img
19 * and recovery.img -- that consist primarily of large chunks of gzipped
20 * data interspersed with uncompressed data. Doing a naive bsdiff of
21 * these files is not useful because small changes in the data lead to
22 * large changes in the compressed bitstream; bsdiff patches of gzipped
23 * data are typically as large as the data itself.
24 *
25 * To patch these usefully, we break the source and target images up into
26 * chunks of two types: "normal" and "gzip". Normal chunks are simply
27 * patched using a plain bsdiff. Gzip chunks are first expanded, then a
28 * bsdiff is applied to the uncompressed data, then the patched data is
29 * gzipped using the same encoder parameters. Patched chunks are
30 * concatenated together to create the output file; the output image
31 * should be *exactly* the same series of bytes as the target image used
32 * originally to generate the patch.
33 *
34 * To work well with this tool, the gzipped sections of the target
35 * image must have been generated using the same deflate encoder that
36 * is available in applypatch, namely, the one in the zlib library.
37 * In practice this means that images should be compressed using the
38 * "minigzip" tool included in the zlib distribution, not the GNU gzip
39 * program.
40 *
41 * An "imgdiff" patch consists of a header describing the chunk structure
42 * of the file and any encoding parameters needed for the gzipped
43 * chunks, followed by N bsdiff patches, one per chunk.
44 *
45 * For a diff to be generated, the source and target images must have the
46 * same "chunk" structure: that is, the same number of gzipped and normal
47 * chunks in the same order. Android boot and recovery images currently
48 * consist of five chunks: a small normal header, a gzipped kernel, a
49 * small normal section, a gzipped ramdisk, and finally a small normal
50 * footer.
51 *
52 * Caveats: we locate gzipped sections within the source and target
53 * images by searching for the byte sequence 1f8b0800: 1f8b is the gzip
54 * magic number; 08 specifies the "deflate" encoding [the only encoding
55 * supported by the gzip standard]; and 00 is the flags byte. We do not
56 * currently support any extra header fields (which would be indicated by
57 * a nonzero flags byte). We also don't handle the case when that byte
58 * sequence appears spuriously in the file. (Note that it would have to
59 * occur spuriously within a normal chunk to be a problem.)
60 *
61 *
 * The imgdiff patch header looks like this:
 *
 *    "IMGDIFF1"                      (8)   [magic number and version]
 *    chunk count                     (4)
 *    for each chunk:
 *        chunk type                  (4)   [CHUNK_NORMAL or CHUNK_GZIP]
 *        source start                (8)
 *        source len                  (8)
 *        bsdiff patch offset         (8)   [from start of patch file]
 *        if chunk type == CHUNK_GZIP:
 *            source expanded len     (8)   [size of uncompressed source]
 *            target expected len     (8)   [size of uncompressed target]
 *            gzip level              (4)
 *            method                  (4)
 *            windowBits              (4)
 *            memLevel                (4)
 *            strategy                (4)
 *            gzip header len         (4)
 *            gzip header             (gzip header len)
 *            gzip footer             (8)
82 *
83 * All integers are little-endian. "source start" and "source len"
84 * specify the section of the input image that comprises this chunk,
85 * including the gzip header and footer for gzip chunks. "source
86 * expanded len" is the size of the uncompressed source data. "target
87 * expected len" is the size of the uncompressed data after applying
88 * the bsdiff patch. The next five parameters specify the zlib
89 * parameters to be used when compressing the patched data, and the
90 * next three specify the header and footer to be wrapped around the
91 * compressed data to create the output chunk (so that header contents
92 * like the timestamp are recreated exactly).
93 *
94 * After the header there are 'chunk count' bsdiff patches; the offset
95 * of each from the beginning of the file is specified in the header.
96 */
97
98#include <errno.h>
99#include <stdio.h>
100#include <stdlib.h>
101#include <string.h>
102#include <sys/stat.h>
103#include <unistd.h>
104
105#include "zlib.h"
106#include "imgdiff.h"
107
/*
 * Description of one chunk of an image file, as produced by
 * ReadImage().
 */
typedef struct {
  // CHUNK_NORMAL or CHUNK_GZIP.
  int type;

  // Byte offset of the chunk within the original image file.
  size_t start;

  // Number of bytes in 'data'.
  size_t len;

  // The bytes that get diffed.  For normal chunks this points into
  // the image buffer; for gzip chunks it is a separately malloc'd
  // buffer holding the uncompressed data.
  unsigned char* data;

  // ---- the remaining fields are meaningful for CHUNK_GZIP only ----

  // Gzip header: its length, and a pointer to it inside the image
  // buffer.
  size_t gzip_header_len;
  unsigned char* gzip_header;

  // Pointer to the gzip footer inside the image buffer.
  unsigned char* gzip_footer;

  // The original compressed bytes (header, deflate stream and footer
  // together): total length, and a pointer into the image buffer.
  size_t gzip_len;
  unsigned char* gzip_data;

  // Deflate encoder parameters that reproduce the compressed bytes.
  int level;
  int method;
  int windowBits;
  int memLevel;
  int strategy;
} ImageChunk;
129
130/*
131 * Read the given file and break it up into chunks, putting the number
132 * of chunks and their info in *num_chunks and **chunks,
133 * respectively. Returns a malloc'd block of memory containing the
134 * contents of the file; various pointers in the output chunk array
135 * will point into this block of memory. The caller should free the
136 * return value when done with all the chunks. Returns NULL on
137 * failure.
138 */
139unsigned char* ReadImage(const char* filename,
140 int* num_chunks, ImageChunk** chunks) {
141 struct stat st;
142 if (stat(filename, &st) != 0) {
143 fprintf(stderr, "failed to stat \"%s\": %s\n", filename, strerror(errno));
144 return NULL;
145 }
146
147 unsigned char* img = malloc(st.st_size + 4);
148 FILE* f = fopen(filename, "rb");
149 if (fread(img, 1, st.st_size, f) != st.st_size) {
150 fprintf(stderr, "failed to read \"%s\" %s\n", filename, strerror(errno));
151 fclose(f);
152 return NULL;
153 }
154 fclose(f);
155
156 // append 4 zero bytes to the data so we can always search for the
157 // four-byte string 1f8b0800 starting at any point in the actual
158 // file data, without special-casing the end of the data.
159 memset(img+st.st_size, 0, 4);
160
161 size_t pos = 0;
162
163 *num_chunks = 0;
164 *chunks = NULL;
165
166 while (pos < st.st_size) {
167 unsigned char* p = img+pos;
168
169 // Reallocate the list for every chunk; we expect the number of
170 // chunks to be small (5 for typical boot and recovery images).
171 ++*num_chunks;
172 *chunks = realloc(*chunks, *num_chunks * sizeof(ImageChunk));
173 ImageChunk* curr = *chunks + (*num_chunks-1);
174 curr->start = pos;
175
176 if (st.st_size - pos >= 4 &&
177 p[0] == 0x1f && p[1] == 0x8b &&
178 p[2] == 0x08 && // deflate compression
179 p[3] == 0x00) { // no header flags
180 // 'pos' is the offset of the start of a gzip chunk.
181
182 curr->type = CHUNK_GZIP;
183 curr->gzip_header_len = GZIP_HEADER_LEN;
184 curr->gzip_header = p;
185
186 // We must decompress this chunk in order to discover where it
187 // ends, and so we can put the uncompressed data and its length
188 // into curr->data and curr->len;
189
190 size_t allocated = 32768;
191 curr->len = 0;
192 curr->data = malloc(allocated);
193 curr->gzip_data = p;
194
195 z_stream strm;
196 strm.zalloc = Z_NULL;
197 strm.zfree = Z_NULL;
198 strm.opaque = Z_NULL;
199 strm.avail_in = st.st_size - (pos + curr->gzip_header_len);
200 strm.next_in = p + GZIP_HEADER_LEN;
201
202 // -15 means we are decoding a 'raw' deflate stream; zlib will
203 // not expect zlib headers.
204 int ret = inflateInit2(&strm, -15);
205
206 do {
207 strm.avail_out = allocated - curr->len;
208 strm.next_out = curr->data + curr->len;
209 ret = inflate(&strm, Z_NO_FLUSH);
210 curr->len = allocated - strm.avail_out;
211 if (strm.avail_out == 0) {
212 allocated *= 2;
213 curr->data = realloc(curr->data, allocated);
214 }
215 } while (ret != Z_STREAM_END);
216
217 curr->gzip_len = st.st_size - strm.avail_in - pos + GZIP_FOOTER_LEN;
218 pos = st.st_size - strm.avail_in;
219 inflateEnd(&strm);
220
221 // consume the gzip footer.
222 curr->gzip_footer = img+pos;
223 pos += GZIP_FOOTER_LEN;
224 p = img+pos;
225
226 // The footer (that we just skipped over) contains the size of
227 // the uncompressed data. Double-check to make sure that it
228 // matches the size of the data we got when we actually did
229 // the decompression.
230 size_t footer_size = p[-4] + (p[-3] << 8) + (p[-2] << 16) + (p[-1] << 24);
231 if (footer_size != curr->len) {
232 fprintf(stderr, "Error: footer size %d != decompressed size %d\n",
233 footer_size, curr->len);
234 free(img);
235 return NULL;
236 }
237 } else {
238 // 'pos' is not the offset of the start of a gzip chunk, so scan
239 // forward until we find a gzip header.
240 curr->type = CHUNK_NORMAL;
241 curr->data = p;
242
243 for (curr->len = 0; curr->len < (st.st_size - pos); ++curr->len) {
244 if (p[curr->len] == 0x1f &&
245 p[curr->len+1] == 0x8b &&
246 p[curr->len+2] == 0x08 &&
247 p[curr->len+3] == 0x00) {
248 break;
249 }
250 }
251 pos += curr->len;
252 }
253 }
254
255 return img;
256}
257
258#define BUFFER_SIZE 32768
259
260/*
261 * Takes the uncompressed data stored in the chunk, compresses it
262 * using the zlib parameters stored in the chunk, and checks that it
263 * matches exactly the compressed data we started with (also stored in
264 * the chunk). Return 0 on success.
265 */
266int TryReconstruction(ImageChunk* chunk, unsigned char* out) {
267 size_t p = chunk->gzip_header_len;
268
269 z_stream strm;
270 strm.zalloc = Z_NULL;
271 strm.zfree = Z_NULL;
272 strm.opaque = Z_NULL;
273 strm.avail_in = chunk->len;
274 strm.next_in = chunk->data;
275 int ret;
276 ret = deflateInit2(&strm, chunk->level, chunk->method, chunk->windowBits,
277 chunk->memLevel, chunk->strategy);
278 do {
279 strm.avail_out = BUFFER_SIZE;
280 strm.next_out = out;
281 ret = deflate(&strm, Z_FINISH);
282 size_t have = BUFFER_SIZE - strm.avail_out;
283
284 if (memcmp(out, chunk->gzip_data+p, have) != 0) {
285 // mismatch; data isn't the same.
286 deflateEnd(&strm);
287 return -1;
288 }
289 p += have;
290 } while (ret != Z_STREAM_END);
291 deflateEnd(&strm);
292 if (p + GZIP_FOOTER_LEN != chunk->gzip_len) {
293 // mismatch; ran out of data before we should have.
294 return -1;
295 }
296 return 0;
297}
298
299/*
300 * Verify that we can reproduce exactly the same compressed data that
301 * we started with. Sets the level, method, windowBits, memLevel, and
302 * strategy fields in the chunk to the encoding parameters needed to
303 * produce the right output. Returns 0 on success.
304 */
305int ReconstructGzipChunk(ImageChunk* chunk) {
306 if (chunk->type != CHUNK_GZIP) {
307 fprintf(stderr, "attempt to reconstruct non-gzip chunk\n");
308 return -1;
309 }
310
311 size_t p = 0;
312 unsigned char* out = malloc(BUFFER_SIZE);
313
314 // We only check two combinations of encoder parameters: level 6
315 // (the default) and level 9 (the maximum).
316 for (chunk->level = 6; chunk->level <= 9; chunk->level += 3) {
317 chunk->windowBits = -15; // 32kb window; negative to indicate a raw stream.
318 chunk->memLevel = 8; // the default value.
319 chunk->method = Z_DEFLATED;
320 chunk->strategy = Z_DEFAULT_STRATEGY;
321
322 if (TryReconstruction(chunk, out) == 0) {
323 free(out);
324 return 0;
325 }
326 }
327
328 free(out);
329 return -1;
330}
331
/**
 * Write the low 32 bits of 'value' to f as four bytes in
 * little-endian order (least significant byte first).
 */
void Write4(int value, FILE* f) {
  // Shift an unsigned copy: right-shifting a negative signed value is
  // implementation-defined.
  unsigned int bits = (unsigned int) value;
  int shift;
  for (shift = 0; shift < 32; shift += 8) {
    fputc((int) ((bits >> shift) & 0xff), f);
  }
}
339
/**
 * Write 'value' to f as eight bytes in little-endian order (least
 * significant byte first).
 */
void Write8(long long value, FILE* f) {
  // Shift an unsigned copy: right-shifting a negative signed value is
  // implementation-defined.
  unsigned long long bits = (unsigned long long) value;
  int shift;
  for (shift = 0; shift < 64; shift += 8) {
    fputc((int) ((bits >> shift) & 0xff), f);
  }
}
351
352
353/*
354 * Given source and target chunks, compute a bsdiff patch between them
355 * by running bsdiff in a subprocess. Return the patch data, placing
356 * its length in *size. Return NULL on failure. We expect the bsdiff
357 * program to be in the path.
358 */
359unsigned char* MakePatch(ImageChunk* src, ImageChunk* tgt, size_t* size) {
360 char stemp[] = "/tmp/imgdiff-src-XXXXXX";
361 char ttemp[] = "/tmp/imgdiff-tgt-XXXXXX";
362 char ptemp[] = "/tmp/imgdiff-patch-XXXXXX";
363 mkstemp(stemp);
364 mkstemp(ttemp);
365 mkstemp(ptemp);
366
367 FILE* f = fopen(stemp, "wb");
368 if (f == NULL) {
369 fprintf(stderr, "failed to open src chunk %s: %s\n",
370 stemp, strerror(errno));
371 return NULL;
372 }
373 if (fwrite(src->data, 1, src->len, f) != src->len) {
374 fprintf(stderr, "failed to write src chunk to %s: %s\n",
375 stemp, strerror(errno));
376 return NULL;
377 }
378 fclose(f);
379
380 f = fopen(ttemp, "wb");
381 if (f == NULL) {
382 fprintf(stderr, "failed to open tgt chunk %s: %s\n",
383 ttemp, strerror(errno));
384 return NULL;
385 }
386 if (fwrite(tgt->data, 1, tgt->len, f) != tgt->len) {
387 fprintf(stderr, "failed to write tgt chunk to %s: %s\n",
388 ttemp, strerror(errno));
389 return NULL;
390 }
391 fclose(f);
392
393 char cmd[200];
394 sprintf(cmd, "bsdiff %s %s %s", stemp, ttemp, ptemp);
395 if (system(cmd) != 0) {
396 fprintf(stderr, "failed to run bsdiff: %s\n", strerror(errno));
397 return NULL;
398 }
399
400 struct stat st;
401 if (stat(ptemp, &st) != 0) {
402 fprintf(stderr, "failed to stat patch file %s: %s\n",
403 ptemp, strerror(errno));
404 return NULL;
405 }
406
407 unsigned char* data = malloc(st.st_size);
408 *size = st.st_size;
409
410 f = fopen(ptemp, "rb");
411 if (f == NULL) {
412 fprintf(stderr, "failed to open patch %s: %s\n", ptemp, strerror(errno));
413 return NULL;
414 }
415 if (fread(data, 1, st.st_size, f) != st.st_size) {
416 fprintf(stderr, "failed to read patch %s: %s\n", ptemp, strerror(errno));
417 return NULL;
418 }
419 fclose(f);
420
421 unlink(stemp);
422 unlink(ttemp);
423 unlink(ptemp);
424
425 return data;
426}
427
428/*
429 * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob
430 * of uninterpreted data). The resulting patch will likely be about
431 * as big as the target file, but it lets us handle the case of images
432 * where some gzip chunks are reconstructible but others aren't (by
433 * treating the ones that aren't as normal chunks).
434 */
435void ChangeGzipChunkToNormal(ImageChunk* ch) {
436 ch->type = CHUNK_NORMAL;
437 free(ch->data);
438 ch->data = ch->gzip_data;
439 ch->len = ch->gzip_len;
440}
441
442int main(int argc, char** argv) {
443 if (argc != 4) {
444 fprintf(stderr, "usage: %s <src-img> <tgt-img> <patch-file>\n", argv[0]);
445 return 2;
446 }
447
448 int num_src_chunks;
449 ImageChunk* src_chunks;
450 if (ReadImage(argv[1], &num_src_chunks, &src_chunks) == NULL) {
451 fprintf(stderr, "failed to break apart source image\n");
452 return 1;
453 }
454
455 int num_tgt_chunks;
456 ImageChunk* tgt_chunks;
457 if (ReadImage(argv[2], &num_tgt_chunks, &tgt_chunks) == NULL) {
458 fprintf(stderr, "failed to break apart target image\n");
459 return 1;
460 }
461
462 // Verify that the source and target images have the same chunk
463 // structure (ie, the same sequence of gzip and normal chunks).
464
465 if (num_src_chunks != num_tgt_chunks) {
466 fprintf(stderr, "source and target don't have same number of chunks!\n");
467 return 1;
468 }
469 int i;
470 for (i = 0; i < num_src_chunks; ++i) {
471 if (src_chunks[i].type != tgt_chunks[i].type) {
472 fprintf(stderr, "source and target don't have same chunk "
473 "structure! (chunk %d)\n", i);
474 return 1;
475 }
476 }
477
478 // Confirm that given the uncompressed chunk data in the target, we
479 // can recompress it and get exactly the same bits as are in the
480 // input target image. If this fails, treat the chunk as a normal
481 // non-gzipped chunk.
482
483 for (i = 0; i < num_tgt_chunks; ++i) {
484 if (tgt_chunks[i].type == CHUNK_GZIP) {
485 if (ReconstructGzipChunk(tgt_chunks+i) < 0) {
486 printf("failed to reconstruct target gzip chunk %d; "
487 "treating as normal chunk\n", i);
488 ChangeGzipChunkToNormal(tgt_chunks+i);
489 ChangeGzipChunkToNormal(src_chunks+i);
490 } else {
491 printf("reconstructed target gzip chunk %d\n", i);
492 }
493 }
494 }
495
496 // Compute bsdiff patches for each chunk's data (the uncompressed
497 // data, in the case of gzip chunks).
498
499 unsigned char** patch_data = malloc(num_src_chunks * sizeof(unsigned char*));
500 size_t* patch_size = malloc(num_src_chunks * sizeof(size_t));
501 for (i = 0; i < num_src_chunks; ++i) {
502 patch_data[i] = MakePatch(src_chunks+i, tgt_chunks+i, patch_size+i);
503 printf("patch %d is %d bytes (of %d)\n", i, patch_size[i],
504 tgt_chunks[i].type == CHUNK_NORMAL ? tgt_chunks[i].len : tgt_chunks[i].gzip_len);
505
506 }
507
508 // Figure out how big the imgdiff file header is going to be, so
509 // that we can correctly compute the offset of each bsdiff patch
510 // within the file.
511
512 size_t total_header_size = 12;
513 for (i = 0; i < num_src_chunks; ++i) {
514 total_header_size += 4 + 8*3;
515 if (src_chunks[i].type == CHUNK_GZIP) {
516 total_header_size += 8*2 + 4*6 + tgt_chunks[i].gzip_header_len + 8;
517 }
518 }
519
520 size_t offset = total_header_size;
521
522 FILE* f = fopen(argv[3], "wb");
523
524 // Write out the headers.
525
526 fwrite("IMGDIFF1", 1, 8, f);
527 Write4(num_src_chunks, f);
528 for (i = 0; i < num_tgt_chunks; ++i) {
529 Write4(tgt_chunks[i].type, f);
530 Write8(src_chunks[i].start, f);
531 Write8(src_chunks[i].type == CHUNK_NORMAL ? src_chunks[i].len :
532 (src_chunks[i].gzip_len + src_chunks[i].gzip_header_len + 8), f);
533 Write8(offset, f);
534
535 if (tgt_chunks[i].type == CHUNK_GZIP) {
536 Write8(src_chunks[i].len, f);
537 Write8(tgt_chunks[i].len, f);
538 Write4(tgt_chunks[i].level, f);
539 Write4(tgt_chunks[i].method, f);
540 Write4(tgt_chunks[i].windowBits, f);
541 Write4(tgt_chunks[i].memLevel, f);
542 Write4(tgt_chunks[i].strategy, f);
543 Write4(tgt_chunks[i].gzip_header_len, f);
544 fwrite(tgt_chunks[i].gzip_header, 1, tgt_chunks[i].gzip_header_len, f);
545 fwrite(tgt_chunks[i].gzip_footer, 1, GZIP_FOOTER_LEN, f);
546 }
547
548 offset += patch_size[i];
549 }
550
551 // Append each chunk's bsdiff patch, in order.
552
553 for (i = 0; i < num_tgt_chunks; ++i) {
554 fwrite(patch_data[i], 1, patch_size[i], f);
555 }
556
557 fclose(f);
558
559 return 0;
560}