Blame - tools/applypatch/imgdiff.c - platform_build_make

blob: f0b5feaeefaf5bfe504db1e400822651fa5a1a84 [file] [log] [blame]

Doug Zongker	02d444b	2009-05-27 18:24:03 -0700	[diff] [blame^]	1	/*
				2	* Copyright (C) 2009 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	/*
				18	* This program constructs binary patches for images -- such as boot.img
				19	* and recovery.img -- that consist primarily of large chunks of gzipped
				20	* data interspersed with uncompressed data. Doing a naive bsdiff of
				21	* these files is not useful because small changes in the data lead to
				22	* large changes in the compressed bitstream; bsdiff patches of gzipped
				23	* data are typically as large as the data itself.
				24	*
				25	* To patch these usefully, we break the source and target images up into
				26	* chunks of two types: "normal" and "gzip". Normal chunks are simply
				27	* patched using a plain bsdiff. Gzip chunks are first expanded, then a
				28	* bsdiff is applied to the uncompressed data, then the patched data is
				29	* gzipped using the same encoder parameters. Patched chunks are
				30	* concatenated together to create the output file; the output image
				31	* should be exactly the same series of bytes as the target image used
				32	* originally to generate the patch.
				33	*
				34	* To work well with this tool, the gzipped sections of the target
				35	* image must have been generated using the same deflate encoder that
				36	* is available in applypatch, namely, the one in the zlib library.
				37	* In practice this means that images should be compressed using the
				38	* "minigzip" tool included in the zlib distribution, not the GNU gzip
				39	* program.
				40	*
				41	* An "imgdiff" patch consists of a header describing the chunk structure
				42	* of the file and any encoding parameters needed for the gzipped
				43	* chunks, followed by N bsdiff patches, one per chunk.
				44	*
				45	* For a diff to be generated, the source and target images must have the
				46	* same "chunk" structure: that is, the same number of gzipped and normal
				47	* chunks in the same order. Android boot and recovery images currently
				48	* consist of five chunks: a small normal header, a gzipped kernel, a
				49	* small normal section, a gzipped ramdisk, and finally a small normal
				50	* footer.
				51	*
				52	* Caveats: we locate gzipped sections within the source and target
				53	* images by searching for the byte sequence 1f8b0800: 1f8b is the gzip
				54	* magic number; 08 specifies the "deflate" encoding [the only encoding
				55	* supported by the gzip standard]; and 00 is the flags byte. We do not
				56	* currently support any extra header fields (which would be indicated by
				57	* a nonzero flags byte). We also don't handle the case when that byte
				58	* sequence appears spuriously in the file. (Note that it would have to
				59	* occur spuriously within a normal chunk to be a problem.)
				60	*
				61	*
				62	* The imgdiff patch header looks like this:
				63	*
				64	*    "IMGDIFF1"                  (8)   [magic number and version]
				65	*    chunk count                 (4)
				66	*    for each chunk:
				67	*        chunk type              (4)   [CHUNK_NORMAL or CHUNK_GZIP]
				68	*        source start            (8)
				69	*        source len              (8)
				70	*        bsdiff patch offset     (8)   [from start of patch file]
				71	*        if chunk type == CHUNK_GZIP:
				72	*           source expanded len  (8)   [size of uncompressed source]
				73	*           target expected len  (8)   [size of uncompressed target]
				74	*           gzip level           (4)
				75	*                method          (4)
				76	*                windowBits      (4)
				77	*                memLevel        (4)
				78	*                strategy        (4)
				79	*           gzip header len      (4)
				80	*           gzip header          (gzip header len)
				81	*           gzip footer          (8)
				82	*
				83	* All integers are little-endian. "source start" and "source len"
				84	* specify the section of the input image that comprises this chunk,
				85	* including the gzip header and footer for gzip chunks. "source
				86	* expanded len" is the size of the uncompressed source data. "target
				87	* expected len" is the size of the uncompressed data after applying
				88	* the bsdiff patch. The next five parameters specify the zlib
				89	* parameters to be used when compressing the patched data, and the
				90	* next three specify the header and footer to be wrapped around the
				91	* compressed data to create the output chunk (so that header contents
				92	* like the timestamp are recreated exactly).
				93	*
				94	* After the header there are 'chunk count' bsdiff patches; the offset
				95	* of each from the beginning of the file is specified in the header.
				96	*/
				97
				98	#include <errno.h>
				99	#include <stdio.h>
				100	#include <stdlib.h>
				101	#include <string.h>
				102	#include <sys/stat.h>
				103	#include <unistd.h>
				104
				105	#include "zlib.h"
				106	#include "imgdiff.h"
				107
				/*
				 * Describes one chunk of an image file: either a "normal" run of
				 * bytes or a gzip member together with its uncompressed contents.
				 */
				typedef struct {
				    // Kind of chunk: CHUNK_NORMAL or CHUNK_GZIP.
				    int type;
				    // Offset of the chunk within the original image file.
				    size_t start;

				    // Number of bytes available at 'data'.
				    size_t len;
				    // The bytes that get patched.  For gzip chunks this is the
				    // uncompressed payload rather than the bytes found in the image.
				    unsigned char* data;

				    // ---- Everything below is meaningful for CHUNK_GZIP chunks only. ----

				    size_t gzip_header_len;
				    unsigned char* gzip_header;
				    unsigned char* gzip_footer;

				    // The original (compressed) gzip bytes, header and footer included.
				    size_t gzip_len;
				    unsigned char* gzip_data;

				    // Parameters for the deflate encoder.
				    int level;
				    int method;
				    int windowBits;
				    int memLevel;
				    int strategy;
				} ImageChunk;
				129
				130	/*
				131	* Read the given file and break it up into chunks, putting the number
				132	* of chunks and their info in num_chunks and *chunks,
				133	* respectively. Returns a malloc'd block of memory containing the
				134	* contents of the file; various pointers in the output chunk array
				135	* will point into this block of memory. The caller should free the
				136	* return value when done with all the chunks. Returns NULL on
				137	* failure.
				138	*/
				139	unsigned char* ReadImage(const char* filename,
				140	int* num_chunks, ImageChunk** chunks) {
				141	struct stat st;
				142	if (stat(filename, &st) != 0) {
				143	fprintf(stderr, "failed to stat \"%s\": %s\n", filename, strerror(errno));
				144	return NULL;
				145	}
				146
				147	unsigned char* img = malloc(st.st_size + 4);
				148	FILE* f = fopen(filename, "rb");
				149	if (fread(img, 1, st.st_size, f) != st.st_size) {
				150	fprintf(stderr, "failed to read \"%s\" %s\n", filename, strerror(errno));
				151	fclose(f);
				152	return NULL;
				153	}
				154	fclose(f);
				155
				156	// append 4 zero bytes to the data so we can always search for the
				157	// four-byte string 1f8b0800 starting at any point in the actual
				158	// file data, without special-casing the end of the data.
				159	memset(img+st.st_size, 0, 4);
				160
				161	size_t pos = 0;
				162
				163	*num_chunks = 0;
				164	*chunks = NULL;
				165
				166	while (pos < st.st_size) {
				167	unsigned char* p = img+pos;
				168
				169	// Reallocate the list for every chunk; we expect the number of
				170	// chunks to be small (5 for typical boot and recovery images).
				171	++*num_chunks;
				172	chunks = realloc(chunks, num_chunks sizeof(ImageChunk));
				173	ImageChunk* curr = chunks + (num_chunks-1);
				174	curr->start = pos;
				175
				176	if (st.st_size - pos >= 4 &&
				177	p[0] == 0x1f && p[1] == 0x8b &&
				178	p[2] == 0x08 && // deflate compression
				179	p[3] == 0x00) { // no header flags
				180	// 'pos' is the offset of the start of a gzip chunk.
				181
				182	curr->type = CHUNK_GZIP;
				183	curr->gzip_header_len = GZIP_HEADER_LEN;
				184	curr->gzip_header = p;
				185
				186	// We must decompress this chunk in order to discover where it
				187	// ends, and so we can put the uncompressed data and its length
				188	// into curr->data and curr->len;
				189
				190	size_t allocated = 32768;
				191	curr->len = 0;
				192	curr->data = malloc(allocated);
				193	curr->gzip_data = p;
				194
				195	z_stream strm;
				196	strm.zalloc = Z_NULL;
				197	strm.zfree = Z_NULL;
				198	strm.opaque = Z_NULL;
				199	strm.avail_in = st.st_size - (pos + curr->gzip_header_len);
				200	strm.next_in = p + GZIP_HEADER_LEN;
				201
				202	// -15 means we are decoding a 'raw' deflate stream; zlib will
				203	// not expect zlib headers.
				204	int ret = inflateInit2(&strm, -15);
				205
				206	do {
				207	strm.avail_out = allocated - curr->len;
				208	strm.next_out = curr->data + curr->len;
				209	ret = inflate(&strm, Z_NO_FLUSH);
				210	curr->len = allocated - strm.avail_out;
				211	if (strm.avail_out == 0) {
				212	allocated *= 2;
				213	curr->data = realloc(curr->data, allocated);
				214	}
				215	} while (ret != Z_STREAM_END);
				216
				217	curr->gzip_len = st.st_size - strm.avail_in - pos + GZIP_FOOTER_LEN;
				218	pos = st.st_size - strm.avail_in;
				219	inflateEnd(&strm);
				220
				221	// consume the gzip footer.
				222	curr->gzip_footer = img+pos;
				223	pos += GZIP_FOOTER_LEN;
				224	p = img+pos;
				225
				226	// The footer (that we just skipped over) contains the size of
				227	// the uncompressed data. Double-check to make sure that it
				228	// matches the size of the data we got when we actually did
				229	// the decompression.
				230	size_t footer_size = p[-4] + (p[-3] << 8) + (p[-2] << 16) + (p[-1] << 24);
				231	if (footer_size != curr->len) {
				232	fprintf(stderr, "Error: footer size %d != decompressed size %d\n",
				233	footer_size, curr->len);
				234	free(img);
				235	return NULL;
				236	}
				237	} else {
				238	// 'pos' is not the offset of the start of a gzip chunk, so scan
				239	// forward until we find a gzip header.
				240	curr->type = CHUNK_NORMAL;
				241	curr->data = p;
				242
				243	for (curr->len = 0; curr->len < (st.st_size - pos); ++curr->len) {
				244	if (p[curr->len] == 0x1f &&
				245	p[curr->len+1] == 0x8b &&
				246	p[curr->len+2] == 0x08 &&
				247	p[curr->len+3] == 0x00) {
				248	break;
				249	}
				250	}
				251	pos += curr->len;
				252	}
				253	}
				254
				255	return img;
				256	}
				257
				258	#define BUFFER_SIZE 32768
				259
				260	/*
				261	* Takes the uncompressed data stored in the chunk, compresses it
				262	* using the zlib parameters stored in the chunk, and checks that it
				263	* matches exactly the compressed data we started with (also stored in
				264	* the chunk). Return 0 on success.
				265	*/
				266	int TryReconstruction(ImageChunk* chunk, unsigned char* out) {
				267	size_t p = chunk->gzip_header_len;
				268
				269	z_stream strm;
				270	strm.zalloc = Z_NULL;
				271	strm.zfree = Z_NULL;
				272	strm.opaque = Z_NULL;
				273	strm.avail_in = chunk->len;
				274	strm.next_in = chunk->data;
				275	int ret;
				276	ret = deflateInit2(&strm, chunk->level, chunk->method, chunk->windowBits,
				277	chunk->memLevel, chunk->strategy);
				278	do {
				279	strm.avail_out = BUFFER_SIZE;
				280	strm.next_out = out;
				281	ret = deflate(&strm, Z_FINISH);
				282	size_t have = BUFFER_SIZE - strm.avail_out;
				283
				284	if (memcmp(out, chunk->gzip_data+p, have) != 0) {
				285	// mismatch; data isn't the same.
				286	deflateEnd(&strm);
				287	return -1;
				288	}
				289	p += have;
				290	} while (ret != Z_STREAM_END);
				291	deflateEnd(&strm);
				292	if (p + GZIP_FOOTER_LEN != chunk->gzip_len) {
				293	// mismatch; ran out of data before we should have.
				294	return -1;
				295	}
				296	return 0;
				297	}
				298
				299	/*
				300	* Verify that we can reproduce exactly the same compressed data that
				301	* we started with. Sets the level, method, windowBits, memLevel, and
				302	* strategy fields in the chunk to the encoding parameters needed to
				303	* produce the right output. Returns 0 on success.
				304	*/
				305	int ReconstructGzipChunk(ImageChunk* chunk) {
				306	if (chunk->type != CHUNK_GZIP) {
				307	fprintf(stderr, "attempt to reconstruct non-gzip chunk\n");
				308	return -1;
				309	}
				310
				311	size_t p = 0;
				312	unsigned char* out = malloc(BUFFER_SIZE);
				313
				314	// We only check two combinations of encoder parameters: level 6
				315	// (the default) and level 9 (the maximum).
				316	for (chunk->level = 6; chunk->level <= 9; chunk->level += 3) {
				317	chunk->windowBits = -15; // 32kb window; negative to indicate a raw stream.
				318	chunk->memLevel = 8; // the default value.
				319	chunk->method = Z_DEFLATED;
				320	chunk->strategy = Z_DEFAULT_STRATEGY;
				321
				322	if (TryReconstruction(chunk, out) == 0) {
				323	free(out);
				324	return 0;
				325	}
				326	}
				327
				328	free(out);
				329	return -1;
				330	}
				331
				/** Emit the four bytes of value to f, least-significant byte first. */
				void Write4(int value, FILE* f) {
				    int shift;
				    for (shift = 0; shift < 32; shift += 8) {
				        fputc((value >> shift) & 0xff, f);
				    }
				}
				339
				/** Emit the eight bytes of value to f, least-significant byte first. */
				void Write8(long long value, FILE* f) {
				    int shift;
				    for (shift = 0; shift < 64; shift += 8) {
				        fputc((value >> shift) & 0xff, f);
				    }
				}
				351
				352
				353	/*
				354	* Given source and target chunks, compute a bsdiff patch between them
				355	* by running bsdiff in a subprocess. Return the patch data, placing
				356	* its length in *size. Return NULL on failure. We expect the bsdiff
				357	* program to be in the path.
				358	*/
				359	unsigned char* MakePatch(ImageChunk* src, ImageChunk* tgt, size_t* size) {
				360	char stemp[] = "/tmp/imgdiff-src-XXXXXX";
				361	char ttemp[] = "/tmp/imgdiff-tgt-XXXXXX";
				362	char ptemp[] = "/tmp/imgdiff-patch-XXXXXX";
				363	mkstemp(stemp);
				364	mkstemp(ttemp);
				365	mkstemp(ptemp);
				366
				367	FILE* f = fopen(stemp, "wb");
				368	if (f == NULL) {
				369	fprintf(stderr, "failed to open src chunk %s: %s\n",
				370	stemp, strerror(errno));
				371	return NULL;
				372	}
				373	if (fwrite(src->data, 1, src->len, f) != src->len) {
				374	fprintf(stderr, "failed to write src chunk to %s: %s\n",
				375	stemp, strerror(errno));
				376	return NULL;
				377	}
				378	fclose(f);
				379
				380	f = fopen(ttemp, "wb");
				381	if (f == NULL) {
				382	fprintf(stderr, "failed to open tgt chunk %s: %s\n",
				383	ttemp, strerror(errno));
				384	return NULL;
				385	}
				386	if (fwrite(tgt->data, 1, tgt->len, f) != tgt->len) {
				387	fprintf(stderr, "failed to write tgt chunk to %s: %s\n",
				388	ttemp, strerror(errno));
				389	return NULL;
				390	}
				391	fclose(f);
				392
				393	char cmd[200];
				394	sprintf(cmd, "bsdiff %s %s %s", stemp, ttemp, ptemp);
				395	if (system(cmd) != 0) {
				396	fprintf(stderr, "failed to run bsdiff: %s\n", strerror(errno));
				397	return NULL;
				398	}
				399
				400	struct stat st;
				401	if (stat(ptemp, &st) != 0) {
				402	fprintf(stderr, "failed to stat patch file %s: %s\n",
				403	ptemp, strerror(errno));
				404	return NULL;
				405	}
				406
				407	unsigned char* data = malloc(st.st_size);
				408	*size = st.st_size;
				409
				410	f = fopen(ptemp, "rb");
				411	if (f == NULL) {
				412	fprintf(stderr, "failed to open patch %s: %s\n", ptemp, strerror(errno));
				413	return NULL;
				414	}
				415	if (fread(data, 1, st.st_size, f) != st.st_size) {
				416	fprintf(stderr, "failed to read patch %s: %s\n", ptemp, strerror(errno));
				417	return NULL;
				418	}
				419	fclose(f);
				420
				421	unlink(stemp);
				422	unlink(ttemp);
				423	unlink(ptemp);
				424
				425	return data;
				426	}
				427
				428	/*
				429	* Cause a gzip chunk to be treated as a normal chunk (ie, as a blob
				430	* of uninterpreted data). The resulting patch will likely be about
				431	* as big as the target file, but it lets us handle the case of images
				432	* where some gzip chunks are reconstructible but others aren't (by
				433	* treating the ones that aren't as normal chunks).
				434	*/
				435	void ChangeGzipChunkToNormal(ImageChunk* ch) {
				436	ch->type = CHUNK_NORMAL;
				437	free(ch->data);
				438	ch->data = ch->gzip_data;
				439	ch->len = ch->gzip_len;
				440	}
				441
				442	int main(int argc, char** argv) {
				443	if (argc != 4) {
				444	fprintf(stderr, "usage: %s <src-img> <tgt-img> <patch-file>\n", argv[0]);
				445	return 2;
				446	}
				447
				448	int num_src_chunks;
				449	ImageChunk* src_chunks;
				450	if (ReadImage(argv[1], &num_src_chunks, &src_chunks) == NULL) {
				451	fprintf(stderr, "failed to break apart source image\n");
				452	return 1;
				453	}
				454
				455	int num_tgt_chunks;
				456	ImageChunk* tgt_chunks;
				457	if (ReadImage(argv[2], &num_tgt_chunks, &tgt_chunks) == NULL) {
				458	fprintf(stderr, "failed to break apart target image\n");
				459	return 1;
				460	}
				461
				462	// Verify that the source and target images have the same chunk
				463	// structure (ie, the same sequence of gzip and normal chunks).
				464
				465	if (num_src_chunks != num_tgt_chunks) {
				466	fprintf(stderr, "source and target don't have same number of chunks!\n");
				467	return 1;
				468	}
				469	int i;
				470	for (i = 0; i < num_src_chunks; ++i) {
				471	if (src_chunks[i].type != tgt_chunks[i].type) {
				472	fprintf(stderr, "source and target don't have same chunk "
				473	"structure! (chunk %d)\n", i);
				474	return 1;
				475	}
				476	}
				477
				478	// Confirm that given the uncompressed chunk data in the target, we
				479	// can recompress it and get exactly the same bits as are in the
				480	// input target image. If this fails, treat the chunk as a normal
				481	// non-gzipped chunk.
				482
				483	for (i = 0; i < num_tgt_chunks; ++i) {
				484	if (tgt_chunks[i].type == CHUNK_GZIP) {
				485	if (ReconstructGzipChunk(tgt_chunks+i) < 0) {
				486	printf("failed to reconstruct target gzip chunk %d; "
				487	"treating as normal chunk\n", i);
				488	ChangeGzipChunkToNormal(tgt_chunks+i);
				489	ChangeGzipChunkToNormal(src_chunks+i);
				490	} else {
				491	printf("reconstructed target gzip chunk %d\n", i);
				492	}
				493	}
				494	}
				495
				496	// Compute bsdiff patches for each chunk's data (the uncompressed
				497	// data, in the case of gzip chunks).
				498
				499	unsigned char** patch_data = malloc(num_src_chunks * sizeof(unsigned char*));
				500	size_t* patch_size = malloc(num_src_chunks * sizeof(size_t));
				501	for (i = 0; i < num_src_chunks; ++i) {
				502	patch_data[i] = MakePatch(src_chunks+i, tgt_chunks+i, patch_size+i);
				503	printf("patch %d is %d bytes (of %d)\n", i, patch_size[i],
				504	tgt_chunks[i].type == CHUNK_NORMAL ? tgt_chunks[i].len : tgt_chunks[i].gzip_len);
				505
				506	}
				507
				508	// Figure out how big the imgdiff file header is going to be, so
				509	// that we can correctly compute the offset of each bsdiff patch
				510	// within the file.
				511
				512	size_t total_header_size = 12;
				513	for (i = 0; i < num_src_chunks; ++i) {
				514	total_header_size += 4 + 8*3;
				515	if (src_chunks[i].type == CHUNK_GZIP) {
				516	total_header_size += 82 + 46 + tgt_chunks[i].gzip_header_len + 8;
				517	}
				518	}
				519
				520	size_t offset = total_header_size;
				521
				522	FILE* f = fopen(argv[3], "wb");
				523
				524	// Write out the headers.
				525
				526	fwrite("IMGDIFF1", 1, 8, f);
				527	Write4(num_src_chunks, f);
				528	for (i = 0; i < num_tgt_chunks; ++i) {
				529	Write4(tgt_chunks[i].type, f);
				530	Write8(src_chunks[i].start, f);
				531	Write8(src_chunks[i].type == CHUNK_NORMAL ? src_chunks[i].len :
				532	(src_chunks[i].gzip_len + src_chunks[i].gzip_header_len + 8), f);
				533	Write8(offset, f);
				534
				535	if (tgt_chunks[i].type == CHUNK_GZIP) {
				536	Write8(src_chunks[i].len, f);
				537	Write8(tgt_chunks[i].len, f);
				538	Write4(tgt_chunks[i].level, f);
				539	Write4(tgt_chunks[i].method, f);
				540	Write4(tgt_chunks[i].windowBits, f);
				541	Write4(tgt_chunks[i].memLevel, f);
				542	Write4(tgt_chunks[i].strategy, f);
				543	Write4(tgt_chunks[i].gzip_header_len, f);
				544	fwrite(tgt_chunks[i].gzip_header, 1, tgt_chunks[i].gzip_header_len, f);
				545	fwrite(tgt_chunks[i].gzip_footer, 1, GZIP_FOOTER_LEN, f);
				546	}
				547
				548	offset += patch_size[i];
				549	}
				550
				551	// Append each chunk's bsdiff patch, in order.
				552
				553	for (i = 0; i < num_tgt_chunks; ++i) {
				554	fwrite(patch_data[i], 1, patch_size[i], f);
				555	}
				556
				557	fclose(f);
				558
				559	return 0;
				560	}