imagedelta.c 25.3 KB
Newer Older
1
/*
2
 * Copyright (c) 2000-2015 University of Utah and the Flux Group.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * 
 * {{{EMULAB-LICENSE
 * 
 * This file is part of the Emulab network testbed software.
 * 
 * This file is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
 * License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this file.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * }}}
 */

Mike Hibler's avatar
Mike Hibler committed
24
//#define CHUNKIFY_DEBUG
25 26

/*
 * imagedelta [ -S -f ] image1.ndz image2.ndz delta1to2.ndz
 *
 * Take two images (image1, image2) and produce a delta (delta1to2)
 * based on the differences. The -S option says to use the signature
 * files: image1.ndz.sig and image2.ndz.sig, if possible to determine
 * differences between the images. Signature files will be rejected
 * unless they can be positively matched with the image (right now,
 * via the modtime!) Using -f will force it to use a questionable
 * signature file.
 *
 * Without signature files, we compare the corresponding areas of both
 * images to determine if they are different.
 *
 * Note that order matters here! We are generating a delta to get from
 * "image1" to "image2"; i.e., doing:
 *
 *  imageunzip image1.ndz /dev/da0
 *  imageunzip delta1to2.ndz /dev/da0
 *
 * would be identical to:
 *
 *  imageunzip image2.ndz /dev/da0
 *
 * Approach:
 *
 * We scan the chunk headers of both images (image1, image2) to produce
 * allocated range lists for both (R1, R2). We use these to produce a
 * range list for the delta (RD) as follows.
 *
 * - Anything that is in R1 but not R2 does not go in RD.
 * - Anything in R2 but not in R1 must go into RD.
 * - For overlapping areas, we read and hash or compare both and,
 *   if different, include in RD.
 * - Using RD, select data from image2 that need to be read, decompressed
 *   and then recompressed into the new image.
 *
 * There is the usual issue of dealing with the difference in granularity
 * and alignment of ranges (arbitrary multiples of 512 byte) vs. hash
 * blocks (64K byte), but that logic exists in imagezip today.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>
#include <zlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <errno.h>
#include <openssl/sha.h>
#include <openssl/md5.h>
#ifndef NOTHREADS
#include <pthread.h>
#endif

#include "imagehdr.h"
#include "imagehash.h"
#include "libndz/libndz.h"

Mike Hibler's avatar
Mike Hibler committed
87
/*
 * Everything we track for one image file: the two inputs (ndz1, ndz2)
 * and the delta being produced.
 */
struct fileinfo {
    /* handle returned by ndz_open() */
    struct ndz_file *ndz;
    /* malloc'ed name of the signature file ("<image>.sig") */
    char *sigfile;
    /* range map for the image */
    struct ndz_rangemap *map;
    /* signature (hash) map for the image */
    struct ndz_rangemap *sigmap;
};

struct fileinfo ndz1, ndz2, delta;
92

Mike Hibler's avatar
Mike Hibler committed
93 94 95 96
static int usesigfiles = 0;
static int forcesig = 0;
static int debug = 0;
static int verify = 0;
97
static int clevel = 4;
98
static int fullsig = 0;
Mike Hibler's avatar
Mike Hibler committed
99 100 101
static int hashtype = HASH_TYPE_SHA1;
static int hashlen = 20;
static long hashblksize = HASHBLK_SIZE / 512;
102 103 104 105 106

/*
 * Print the command synopsis and option summary on stderr and exit
 * with status 1. Does not return.
 */
void
usage(void)
{
    /* synopsis lists every flag described below, including -F */
    fprintf(stderr,
	    "Usage: imagedelta [-SVFfd] [-b blksize] [-D hashfunc] [-z level] image1.ndz image2.ndz delta1to2.ndz\n"
	    "\n"
	    "Produce a delta image (delta1to2) containing the changes\n"
	    "necessary to get from image1 to image2.\n"
	    "\n"
	    "-S         Use signature files when computing differences.\n"
	    "-V         Verify consistency of image and signature.\n"
	    "-F         Generate a full-image signature for the delta.\n"
	    "-f         Force imagedelta to use a questionable sigfile.\n"
	    "-d         Enable debugging.\n"
	    "-D hfunc   Hash function to use (md5 or sha1).\n"
	    "-b blksize Size of hash blocks (512 <= size <= 32M).\n"
	    "-z level   Compression level (0 to 9).\n");

    exit(1);
}

Mike Hibler's avatar
Mike Hibler committed
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
/*
 * Iterator for ranges in the image map.
 * Validate that entries match up with those in the signature map.
 */
static int
verifyfunc(struct ndz_rangemap *imap, struct ndz_range *range, void *arg)
{
    struct ndz_range **smnext = arg;
    struct ndz_range *srange = *smnext;
    ndz_addr_t addr, eaddr;

    addr = range->start;
    eaddr = range->end;

    /*
     * Every image range should correspond to an integral number of
     * signature map entries.
     */
    while (addr <= eaddr && srange) {
	if (srange->start != addr || srange->end > eaddr) {
	    /*
	     * XXX argh! One anomaly is when an image region gets
	     * split across chunks, in which case it appears as distinct
	     * ranges in our image map. Here we look ahead behind to
	     * identify those cases...
	     */
	    if (srange->data) {
151
		struct ndz_hashdata *hd = (struct ndz_hashdata *)srange->data;
Mike Hibler's avatar
Mike Hibler committed
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197

		if (HASH_CHUNKDOESSPAN(hd->chunkno)) {
		    /*
		     * If starts line up then make sure following
		     * image map entry is contiguous with us. If so,
		     * assume this is the special case and return
		     * without incrementing the sigmap entry.
		     */
		    if (srange->start == addr && range->next &&
			range->next->start == range->end + 1) {
			*smnext = srange;
			return 0;
		    }
		    /*
		     * See if we are on the other side of the anomaly.
		     * Here the srange start will be before the image
		     * map range start and the previous image range
		     * should be contiguous with us. If so, advance to
		     * the next srange and continue.
		     */
		    if (addr == range->start &&
			srange->start < addr && srange->end <= eaddr &&
			ndz_rangemap_lookup(imap, addr-1, NULL) != NULL) {
			addr = srange->end + 1;
			srange = srange->next;
			continue;
		    }
		}
	    }
	    fprintf(stderr, "  *** [%lu-%lu]: bad sigentry [%lu-%lu]\n",
		    range->start, eaddr, srange->start, srange->end);
	    return 1;
	}
	addr = srange->end + 1;
	srange = srange->next;
    }
    if (addr <= eaddr) {
	fprintf(stderr, "  *** [%lu-%lu]: signature map too short!\n",
		range->start, range->end);
	return 1;
    }

    *smnext = srange;
    return 0;
}

198 199 200 201 202 203
/*
 * File must exist and be readable.
 * If usesigfiles is set, signature file must exist as well.
 * Reads in the range map and signature as well.
 */
void
Mike Hibler's avatar
Mike Hibler committed
204
openifile(char *file, struct fileinfo *info)
205
{
206
    int sigfd;
207 208 209 210 211 212 213 214 215

    info->ndz = ndz_open(file, 0);
    if (info->ndz == NULL) {
	fprintf(stderr, "%s: could not open as NDZ file\n",
		ndz_filename(info->ndz));
	exit(1);
    }

    if (usesigfiles) {
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
	struct stat sb1, sb2;

	info->sigfile = malloc(strlen(file) + 5);
	assert(info->sigfile != NULL);
	strcpy(info->sigfile, file);
	strcat(info->sigfile, ".sig");
	sigfd = open(info->sigfile, 0);
	if (sigfd < 0) {
	    fprintf(stderr, "%s: could not find signature file %s\n",
		    file, info->sigfile);
	    exit(1);
	}
	if (fstat(info->ndz->fd, &sb1) < 0 || fstat(sigfd, &sb2) < 0) {
	    fprintf(stderr, "%s: could stat image or signature file\n", file);
	    exit(1);
	}
Mike Hibler's avatar
Mike Hibler committed
232 233 234
	if (!forcesig && labs(sb1.st_mtime - sb2.st_mtime) > 2) {
	    fprintf(stderr, "%s: image and signature disagree (%ld != %ld), "
		    "use -f to override.\n", file, sb1.st_mtime, sb2.st_mtime);
235 236 237
	    exit(1);
	}
	close(sigfd);
238
    }
239 240 241
}

void
Mike Hibler's avatar
Mike Hibler committed
242
openofile(char *file, struct fileinfo *info)
243
{
Mike Hibler's avatar
Mike Hibler committed
244 245
    int sigfd;

246 247 248 249 250
    if (strcmp(file, "-") == 0) {
	fprintf(stderr, "Cannot output to stdout yet\n");
	exit(1);
    }

Mike Hibler's avatar
Mike Hibler committed
251 252 253 254 255
    info->ndz = ndz_open(file, 1);
    if (info->ndz == NULL) {
	perror(file);
	exit(1);
    }
256

Mike Hibler's avatar
Mike Hibler committed
257 258 259 260 261 262
    info->sigfile = malloc(strlen(file) + 5);
    assert(info->sigfile != NULL);
    strcpy(info->sigfile, file);
    strcat(info->sigfile, ".sig");

    /* check early that we can write to the sigfile! */
263
    sigfd = open(info->sigfile, O_WRONLY|O_CREAT|O_TRUNC, 0666);
Mike Hibler's avatar
Mike Hibler committed
264 265 266 267 268 269
    if (sigfd < 0) {
	perror(info->sigfile);
	exit(1);
    }
    close(sigfd);
}
270

Mike Hibler's avatar
Mike Hibler committed
271 272 273
void
readifile(struct fileinfo *info)
{
274
    /* read range info from image */
275 276
    info->map = ndz_readranges(info->ndz);
    if (info->map == NULL) {
277 278 279 280 281 282 283
	fprintf(stderr, "%s: could not read ranges\n",
		ndz_filename(info->ndz));
	exit(1);
    }

    /* read signature info */
    if (usesigfiles) {
284 285 286 287 288 289
	info->sigmap = ndz_readhashinfo(info->ndz, info->sigfile);
	if (info->sigmap == NULL) {
	    fprintf(stderr, "%s: could not read signature info\n",
		    ndz_filename(info->ndz));
	    exit(1);
	}
Mike Hibler's avatar
Mike Hibler committed
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
	if (verify) {
	    struct ndz_range *next = ndz_rangemap_first(info->sigmap);
	    int rv;

	    /*
	     * Perform a sanity check, ensuring that ranges in the image
	     * map exactly correspond to those in the signature.
	     */
	    rv = ndz_rangemap_iterate(info->map, verifyfunc, &next);
	    if (rv != 0 || next != NULL) {
		if (rv == 0)
		    fprintf(stderr,
			    "  *** image map too short at sig [%lu-%lu]\n",
			    next->start, next->end);
		fprintf(stderr, "%s: error while validating range/hash maps\n",
			ndz_filename(info->ndz));
Mike Hibler's avatar
Mike Hibler committed
306
#if 0
Mike Hibler's avatar
Mike Hibler committed
307 308 309 310 311 312 313 314 315
		printf("==== Image ");
		ndz_rangemap_dump(info->map, (debug==0), NULL);
		printf("==== Hash ");
		ndz_hashmap_dump(info->sigmap, (debug==0));
		fflush(stdout);
#endif
		exit(1);
	    }
	}
316 317
    } else
	info->sigmap = NULL;
318 319
}

320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
/*
 * State carried between calls of the chunkify() iterator while the
 * delta image is being written. Allocated zeroed by main(); a NULL
 * chunkobj tells chunkify() that this is its first call.
 */
struct chunkstate {
    /* chunk currently being filled, from ndz_chunk_create() */
    ndz_chunk_t chunkobj;
    /* number of the chunk currently being filled */
    ndz_chunkno_t chunkno;
    /* buffer for uncompressed data, one hash block in size */
    unsigned char *chunkdatabuf;
    /* header of the current chunk, from ndz_chunk_header() */
    blockhdr_t *header;
    /* first region descriptor, located right after the header */
    struct region *region;
    /* region descriptor currently being extended */
    struct region *curregion;
};

/*
 * Start a new chunk in the delta image.
 *
 * Creates chunk number cstate->chunkno, initializes its header and points
 * cstate->region/curregion at the (empty) first region descriptor.
 * The caller fills in the first sector of the header and region.
 *
 * Returns 0 on success, 1 if the chunk could not be created.
 */
static int
initnewchunk(struct chunkstate *cstate)
{
    struct blockhdr_V2 *hdr;

    cstate->chunkobj = ndz_chunk_create(delta.ndz, cstate->chunkno, clevel);
    if (cstate->chunkobj == NULL) {
	fprintf(stderr, "Error creating chunk %u\n", cstate->chunkno);
	return 1;
    }
    cstate->header = ndz_chunk_header(cstate->chunkobj);

    /*
     * XXX we still do V3 (actually V2 format) headers.
     * We still don't really support V4 yet...
     */
    hdr = (struct blockhdr_V2 *)cstate->header;
    hdr->magic = COMPRESSED_V3;
    hdr->size = 0;
    hdr->blockindex = cstate->chunkno;
    hdr->regionsize = DEFAULTREGIONSIZE;
    hdr->regioncount = 0;
    hdr->firstsect = 0;
    hdr->lastsect = 0;
    hdr->reloccount = 0;

    /* region descriptors immediately follow the header */
    cstate->region = (struct region *)(hdr + 1);
    cstate->curregion = cstate->region;

    /*
     * Start with an explicitly empty first region. chunkify() decides
     * whether to extend the current region by testing start + size, so
     * size must not be left to whatever the chunk buffer contained.
     */
    cstate->curregion->start = 0;
    cstate->curregion->size = 0;

    return 0;
}

Mike Hibler's avatar
Mike Hibler committed
361 362
/*
 * Iterator for ranges in the delta map.
363 364 365 366 367 368 369 370 371 372
 *
 * Read and chunkify the data from the full image to produce the delta.
 *
 * If we have a signature file for the "source" full image, then we don't
 * need to do any hashing as the delta signature will be identical
 * (signatures always cover the entire image).
 *
 * If we have no signature, or we are changing the hash block size or
 * hash algorithm, or we just want to validate the original signature,
 * then we hash the data as we go.
Mike Hibler's avatar
Mike Hibler committed
373 374 375
 */
static int
chunkify(struct ndz_rangemap *mmap, struct ndz_range *range, void *arg)
376
{
377
    struct chunkstate *cstate = arg;
Mike Hibler's avatar
Mike Hibler committed
378 379
    ndz_addr_t rstart = range->start;
    ndz_size_t rsize = range->end + 1 - rstart, sc;
380 381
    uint32_t roffset, hstart, hsize;
    size_t hbytes;
382
    ssize_t cc;
383
    struct ndz_hashdata *hdata;
Mike Hibler's avatar
Mike Hibler committed
384 385 386 387 388 389
    struct ndz_range *hrange;

#ifdef CHUNKIFY_DEBUG
    fprintf(stderr, "chunkify [%lu-%lu]:\n", range->start, range->end);
#endif

390 391 392 393
    /*
     * First call. Initialize the state we are going to carry through
     * with us via the iterator argument.
     */
394 395 396
    if (cstate->chunkobj == NULL) {
	cstate->chunkdatabuf = malloc(hashblksize * delta.ndz->sectsize);
	if (cstate->chunkdatabuf == NULL) {
Mike Hibler's avatar
Mike Hibler committed
397 398 399
	    fprintf(stderr, "could not initialize chunkify data structs\n");
	    return 1;
	}
400 401 402 403 404
	cstate->chunkno = 0;
	if (initnewchunk(cstate) != 0)
	    return 1;
	cstate->header->firstsect = rstart;
	cstate->curregion->start = rstart;
Mike Hibler's avatar
Mike Hibler committed
405 406
    }

407 408 409 410 411 412 413 414 415 416 417 418 419 420
    /*
     * Process the range in units of the hash blocksize.
     *
     * We always break image data ranges at hash blocksize boundaries but
     * note that data ranges and hash block ranges don't necessarily align.
     * A data range might span multiple hash ranges or there might be
     * multiple data ranges in the same hash range. In the latter case,
     * we could simplify things by joining data ranges within the same hash
     * range with zero-filled extra blocks so that we always had full hash
     * ranges, but that would make images larger and result in writing
     * extra data we don't have to when the image is deployed. Instead,
     * we just create small hash ranges covering only the data itself.
     */
    roffset = rstart % hashblksize;
Mike Hibler's avatar
Mike Hibler committed
421
    while (rsize > 0) {
422 423 424 425 426 427 428
	uint32_t pstart, psize;
	int spanschunk;
	size_t bufoff;

	hstart = rstart;
	if (roffset) {
	    hsize = hashblksize - roffset;
Mike Hibler's avatar
Mike Hibler committed
429 430
	    if (hsize > rsize)
		hsize = rsize;
431
	    roffset = 0;
Mike Hibler's avatar
Mike Hibler committed
432 433 434 435 436
	} else if (rsize > hashblksize)
	    hsize = hashblksize;
	else
	    hsize = rsize;
#ifdef CHUNKIFY_DEBUG
437
	fprintf(stderr, "  [%u-%u]: ", hstart, hstart + hsize - 1);
Mike Hibler's avatar
Mike Hibler committed
438 439 440
#endif

	/* XXX read/decompress data range */
441
	sc = ndz_readdata(ndz2.ndz, cstate->chunkdatabuf, hsize, hstart);
442 443 444 445 446
	if (sc != hsize) {
	    fprintf(stderr, "%s: unexpected read return %ld (instead of %u)\n",
		    ndz_filename(ndz2.ndz), (long)sc, hsize);
	    return 1;
	}
Mike Hibler's avatar
Mike Hibler committed
447 448

	/*
449
	 * Fetch or compute the hash value.
Mike Hibler's avatar
Mike Hibler committed
450
	 */
451 452 453 454 455 456
	assert(delta.ndz->hashcurentry < delta.ndz->hashentries);
	hdata = &delta.ndz->hashdata[delta.ndz->hashcurentry++];
	hdata->hashlen = hashlen;

	hbytes = hsize * delta.ndz->sectsize;
	hrange = ndz_rangemap_lookup(ndz2.sigmap, hstart, NULL);
Mike Hibler's avatar
Mike Hibler committed
457
	if (hrange && hrange->data &&
458 459 460
	    hrange->start == hstart && hrange->end == hstart + hsize - 1) {
	    struct ndz_hashdata *hd = (struct ndz_hashdata *)hrange->data;
	    memcpy(hdata->hash, hd->hash, hashlen);
Mike Hibler's avatar
Mike Hibler committed
461
#ifdef CHUNKIFY_DEBUG
462 463
	    fprintf(stderr, " found hash=%s\n",
		    ndz_hash_dump(hdata->hash, hashlen));
464
#endif
Mike Hibler's avatar
Mike Hibler committed
465
#if 0
466
	    /* sanity check */
467 468 469 470
	    {
		unsigned char hbuf[HASH_MAXSIZE];

		ndz_hash_data(delta.ndz, cstate->chunkdatabuf, hbytes, hbuf);
471
#ifdef CHUNKIFY_DEBUG
472 473
		fprintf(stderr, "    computed hash=%s\n",
			ndz_hash_dump(hbuf, hashlen));
474
#endif
475 476 477 478
		if (memcmp(hdata->hash, hbuf, hashlen)) {
		    fprintf(stderr, "*** [%u-%u]: hash does not compare!\n",
			    hstart, hstart + hsize - 1);
		}
479
	    }
Mike Hibler's avatar
Mike Hibler committed
480 481
#endif
	} else {
482 483
	    ndz_hash_data(delta.ndz, cstate->chunkdatabuf, hbytes,
			  hdata->hash);
Mike Hibler's avatar
Mike Hibler committed
484 485 486 487 488
#ifdef CHUNKIFY_DEBUG
	    fprintf(stderr, " no hash found\n");
#endif
	}

489
	/*
490
	 * At this point we have a range of data ([hstart - hstart+hsize-1])
491 492 493 494 495
	 * of a specific size (hsize) which we have hashed (hdata->hash).
	 * Now we compress and write it out to the new image file. This is
	 * complicated significantly by the fact that it might not all fit
	 * in the current chunk. If there is not enough room for this range
	 * in the current chunk, we split it and write what we can.
496 497 498 499 500 501 502 503
	 *
	 * This is complicated even further by our conservative algorithm
	 * for filling chunks, which is basically: if the amount of
	 * uncompressed data exceeds the amount of space left for the
	 * compressed data (plus a little slop in case it expands instead),
	 * then we stop. This is an iterative process since, most likely,
	 * the compressed data will be significantly smaller than the
	 * uncompressed data.
504
	 */
505 506 507 508 509 510 511 512 513 514 515
	bufoff = 0;
	spanschunk = 0;
	pstart = hstart;
	psize = hsize;
	while (psize > 0) {
	    uint32_t wsize;
	    size_t wbytes, chunkremaining;

	    chunkremaining = ndz_chunk_left(cstate->chunkobj);
	    if (chunkremaining < delta.ndz->sectsize) {
		/* switch to new chunk */
516
#ifdef CHUNKIFY_DEBUG
517 518 519
		fprintf(stderr, "    chunk %u full (%lu bytes), writing...\n",
			cstate->chunkno,
			(unsigned long)ndz_chunk_datasize(cstate->chunkobj));
520
#endif
Mike Hibler's avatar
Mike Hibler committed
521

522 523 524 525 526 527 528 529 530
		/* finalize the header */
		cstate->header->size = ndz_chunk_datasize(cstate->chunkobj);
		cstate->header->regioncount =
		    (cstate->curregion - cstate->region + 1);
		/* XXX should always be zero */
		if (cstate->chunkno == 0)
		    cstate->header->firstsect = 0;
		cstate->header->lastsect = pstart;

Mike Hibler's avatar
Mike Hibler committed
531 532 533 534 535 536 537 538 539 540 541 542
		/* include any relocations */
		if (ndz2.ndz->relocmap) {
		    void *buf = (cstate->curregion + 1);
		    delta.ndz->relocmap = ndz2.ndz->relocmap; /* XXX */
		    if (ndz_reloc_put(delta.ndz, cstate->header, buf) != 0) {
			delta.ndz->relocmap = NULL; /* XXX */
			fprintf(stderr, "Error writing relocation info\n");
			return 1;
		    }
		    delta.ndz->relocmap = NULL; /* XXX */
		}

543 544 545 546 547 548 549 550 551 552 553
		/* and write it */
		if (ndz_chunk_flush(cstate->chunkobj, 1) != 0) {
		    fprintf(stderr, "Error writing compressed data\n");
		    return 1;
		}

		cstate->chunkno++;
		if (initnewchunk(cstate) != 0)
		    return 1;
		cstate->header->firstsect = pstart;
		cstate->curregion->start = pstart;
Mike Hibler's avatar
Mike Hibler committed
554

555 556 557 558 559 560
		/* keep track if this hash range spans chunks */
		if (psize < hsize)
		    spanschunk++;

		chunkremaining = ndz_chunk_left(cstate->chunkobj);
		assert(psize <= chunkremaining / delta.ndz->sectsize);
561
	    }
Mike Hibler's avatar
Mike Hibler committed
562

563 564 565 566 567 568 569 570
	    /* write up to chunkremaining (truncated to sectorsize) bytes */
	    wsize = psize;
	    wbytes = wsize * delta.ndz->sectsize;
	    if (wbytes > chunkremaining) {
		wsize = (chunkremaining / delta.ndz->sectsize);
		wbytes = wsize * delta.ndz->sectsize;
	    }
	    assert(wsize > 0);
571 572

#ifdef CHUNKIFY_DEBUG
573 574 575 576
	    fprintf(stderr, "    appending %u sectors to chunk %u "
		    "(%ld bytes available)\n",
		    wsize, cstate->chunkno,
		    ndz_chunk_left(cstate->chunkobj));
577
#endif
578 579 580 581 582 583 584
	    cc = ndz_chunk_append(cstate->chunkobj,
				  cstate->chunkdatabuf + bufoff, wbytes);
	    if (cc < 0) {
		fprintf(stderr, "Error compressing data\n");
		return 1;
	    }
	    assert(cc == wbytes);
585

586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
	    /* append to the current region or create a new one */
	    if (cstate->curregion->start + cstate->curregion->size == pstart) {
		cstate->curregion->size += wsize;
#ifdef CHUNKIFY_DEBUG
		fprintf(stderr, "    adjust range entry to [%u-%u]\n",
			cstate->curregion->start,
			cstate->curregion->start+cstate->curregion->size-1);
#endif
	    } else {
		cstate->curregion++;
		cstate->curregion->start = pstart;
		cstate->curregion->size = wsize;
#ifdef CHUNKIFY_DEBUG
		fprintf(stderr, "    new range entry [%u-%u]\n",
			cstate->curregion->start,
			cstate->curregion->start+cstate->curregion->size-1);
#endif
	    }
Mike Hibler's avatar
Mike Hibler committed
604

605 606 607 608 609 610
	    bufoff += wbytes;
	    pstart += wsize;
	    psize -= wsize;
	    chunkremaining = ndz_chunk_left(cstate->chunkobj);
	}
 
Mike Hibler's avatar
Mike Hibler committed
611
	/*
612 613
	 * At this point we have written out the entire hash range.
	 * Add it to the hash map, recording the chunk(s) that it belongs to.
Mike Hibler's avatar
Mike Hibler committed
614
	 */
615 616 617 618 619 620 621 622 623 624 625 626 627 628
	if (spanschunk)
	    hdata->chunkno = HASH_CHUNKSETSPAN(cstate->chunkno-1);
	else
	    hdata->chunkno = cstate->chunkno;
#ifdef CHUNKIFY_DEBUG
	fprintf(stderr, "    write hash entry [%u-%u], chunk %u",
		hstart, hstart + hsize - 1, HASH_CHUNKNO(hdata->chunkno));
	if (HASH_CHUNKDOESSPAN(hdata->chunkno))
	    fprintf(stderr, "-%u", HASH_CHUNKNO(hdata->chunkno) + 1);
	fprintf(stderr, "\n");
#endif
	cc = ndz_rangemap_alloc(delta.sigmap, hstart, hsize, (void *)hdata);
	if (cc) {
	    fprintf(stderr, "Could not add hashmap entry\n");
629
	    return 1;
Mike Hibler's avatar
Mike Hibler committed
630 631 632 633 634 635
	}

	rstart += hsize;
	rsize -= hsize;
    }

636 637 638 639 640
    /*
     * If this is the last range, we have to flush the final chunk.
     */
    if (range->next == NULL) {
#ifdef CHUNKIFY_DEBUG
641 642 643
	fprintf(stderr, "    final chunk %u done (%lu bytes)\n",
		cstate->chunkno,
		(unsigned long)ndz_chunk_datasize(cstate->chunkobj));
644 645 646 647 648
#endif

	/* finalize the header */
	cstate->header->size = ndz_chunk_datasize(cstate->chunkobj);
	cstate->header->regioncount = (cstate->curregion - cstate->region + 1);
Mike Hibler's avatar
Mike Hibler committed
649 650 651
	/* XXX should always be zero */
	if (cstate->chunkno == 0)
	    cstate->header->firstsect = 0;
652
	cstate->header->lastsect = delta.ndz->maphi;
653

Mike Hibler's avatar
Mike Hibler committed
654 655 656 657 658 659 660 661 662 663 664 665
	/* include any relocations */
	if (ndz2.ndz->relocmap) {
	    void *buf = (cstate->curregion + 1);
	    delta.ndz->relocmap = ndz2.ndz->relocmap; /* XXX */
	    if (ndz_reloc_put(delta.ndz, cstate->header, buf) != 0) {
		delta.ndz->relocmap = NULL; /* XXX */
		fprintf(stderr, "Error writing relocation info\n");
		return 1;
	    }
	    delta.ndz->relocmap = NULL; /* XXX */
	}

666 667 668 669 670 671 672 673 674 675 676 677
	/* and write it */
	if (ndz_chunk_flush(cstate->chunkobj, 1) != 0) {
	    fprintf(stderr, "Error writing compressed data\n");
	    return 1;
	}

	free(cstate->chunkdatabuf);

	/* XXX for debugging */
	memset(cstate, 0, sizeof(*cstate));
    }

Mike Hibler's avatar
Mike Hibler committed
678
    return 0;
679 680 681
}

/*
 * Range map dump helper: print the chunk number that is stored
 * directly in a range's data pointer.
 */
static void
chunkfunc(struct ndz_rangemap *map, void *ptr)
{
    /* the pointer holds an integer; convert via uintptr_t to avoid a
       pointer-to-int cast of different size */
    unsigned int chunkno = (unsigned int)(uintptr_t)ptr;

    printf("chunkno=%u", chunkno);
}

int
main(int argc, char **argv)
{
Mike Hibler's avatar
Mike Hibler committed
691
    int ch;
692

693
    while ((ch = getopt(argc, argv, "SfdVb:D:F")) != -1)
694 695 696 697
	switch(ch) {
	case 'S':
	    usesigfiles = 1;
	    break;
Mike Hibler's avatar
Mike Hibler committed
698 699 700 701 702 703 704
	case 'b':
	    hashblksize = atol(optarg);
	    if (hashblksize < 512 || hashblksize > (32*1024*1024) ||
		(hashblksize & 511) != 0) {
		fprintf(stderr, "Invalid hash block size\n");
		usage();
	    }
705
	    hashblksize /= 512;
Mike Hibler's avatar
Mike Hibler committed
706 707 708 709 710 711 712 713 714 715 716 717
	    break;
	case 'D':
	    if (strcmp(optarg, "md5") == 0)
		hashtype = HASH_TYPE_MD5;
	    else if (strcmp(optarg, "sha1") == 0)
		hashtype = HASH_TYPE_SHA1;
	    else {
		fprintf(stderr, "Invalid digest type `%s'\n",
			optarg);
		usage();
	    }
	    break;
718 719 720
	case 'f':
	    forcesig = 1;
	    break;
Mike Hibler's avatar
Mike Hibler committed
721 722 723 724 725
	case 'V':
	    verify = 1;
	    break;
	case 'd':
	    debug++;
726
	    break;
727 728 729 730 731 732 733
	case 'z':
	    clevel = atoi(optarg);
	    if (clevel < 0 || clevel > 9) {
		fprintf(stderr, "Invalid compression level\n");
		usage();
	    }
	    break;
734 735 736
	case 'F':
	    fullsig = 1;
	    break;
737 738 739 740 741 742 743 744 745 746 747 748 749 750
	case 'h':
	case '?':
	default:
	    usage();
	}
    argc -= optind;
    argv += optind;

    if (argc < 3)
	usage();

    /*
     * Make sure we can open all the files
     */
Mike Hibler's avatar
Mike Hibler committed
751 752 753
    openifile(argv[0], &ndz1);
    openifile(argv[1], &ndz2);
    openofile(argv[2], &delta);
754 755 756 757

    /*
     * Read in the range and signature info.
     */
Mike Hibler's avatar
Mike Hibler committed
758 759 760
    readifile(&ndz1);
    readifile(&ndz2);

Mike Hibler's avatar
Mike Hibler committed
761
#if 0
Mike Hibler's avatar
Mike Hibler committed
762 763 764 765 766 767 768
    printf("==== Old range ");
    ndz_rangemap_dump(ndz1.map, (debug==0), chunkfunc);
    printf("==== Old hash ");
    ndz_hashmap_dump(ndz1.sigmap, (debug==0));
    printf("==== New range ");
    ndz_rangemap_dump(ndz2.map, (debug==0), chunkfunc);
    printf("==== New hash ");
769
    ndz_hashmap_dump(ndz2.sigmap, (debug==0));
770 771 772 773
    fflush(stdout);
#endif

    /*
Mike Hibler's avatar
Mike Hibler committed
774 775
     * Compute a delta map from the image signature maps.
     * First make sure the hash block size and function are consistent.
776
     */
Mike Hibler's avatar
Mike Hibler committed
777 778 779 780 781 782 783 784
    if (usesigfiles) {
	if (ndz1.ndz->hashtype != ndz2.ndz->hashtype ||
	    ndz1.ndz->hashblksize != ndz2.ndz->hashblksize) {
	    fprintf(stderr, "Incomparible signature files for %s (%u/%u) and %s (%u/%u)\n",
		    argv[0], ndz1.ndz->hashtype, ndz1.ndz->hashblksize,
		    argv[1], ndz2.ndz->hashtype, ndz2.ndz->hashblksize);
	    exit(1);
	}
Mike Hibler's avatar
Mike Hibler committed
785
	delta.map = ndz_compute_delta_sigmap(ndz1.ndz, ndz2.ndz);
Mike Hibler's avatar
Mike Hibler committed
786 787 788 789 790
	if (delta.map == NULL) {
	    fprintf(stderr, "Could not compute delta for %s and %s\n",
		    argv[0], argv[1]);
	    exit(1);
	}
791 792 793 794 795 796 797

	/*
	 * Delta map has same range as full image.
	 * XXX doesn't belong here.
	 */
	delta.ndz->maplo = ndz2.ndz->maplo;
	delta.ndz->maphi = ndz2.ndz->maphi;
Mike Hibler's avatar
Mike Hibler committed
798
#if 0
Mike Hibler's avatar
Mike Hibler committed
799 800 801 802 803 804 805
	printf("==== Delta hash ");
	ndz_hashmap_dump(delta.map, (debug==0));
	printf("==== Old hashmap stats:");
	ndz_rangemap_dumpstats(ndz1.sigmap);
	printf("==== New hashmap stats:");
	ndz_rangemap_dumpstats(ndz2.sigmap);
	fflush(stdout);
806
#endif
Mike Hibler's avatar
Mike Hibler committed
807
    }
808 809 810 811
    /*
     * Compute a delta map from the images themselves.
     * Same deal, but construct the delta map from the ranges maps.
     */
Mike Hibler's avatar
Mike Hibler committed
812
    else {
Mike Hibler's avatar
Mike Hibler committed
813
	delta.map = ndz_compute_delta(ndz1.ndz, ndz2.ndz);
814 815 816 817 818 819 820 821 822 823 824 825 826
	if (delta.map == NULL) {
	    fprintf(stderr, "Could not compute delta for %s and %s\n",
		    argv[0], argv[1]);
	    exit(1);
	}

	/*
	 * Delta map has same range as full image.
	 * XXX doesn't belong here.
	 */
	delta.ndz->maplo = ndz2.ndz->maplo;
	delta.ndz->maphi = ndz2.ndz->maphi;

Mike Hibler's avatar
Mike Hibler committed
827
#if 0
828 829 830 831 832 833 834 835
	printf("==== Delta map ");
	ndz_hashmap_dump(delta.map, (debug==0));
	printf("==== Old map stats:");
	ndz_rangemap_dumpstats(ndz1.map);
	printf("==== New map stats:");
	ndz_rangemap_dumpstats(ndz2.map);
	fflush(stdout);
#endif
Mike Hibler's avatar
Mike Hibler committed
836 837
	exit(2);
    }
838

839
    /*
Mike Hibler's avatar
Mike Hibler committed
840
     * Done with the old file.
841
     */
Mike Hibler's avatar
Mike Hibler committed
842 843 844 845
    ndz_close(ndz1.ndz);
    ndz1.sigmap = NULL;
    ndz1.map = NULL;
    ndz1.ndz = NULL;
846

Mike Hibler's avatar
Mike Hibler committed
847 848 849 850 851 852 853 854 855
    /*
     * Iterate through the produced map hashing (if necessary) and
     * chunking the data.
     */
    delta.sigmap = ndz_rangemap_init(NDZ_LOADDR, NDZ_HIADDR-NDZ_LOADDR);
    if (delta.sigmap == NULL) {
	fprintf(stderr, "%s: could not create signature map for delta image\n",
		argv[2]);
	exit(1);
856
    }
857

858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
    /*
     * Initialize signature file info for delta map.
     * XXX doesn't belong here.
     */
    delta.ndz->hashmap = delta.sigmap;
    delta.ndz->hashdata = calloc(ndz2.ndz->hashentries,
				 sizeof(struct ndz_hashdata));
    if (delta.ndz->hashdata == NULL) {
	fprintf(stderr, "%s: could not allocate hashdata for delta image\n",
		argv[2]);
	exit(1);
    }
    delta.ndz->hashtype = hashtype;
    delta.ndz->hashblksize = hashblksize;
    delta.ndz->hashentries = ndz2.ndz->hashentries;
    delta.ndz->hashcurentry = 0;

875 876 877 878 879 880 881 882 883 884 885 886 887
    /*
     * If there is anything in the resulting delta, produce an image!
     */
    if (ndz_rangemap_first(delta.map) != NULL) {
	struct chunkstate *cstate = calloc(1, sizeof(*cstate));
	assert(cstate != NULL);

	if (ndz_rangemap_iterate(delta.map, chunkify, cstate) != 0) {
	    fprintf(stderr, "%s: error while creating new delta image\n",
		    argv[2]);
	    exit(1);
	}
	free(cstate);
888

889
	/* readjust to reflect the actual number of hash entries */
Mike Hibler's avatar
Mike Hibler committed
890
	delta.ndz->hashentries = delta.ndz->hashcurentry;
891

892
	/* write the new sigfile */
893 894
	if (ndz_writehashinfo(fullsig ? ndz2.ndz : delta.ndz,
			      delta.sigfile, argv[2]) != 0) {
895 896 897 898
	    fprintf(stderr, "%s: could not write signature file %s\n",
		    argv[2], delta.sigfile);
	}
	
899 900 901
	ndz_close(ndz2.ndz);
	ndz_close(delta.ndz);
    } else {
Mike Hibler's avatar
Mike Hibler committed
902 903
	fprintf(stderr, "Images %s and %s are identical, no delta produced!\n",
		argv[0], argv[1]);
904 905 906 907
	ndz_close(ndz2.ndz);
	ndz_close(delta.ndz);
	unlink(argv[2]);
	unlink(delta.sigfile);
908 909 910 911 912 913 914 915 916 917 918 919
    }

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * End:
 */