/* * Copyright (c) 2000-2015 University of Utah and the Flux Group. * * {{{EMULAB-LICENSE * * This file is part of the Emulab network testbed software. * * This file is free software: you can redistribute it and/or modify it * under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or (at * your option) any later version. * * This file is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public * License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this file. If not, see . * * }}} */ //#define CHUNKIFY_DEBUG /* * imagerezip [-S] Oimage.ndz Nimage.ndz * * Read the data of the old image (Oimage.ndz) and create a new image * (Nimage.ndz) from it. With -S, it will create a new signature file * as well. * * Currently this is just a testing tool for the new libraries. At some * point it might be used to rechunk or rehash existing images, or compress * them with a different level. */ #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NOTHREADS #include #endif #include "imagehdr.h" #include "imagehash.h" #include "libndz/libndz.h" struct fileinfo { struct ndz_file *ndz; char *sigfile; struct ndz_rangemap *map, *sigmap; } old, new; static int gensigfile = 0; static int forcesig = 0; static int debug = 0; static int verify = 0; static int sanitycheck = 1; static int clevel = 4; static int hashtype = HASH_TYPE_SHA1; static int hashlen = 20; static long hashblksize = -1; void usage(void) { fprintf(stderr, "Usage: imagerezip [-SVfd] [-b blksize] [-D hashfunc] [-z level] Oimage1.ndz Nimage.ndz\n" "\n" "Produce a new image that is a copy of the old image.\n" "\n" "-S Generate a new signature.\n" "-V Verify consistency of image and signature.\n" "-f Force imagedelta to use a questionable sigfile.\n" "-d Enable debugging.\n" "-D hfunc Hash function to use (md5 or sha1).\n" "-b blksize Size of hash blocks (512 <= size <= 32M).\n" "-z level Compression level (0 to 9).\n"); exit(1); } /* * Iterator for ranges in the image map. * Validate that entries match up with those in the signature map. */ static int verifyfunc(struct ndz_rangemap *imap, struct ndz_range *range, void *arg) { struct ndz_range **smnext = arg; struct ndz_range *srange = *smnext; ndz_addr_t addr, eaddr; addr = range->start; eaddr = range->end; /* * Every image range should correspond to an integral number of * signature map entries. */ while (addr <= eaddr && srange) { if (srange->start != addr || srange->end > eaddr) { /* * XXX argh! One anomaly is when an image region gets * split across chunks, in which case it appears as distinct * ranges in our image map. Here we look ahead behind to * identify those cases... */ if (srange->data) { struct ndz_hashdata *hd = (struct ndz_hashdata *)srange->data; if (HASH_CHUNKDOESSPAN(hd->chunkno)) { /* * If starts line up then make sure following * image map entry is contiguous with us. If so, * assume this is the special case and return * without incrementing the sigmap entry. */ if (srange->start == addr && range->next && range->next->start == range->end + 1) { *smnext = srange; return 0; } /* * See if we are on the other side of the anomaly. * Here the srange start will be before the image * map range start and the previous image range * should be contiguous with us. If so, advance to * the next srange and continue. */ if (addr == range->start && srange->start < addr && srange->end <= eaddr && ndz_rangemap_lookup(imap, addr-1, NULL) != NULL) { addr = srange->end + 1; srange = srange->next; continue; } } } fprintf(stderr, " *** [%lu-%lu]: bad sigentry [%lu-%lu]\n", range->start, eaddr, srange->start, srange->end); return 1; } addr = srange->end + 1; srange = srange->next; } if (addr <= eaddr) { fprintf(stderr, " *** [%lu-%lu]: signature map too short!\n", range->start, range->end); return 1; } *smnext = srange; return 0; } /* * File must exist and be readable. * If verify is set, signature file must exist as well. * Reads in the range map and signature as well. */ void openifile(char *file, struct fileinfo *info) { int sigfd; info->ndz = ndz_open(file, 0); if (info->ndz == NULL) { fprintf(stderr, "%s: could not open as NDZ file\n", ndz_filename(info->ndz)); exit(1); } if (verify) { struct stat sb1, sb2; info->sigfile = malloc(strlen(file) + 5); assert(info->sigfile != NULL); strcpy(info->sigfile, file); strcat(info->sigfile, ".sig"); sigfd = open(info->sigfile, 0); if (sigfd < 0) { fprintf(stderr, "%s: could not find signature file %s\n", file, info->sigfile); exit(1); } if (fstat(info->ndz->fd, &sb1) < 0 || fstat(sigfd, &sb2) < 0) { fprintf(stderr, "%s: could stat image or signature file\n", file); exit(1); } if (!forcesig && labs(sb1.st_mtime - sb2.st_mtime) > 2) { fprintf(stderr, "%s: image and signature disagree (%ld != %ld), " "use -f to override.\n", file, sb1.st_mtime, sb2.st_mtime); exit(1); } close(sigfd); } } void openofile(char *file, struct fileinfo *info) { int sigfd; if (strcmp(file, "-") == 0) { fprintf(stderr, "Cannot output to stdout yet\n"); exit(1); } info->ndz = ndz_open(file, 1); if (info->ndz == NULL) { perror(file); exit(1); } info->sigfile = malloc(strlen(file) + 5); assert(info->sigfile != NULL); strcpy(info->sigfile, file); strcat(info->sigfile, ".sig"); /* check early that we can write to the sigfile! */ sigfd = open(info->sigfile, O_WRONLY|O_CREAT|O_TRUNC, 0666); if (sigfd < 0) { perror(info->sigfile); exit(1); } close(sigfd); } void readifile(struct fileinfo *info) { /* read range info from image */ info->map = ndz_readranges(info->ndz); if (info->map == NULL) { fprintf(stderr, "%s: could not read ranges\n", ndz_filename(info->ndz)); exit(1); } /* read signature info */ if (verify) { info->sigmap = ndz_readhashinfo(info->ndz, info->sigfile); if (info->sigmap == NULL) { fprintf(stderr, "%s: could not read signature info\n", ndz_filename(info->ndz)); exit(1); } if (sanitycheck) { struct ndz_range *next = ndz_rangemap_first(info->sigmap); int rv; /* * Perform a sanity check, ensuring that ranges in the image * map exactly correspond to those in the signature. */ rv = ndz_rangemap_iterate(info->map, verifyfunc, &next); if (rv != 0 || next != NULL) { if (rv == 0) fprintf(stderr, " *** image map too short at sig [%lu-%lu]\n", next->start, next->end); fprintf(stderr, "%s: error while validating range/hash maps\n", ndz_filename(info->ndz)); #if 0 printf("==== Image "); ndz_rangemap_dump(info->map, (debug==0), NULL); printf("==== Hash "); ndz_hashmap_dump(info->sigmap, (debug==0)); fflush(stdout); #endif exit(1); } } } else info->sigmap = NULL; } struct chunkstate { ndz_chunk_t chunkobj; ndz_chunkno_t chunkno; unsigned char *chunkdatabuf; blockhdr_t *header; struct region *region; struct region *curregion; }; static int initnewchunk(struct chunkstate *cstate) { struct blockhdr_V2 *hdr; cstate->chunkobj = ndz_chunk_create(new.ndz, cstate->chunkno, clevel); if (cstate->chunkobj == NULL) { fprintf(stderr, "Error creating chunk %u\n", cstate->chunkno); return 1; } cstate->header = ndz_chunk_header(cstate->chunkobj); /* * XXX we still do V3 (actually V2 format) headers. * We still don't really support V4 yet... */ hdr = (struct blockhdr_V2 *)cstate->header; hdr->magic = COMPRESSED_V3; hdr->size = 0; hdr->blockindex = cstate->chunkno; hdr->regionsize = DEFAULTREGIONSIZE; hdr->regioncount = 0; hdr->firstsect = 0; hdr->lastsect = 0; hdr->reloccount = 0; cstate->region = (struct region *)(hdr + 1); cstate->curregion = cstate->region; return 0; } /* * Iterator for ranges in the delta map. * * Read and chunkify the data from the full image to produce the delta. * * If we have a signature file for the "source" full image, then we don't * need to do any hashing as the delta signature will be identical * (signatures always cover the entire image). * * If we have no signature, or we are changing the hash block size or * hash algorithm, or we just want to validate the original signature, * then we hash the data as we go. */ static int chunkify(struct ndz_rangemap *mmap, struct ndz_range *range, void *arg) { struct chunkstate *cstate = arg; ndz_addr_t rstart = range->start; ndz_size_t rsize = range->end + 1 - rstart, sc; uint32_t roffset, hstart, hsize; size_t hbytes; ssize_t cc; struct ndz_hashdata *hdata; struct ndz_range *hrange; #ifdef CHUNKIFY_DEBUG fprintf(stderr, "chunkify [%lu-%lu]:\n", range->start, range->end); #endif /* * First call. Initialize the state we are going to carry through * with us via the iterator argument. */ if (cstate->chunkobj == NULL) { cstate->chunkdatabuf = malloc(hashblksize * new.ndz->sectsize); if (cstate->chunkdatabuf == NULL) { fprintf(stderr, "could not initialize chunkify data structs\n"); return 1; } cstate->chunkno = 0; if (initnewchunk(cstate) != 0) return 1; cstate->header->firstsect = rstart; cstate->curregion->start = rstart; } /* * Process the range, reading the old and producing the new. * When hashing, we also must respect hash block alignment. */ if (verify || gensigfile) roffset = rstart % hashblksize; else roffset = 0; while (rsize > 0) { uint32_t pstart, psize; int spanschunk; size_t bufoff; hstart = rstart; if (roffset) { hsize = hashblksize - roffset; if (hsize > rsize) hsize = rsize; roffset = 0; } else if (rsize > hashblksize) hsize = hashblksize; else hsize = rsize; #ifdef CHUNKIFY_DEBUG fprintf(stderr, " [%u-%u]: ", hstart, hstart + hsize - 1); #endif /* XXX read/decompress data range */ sc = ndz_readdata(old.ndz, cstate->chunkdatabuf, hsize, hstart); if (sc != hsize) { fprintf(stderr, "%s: unexpected read return %ld (instead of %u)\n", ndz_filename(old.ndz), (long)sc, hsize); return 1; } /* * Fetch and/or compute the hash value. */ hbytes = hsize * new.ndz->sectsize; if (verify || gensigfile) { unsigned char hbuf[HASH_MAXSIZE]; /* compute the hash */ ndz_hash_data(new.ndz, cstate->chunkdatabuf, hbytes, hbuf); #ifdef CHUNKIFY_DEBUG fprintf(stderr, "computed hash=%s\n", ndz_hash_dump(hbuf, hashlen)); #endif if (gensigfile) { assert(new.ndz->hashcurentry < new.ndz->hashentries); hdata = &new.ndz->hashdata[new.ndz->hashcurentry++]; hdata->hashlen = hashlen; memcpy(hdata->hash, hbuf, hashlen); } if (verify) { hrange = ndz_rangemap_lookup(old.sigmap, hstart, NULL); if (hrange && hrange->data && hrange->start == hstart && hrange->end == hstart + hsize - 1) { struct ndz_hashdata *hd = (struct ndz_hashdata *)hrange->data; if (memcmp(hd->hash, hbuf, hashlen)) { fprintf(stderr, "*** [%u-%u]: hash=%s does not compare!\n", hstart, hstart + hsize - 1, ndz_hash_dump(hd->hash, hashlen)); return 1; } } else { fprintf(stderr, "*** [%u-%u]: ", hstart, hstart + hsize - 1); if (hrange == NULL) fprintf(stderr, "range start not found!\n"); else if (hrange->data == NULL) fprintf(stderr, "no hash data found!\n"); else fprintf(stderr, "range mismatch [%lu-%lu]!\n", hrange->start, hrange->end); return 1; } } } #ifdef CHUNKIFY_DEBUG else fprintf(stderr, "no hash computed\n"); #endif /* * At this point we have a range of data ([hstart - hstart+hsize-1]) * of a specific size (hsize) which we have hashed (hdata->hash). * Now we compress and write it out to the new image file. This is * complicated significantly by the fact that it might not all fit * in the current chunk. If there is not enough room for this range * in the current chunk, we split it and write what we can. * * This is complicated even further by our conservative algorithm * for filling chunks, which is basically: if the amount of * uncompressed data exceeds the amount of space left for the * compressed data (plus a little slop in case it expands instead), * then we stop. This is an iterative process since, most likely, * the compressed data will be significantly smaller than the * uncompressed data. */ bufoff = 0; spanschunk = 0; pstart = hstart; psize = hsize; while (psize > 0) { uint32_t wsize; size_t wbytes, chunkremaining; chunkremaining = ndz_chunk_left(cstate->chunkobj); if (chunkremaining < new.ndz->sectsize) { /* switch to new chunk */ #ifdef CHUNKIFY_DEBUG fprintf(stderr, " chunk %u full (%lu bytes), writing...\n", cstate->chunkno, (unsigned long)ndz_chunk_datasize(cstate->chunkobj)); #endif /* finalize the header */ cstate->header->size = ndz_chunk_datasize(cstate->chunkobj); cstate->header->regioncount = (cstate->curregion - cstate->region + 1); /* XXX should always be zero */ if (cstate->chunkno == 0) cstate->header->firstsect = 0; cstate->header->lastsect = pstart; /* include any relocations */ if (old.ndz->relocmap) { void *buf = (cstate->curregion + 1); new.ndz->relocmap = old.ndz->relocmap; /* XXX */ if (ndz_reloc_put(new.ndz, cstate->header, buf) != 0) { new.ndz->relocmap = NULL; /* XXX */ fprintf(stderr, "Error writing relocation info\n"); return 1; } new.ndz->relocmap = NULL; /* XXX */ } /* and write it */ if (ndz_chunk_flush(cstate->chunkobj, 1) != 0) { fprintf(stderr, "Error writing compressed data\n"); return 1; } cstate->chunkno++; if (initnewchunk(cstate) != 0) return 1; cstate->header->firstsect = pstart; cstate->curregion->start = pstart; /* keep track if this hash range spans chunks */ if (psize < hsize) spanschunk++; chunkremaining = ndz_chunk_left(cstate->chunkobj); assert(psize <= chunkremaining / new.ndz->sectsize); } /* write up to chunkremaining (truncated to sectorsize) bytes */ wsize = psize; wbytes = wsize * new.ndz->sectsize; if (wbytes > chunkremaining) { wsize = (chunkremaining / new.ndz->sectsize); wbytes = wsize * new.ndz->sectsize; } assert(wsize > 0); #ifdef CHUNKIFY_DEBUG fprintf(stderr, " appending %u sectors to chunk %u " "(%ld bytes available)\n", wsize, cstate->chunkno, ndz_chunk_left(cstate->chunkobj)); #endif cc = ndz_chunk_append(cstate->chunkobj, cstate->chunkdatabuf + bufoff, wbytes); if (cc < 0) { fprintf(stderr, "Error compressing data\n"); return 1; } assert(cc == wbytes); /* append to the current region or create a new one */ if (cstate->curregion->start + cstate->curregion->size == pstart) { cstate->curregion->size += wsize; #ifdef CHUNKIFY_DEBUG fprintf(stderr, " adjust range entry to [%u-%u]\n", cstate->curregion->start, cstate->curregion->start+cstate->curregion->size-1); #endif } else { cstate->curregion++; cstate->curregion->start = pstart; cstate->curregion->size = wsize; #ifdef CHUNKIFY_DEBUG fprintf(stderr, " new range entry [%u-%u]\n", cstate->curregion->start, cstate->curregion->start+cstate->curregion->size-1); #endif } bufoff += wbytes; pstart += wsize; psize -= wsize; chunkremaining = ndz_chunk_left(cstate->chunkobj); } /* * At this point we have written out the entire range. If creating * a signature file, add it to the hash map, recording the chunk(s) * that it belongs to. */ if (gensigfile) { if (spanschunk) hdata->chunkno = HASH_CHUNKSETSPAN(cstate->chunkno-1); else hdata->chunkno = cstate->chunkno; #ifdef CHUNKIFY_DEBUG fprintf(stderr, " write hash entry [%u-%u], chunk %u", hstart, hstart + hsize - 1, HASH_CHUNKNO(hdata->chunkno)); if (HASH_CHUNKDOESSPAN(hdata->chunkno)) fprintf(stderr, "-%u", HASH_CHUNKNO(hdata->chunkno) + 1); fprintf(stderr, "\n"); #endif cc = ndz_rangemap_alloc(new.sigmap, hstart, hsize, (void *)hdata); if (cc) { fprintf(stderr, "Could not add hashmap entry\n"); return 1; } } rstart += hsize; rsize -= hsize; } /* * If this is the last range, we have to flush the final chunk. */ if (range->next == NULL) { #ifdef CHUNKIFY_DEBUG fprintf(stderr, " final chunk %u done (%lu bytes)\n", cstate->chunkno, (unsigned long)ndz_chunk_datasize(cstate->chunkobj)); #endif /* finalize the header */ cstate->header->size = ndz_chunk_datasize(cstate->chunkobj); cstate->header->regioncount = (cstate->curregion - cstate->region + 1); /* XXX should always be zero */ if (cstate->chunkno == 0) cstate->header->firstsect = 0; cstate->header->lastsect = new.ndz->maphi; /* include any relocations */ if (old.ndz->relocmap) { void *buf = (cstate->curregion + 1); new.ndz->relocmap = old.ndz->relocmap; /* XXX */ if (ndz_reloc_put(new.ndz, cstate->header, buf) != 0) { new.ndz->relocmap = NULL; /* XXX */ fprintf(stderr, "Error writing relocation info\n"); return 1; } new.ndz->relocmap = NULL; /* XXX */ } /* and write it */ if (ndz_chunk_flush(cstate->chunkobj, 1) != 0) { fprintf(stderr, "Error writing compressed data\n"); return 1; } free(cstate->chunkdatabuf); /* XXX for debugging */ memset(cstate, 0, sizeof(*cstate)); } return 0; } static int unchunkify(struct ndz_rangemap *omap, struct ndz_range *range, void *arg) { struct ndz_rangemap *nmap = arg; ndz_addr_t size = range->end - range->start + 1; assert(nmap); if (ndz_rangemap_alloc(nmap, range->start, size, 0)) { fprintf(stderr, "could not create map entry for [%lu-%lu]\n", range->start, range->end); return 1; } return 0; } static void chunkfunc(struct ndz_rangemap *map, void *ptr) { unsigned int chunkno = (int)ptr; printf("chunkno=%u", chunkno); } int main(int argc, char **argv) { int ch; while ((ch = getopt(argc, argv, "SfdVb:D:")) != -1) switch(ch) { case 'S': gensigfile = 1; break; case 'b': hashblksize = atol(optarg); if (hashblksize < 512 || hashblksize > (32*1024*1024) || (hashblksize & 511) != 0) { fprintf(stderr, "Invalid hash block size\n"); usage(); } hashblksize /= 512; break; case 'D': if (strcmp(optarg, "md5") == 0) hashtype = HASH_TYPE_MD5; else if (strcmp(optarg, "sha1") == 0) hashtype = HASH_TYPE_SHA1; else { fprintf(stderr, "Invalid digest type `%s'\n", optarg); usage(); } break; case 'f': forcesig = 1; break; case 'V': verify = 1; break; case 'd': debug++; break; case 'z': clevel = atoi(optarg); if (clevel < 0 || clevel > 9) { fprintf(stderr, "Invalid compression level\n"); usage(); } break; case 'h': case '?': default: usage(); } argc -= optind; argv += optind; if (argc < 2) usage(); /* * Set a reasonable default blocksize whether hashing or not. */ if (hashblksize == -1) { if (verify || gensigfile) hashblksize = HASHBLK_SIZE / 512; else hashblksize = (128 * 1024) / 512; } /* * Make sure we can open all the files */ openifile(argv[0], &old); openofile(argv[1], &new); /* * Read in the range and signature info. */ readifile(&old); #if 0 printf("==== Old range "); ndz_rangemap_dump(old.map, (debug==0), chunkfunc); if (old.sigmap) { printf("==== Old hash "); ndz_hashmap_dump(old.sigmap, (debug==0)); } fflush(stdout); #endif /* * If we are checking and producing signatures, make sure the * hashtype and block size are the same. */ if (verify && gensigfile) { if (old.ndz->hashtype != hashtype || old.ndz->hashblksize != hashblksize) { fprintf(stderr, "%s: incompatible hash values\n", argv[0]); exit(1); } } /* * Copy the old map to the new map removing any artificial chunk * boundaries. */ new.map = ndz_rangemap_init(NDZ_LOADDR, NDZ_HIADDR-NDZ_LOADDR); if (ndz_rangemap_iterate(old.map, unchunkify, new.map)) { fprintf(stderr, "%s: could not create new map\n", argv[0]); exit(1); } /* * New map has same range as old image. * XXX doesn't belong here. */ new.ndz->maplo = old.ndz->maplo; new.ndz->maphi = old.ndz->maphi; /* * Iterate through the map hashing (if necessary) and chunking the data. */ new.sigmap = ndz_rangemap_init(NDZ_LOADDR, NDZ_HIADDR-NDZ_LOADDR); if (new.sigmap == NULL) { fprintf(stderr, "%s: could not create signature map for new image\n", argv[1]); exit(1); } /* * Initialize signature file info for new map. * XXX doesn't belong here. */ new.ndz->hashmap = new.sigmap; new.ndz->hashdata = calloc(old.ndz->hashentries, sizeof(struct ndz_hashdata)); if (new.ndz->hashdata == NULL) { fprintf(stderr, "%s: could not allocate hashdata for new image\n", argv[1]); exit(1); } new.ndz->hashtype = hashtype; new.ndz->hashblksize = hashblksize; new.ndz->hashentries = old.ndz->hashentries; new.ndz->hashcurentry = 0; /* * If there is anything in the old image, produce a new image! */ if (ndz_rangemap_first(new.map) != NULL) { struct chunkstate *cstate = calloc(1, sizeof(*cstate)); assert(cstate != NULL); if (ndz_rangemap_iterate(new.map, chunkify, cstate) != 0) { fprintf(stderr, "%s: error while creating new image\n", argv[1]); exit(1); } free(cstate); /* write the new sigfile */ if (gensigfile) { if (ndz_writehashinfo(new.ndz, new.sigfile, argv[1]) != 0) { fprintf(stderr, "%s: could not write signature file %s\n", argv[1], new.sigfile); } } } return 0; } /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 4 * End: */