Commit d5ffec9a authored by Mike Hibler

First successful delta image created!

Of course, I have no idea if it is correct...but it is well formed!
parent d5361103
@@ -100,18 +100,16 @@ static int usesigfiles = 0;
static int forcesig = 0;
static int debug = 0;
static int verify = 0;
static int clevel = 4;
static int hashtype = HASH_TYPE_SHA1;
static int hashlen = 20;
static long hashblksize = HASHBLK_SIZE / 512;
static ndz_chunk_t chunkobj;
static ndz_chunkno_t chunkno;
static char *chunkdatabuf;
void
usage(void)
{
fprintf(stderr,
"Usage: imagedelta [-SVfd] [-b blksize] [-D hashfunc] image1.ndz image2.ndz delta1to2.ndz\n"
"Usage: imagedelta [-SVfd] [-b blksize] [-D hashfunc] [-z level] image1.ndz image2.ndz delta1to2.ndz\n"
"\n"
"Produce a delta image (delta1to2) containing the changes\n"
"necessary to get from image1 to image2.\n"
@@ -121,7 +119,9 @@ usage(void)
"-f Force imagedelta to use a questionable sigfile.\n"
"-d Enable debugging.\n"
"-D hfunc Hash function to use (md5 or sha1).\n"
"-b blksize Size of hash blocks (512 <= size <= 32M).\n");
"-b blksize Size of hash blocks (512 <= size <= 32M).\n"
"-z level Compression level (0 to 9).\n");
exit(1);
}
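/*
 * Editor's note (illustrative, not part of this change): with the new -z
 * option a delta could be produced with maximum compression like so
 * (file names are made up):
 *
 *	imagedelta -z 9 base.ndz updated.ndz base-to-updated.ndz
 *
 * Following the usual zlib convention, level 0 stores the data
 * uncompressed and level 9 is smallest/slowest.
 */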
@@ -252,13 +252,14 @@ openofile(char *file, struct fileinfo *info)
perror(file);
exit(1);
}
info->sigfile = malloc(strlen(file) + 5);
assert(info->sigfile != NULL);
strcpy(info->sigfile, file);
strcat(info->sigfile, ".sig");
/* check early that we can write to the sigfile! */
sigfd = open(info->sigfile, O_WRONLY|O_CREAT|O_TRUNC);
sigfd = open(info->sigfile, O_WRONLY|O_CREAT|O_TRUNC, 0666);
if (sigfd < 0) {
perror(info->sigfile);
exit(1);
@@ -315,6 +316,47 @@ readifile(struct fileinfo *info)
info->sigmap = NULL;
}
struct chunkstate {
ndz_chunk_t chunkobj;
ndz_chunkno_t chunkno;
unsigned char *chunkdatabuf;
blockhdr_t *header;
struct region *region;
struct region *curregion;
};
static int
initnewchunk(struct chunkstate *cstate)
{
struct blockhdr_V2 *hdr;
cstate->chunkobj = ndz_chunk_create(delta.ndz, cstate->chunkno, clevel);
if (cstate->chunkobj == NULL) {
fprintf(stderr, "Error creating chunk %u\n", cstate->chunkno);
return 1;
}
cstate->header = ndz_chunk_header(cstate->chunkobj);
/*
* XXX we still do V3 (actually V2 format) headers.
* We still don't really support V4 yet...
*/
hdr = (struct blockhdr_V2 *)cstate->header;
hdr->magic = COMPRESSED_V3;
hdr->size = 0;
hdr->blockindex = cstate->chunkno;
hdr->regionsize = DEFAULTREGIONSIZE;
hdr->regioncount = 0;
hdr->firstsect = 0;
hdr->lastsect = 0;
hdr->reloccount = 0;
cstate->region = (struct region *)(hdr + 1);
cstate->curregion = cstate->region;
return 0;
}
/*
* Iterator for ranges in the delta map.
* Read and chunkify the data from the full image, hashing the data as
@@ -323,9 +365,12 @@ readifile(struct fileinfo *info)
static int
chunkify(struct ndz_rangemap *mmap, struct ndz_range *range, void *arg)
{
struct chunkstate *cstate = arg;
ndz_addr_t rstart = range->start;
ndz_size_t rsize = range->end + 1 - rstart, sc;
uint32_t offset, hsize;
size_t rbytes;
ssize_t cc;
unsigned char hashbuf[HASH_MAXSIZE], *hash;
struct ndz_range *hrange;
@@ -333,17 +378,18 @@ chunkify(struct ndz_rangemap *mmap, struct ndz_range *range, void *arg)
fprintf(stderr, "chunkify [%lu-%lu]:\n", range->start, range->end);
#endif
#if 0
if (chunkobj == NULL) {
chunkno = 0;
chunkobj = ndz_chunk_create(ndz2.ndz, chunkno);
chunkdatabuf = malloc(hashblksize * ndz->sectsize);
if (chunkobj == NULL || chunkdatabuf == NULL) {
if (cstate->chunkobj == NULL) {
cstate->chunkdatabuf = malloc(hashblksize * delta.ndz->sectsize);
if (cstate->chunkdatabuf == NULL) {
fprintf(stderr, "could not initialize chunkify data structs\n");
return 1;
}
cstate->chunkno = 0;
if (initnewchunk(cstate) != 0)
return 1;
cstate->header->firstsect = rstart;
cstate->curregion->start = rstart;
}
#endif
offset = rstart % hashblksize;
while (rsize > 0) {
@@ -361,34 +407,95 @@ chunkify(struct ndz_rangemap *mmap, struct ndz_range *range, void *arg)
#endif
/* XXX read/decompress data range */
sc = ndz_readdata(ndz2.ndz, chunkdatabuf, hsize, rstart);
sc = ndz_readdata(ndz2.ndz, cstate->chunkdatabuf, hsize, rstart);
if (sc != hsize) {
fprintf(stderr, "%s: unexpected read return %ld (instead of %u)\n",
ndz_filename(ndz2.ndz), (long)sc, hsize);
return 1;
}
/*
* See if we have an existing hash for the hash block
*/
rbytes = hsize * delta.ndz->sectsize;
hrange = ndz_rangemap_lookup(ndz2.sigmap, rstart, NULL);
if (hrange && hrange->data &&
hrange->start == rstart && hrange->end == rstart + hsize - 1) {
struct hashdata *hd = (struct hashdata *)hrange->data;
hash = hd->hash;
#ifdef CHUNKIFY_DEBUG
fprintf(stderr, " found hash=%s\n",
ndz_hash_dump(hash, hashlen));
fprintf(stderr, " found hash=%s\n", ndz_hash_dump(hash, hashlen));
#endif
#if 1
/* sanity check */
ndz_hash_data(delta.ndz, cstate->chunkdatabuf, rbytes, hashbuf);
#ifdef CHUNKIFY_DEBUG
fprintf(stderr, " computed hash=%s\n", ndz_hash_dump(hashbuf, hashlen));
#endif
if (memcmp(hash, hashbuf, hashlen)) {
fprintf(stderr, "*** [%lu-%lu]: hash does not compare!\n",
rstart, rstart + hsize - 1);
}
#endif
} else {
/* XXX compute hash over data */
ndz_hash_data(delta.ndz, cstate->chunkdatabuf, rbytes, hashbuf);
hash = hashbuf;
#ifdef CHUNKIFY_DEBUG
fprintf(stderr, " no hash found\n");
#endif
}
/* XXX add range/hashinfo to new sigmap */
/*
* If there is not enough room for this range in the current chunk,
* write it out and start a new one.
*/
if (rbytes > ndz_chunk_left(cstate->chunkobj)) {
#ifdef CHUNKIFY_DEBUG
fprintf(stderr, " chunk %u done, starting new one\n", cstate->chunkno);
#endif
/* XXX compress/write data range */
/* finalize the header */
cstate->header->size = ndz_chunk_datasize(cstate->chunkobj);
cstate->header->regioncount = (cstate->curregion - cstate->region + 1);
cstate->header->lastsect = rstart;
/* XXX deal with redo logic when nearing end-of-chunk */
/* and write it */
if (ndz_chunk_flush(cstate->chunkobj, 1) != 0) {
fprintf(stderr, "Error writing compressed data\n");
return 1;
}
/* XXX deal with switching chunks */
cstate->chunkno++;
if (initnewchunk(cstate) != 0)
return 1;
cstate->header->firstsect = rstart;
cstate->curregion->start = rstart;
}
assert(rbytes <= ndz_chunk_left(cstate->chunkobj));
/*
* Append the hashed range to the current chunk.
*/
#ifdef CHUNKIFY_DEBUG
fprintf(stderr, " appending to chunk %u\n", cstate->chunkno);
#endif
cc = ndz_chunk_append(cstate->chunkobj, cstate->chunkdatabuf, rbytes);
if (cc < 0) {
fprintf(stderr, "Error compressing data\n");
return 1;
}
assert(cc == rbytes);
/* append to the current region or create a new one */
if (cstate->curregion->start + cstate->curregion->size == rstart)
cstate->curregion->size += hsize;
else {
cstate->curregion++;
cstate->curregion->start = rstart;
cstate->curregion->size = hsize;
}
/* XXX add range/hashinfo to new sigmap */
#if 0
/*
@@ -397,14 +504,14 @@ chunkify(struct ndz_rangemap *mmap, struct ndz_range *range, void *arg)
if ((hash = rhash) == NULL) {
if (hash_range(rstart, hsize, hashbuf)) {
fprintf(stderr, "Error hashing image data\n");
return -1;
return 1;
}
hash = hashbuf;
}
if (addhash(hinfop, rstart, hsize, hash) != 0) {
fprintf(stderr, "Out of memory for new hash map\n");
return -1;
return 1;
}
#endif
@@ -412,6 +519,37 @@ chunkify(struct ndz_rangemap *mmap, struct ndz_range *range, void *arg)
rsize -= hsize;
}
/*
* If this is the last range, we have to flush the final chunk.
*/
if (range->next == NULL) {
#ifdef CHUNKIFY_DEBUG
fprintf(stderr, " final chunk %u done\n", cstate->chunkno);
#endif
/* finalize the header */
cstate->header->size = ndz_chunk_datasize(cstate->chunkobj);
cstate->header->regioncount = (cstate->curregion - cstate->region + 1);
/*
* XXX not right, need to use the last sector of the ndz2 map.
* But I'm not sure it is set correctly, and it doesn't really matter
* since we will never be zeroing when loading a delta image!
*/
cstate->header->lastsect = range->end + 1;
/* and write it */
if (ndz_chunk_flush(cstate->chunkobj, 1) != 0) {
fprintf(stderr, "Error writing compressed data\n");
return 1;
}
free(cstate->chunkdatabuf);
/* XXX for debugging */
memset(cstate, 0, sizeof(*cstate));
}
return 0;
}
@@ -460,6 +598,13 @@ main(int argc, char **argv)
case 'd':
debug++;
break;
case 'z':
clevel = atoi(optarg);
if (clevel < 0 || clevel > 9) {
fprintf(stderr, "Invalid compression level\n");
usage();
}
break;
case 'h':
case '?':
default:
@@ -492,8 +637,7 @@ main(int argc, char **argv)
printf("==== New range ");
ndz_rangemap_dump(ndz2.map, (debug==0), chunkfunc);
printf("==== New hash ");
// ndz_hashmap_dump(ndz2.sigmap, (debug==0));
ndz_hashmap_dump(ndz2.sigmap, 0);
ndz_hashmap_dump(ndz2.sigmap, (debug==0));
fflush(stdout);
#endif
@@ -548,11 +692,31 @@ main(int argc, char **argv)
argv[2]);
exit(1);
}
if (ndz_rangemap_iterate(delta.map, chunkify, NULL) != 0) {
/*
* If there is anything in the resulting delta, produce an image!
*/
if (ndz_rangemap_first(delta.map) != NULL) {
struct chunkstate *cstate = calloc(1, sizeof(*cstate));
assert(cstate != NULL);
delta.ndz->hashtype = hashtype;
delta.ndz->hashblksize = hashblksize;
if (ndz_rangemap_iterate(delta.map, chunkify, cstate) != 0) {
fprintf(stderr, "%s: error while creating new delta image\n",
argv[2]);
exit(1);
}
free(cstate);
ndz_close(ndz2.ndz);
ndz_close(delta.ndz);
} else {
fprintf(stderr, "Images are identical, no delta produced!\n");
ndz_close(ndz2.ndz);
ndz_close(delta.ndz);
unlink(argv[2]);
unlink(delta.sigfile);
}
return 0;
}
......
@@ -26,6 +26,21 @@
*
* Since chunks are independently compressed, we can manipulate them
* independently.
*
* TODO:
* - In _read, read the entire chunk at once and optionally return a pointer
*   to the header struct. Alternatively, at least use a 1M buffer, read
*   incrementally, and keep a high-water mark of how much we have read so far.
*
* - Add a _reopen call when seeking backward in the same chunk. Still
* have to reset the zlib state, but don't have to reread the compressed
* data.
*
* - In _create, return a pointer to where the header should go so the caller
*   can fill it in. Add an option to _flush saying whether to write the
*   header out or not.
*
* - Page-align the buffer.
*/
#include <unistd.h>
@@ -40,31 +55,69 @@
#include "libndz.h"
#define CINPUTSIZE (128*1024)
//#define CHUNK_DEBUG
/*
* Currently we use the "classic" imagezip algorithm for filling chunks.
*
* 1. Before we even try compressing data, we make sure there is at least
* (request_size + 1000) bytes available, where request_size is the
* UNCOMPRESSED size.
* 2. After a compression is done, there needs to be at least 8K left, or
* we call it a day.
*/
#define CHUNKSLOP 1000
#define CHUNKTHRESH 8192
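/*
 * Editor's sketch (not part of this commit): one way the two rules above
 * could be phrased against the chunk API. The helper names are hypothetical
 * and ndz_chunk_left() is assumed to return a signed byte count.
 */
static int
chunk_can_accept(ndz_chunk_t chobj, size_t request_size)
{
	/* Rule 1: before compressing, require room for the whole
	   UNCOMPRESSED request plus CHUNKSLOP bytes of headroom. */
	return ndz_chunk_left(chobj) >= (ssize_t)(request_size + CHUNKSLOP);
}

static int
chunk_is_full(ndz_chunk_t chobj)
{
	/* Rule 2: after a compression pass, if fewer than CHUNKTHRESH
	   bytes remain, we call the chunk done. */
	return ndz_chunk_left(chobj) < CHUNKTHRESH;
}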
struct ndz_chunk {
struct ndz_file *ndz;
ndz_chunkno_t chunkno;
off_t foff;
z_stream z;
int cbufsize;
int cbufoff;
char *cdatabuf;
int clevel;
/* buffer stuff */
char *cbufbase; /* allocated memory */
blockhdr_t *cbufhdr;	/* (aligned) chunk header location */
char *cbufdata; /* (aligned) chunk data location */
int cbufsize; /* size of data portion */
int cbufoff; /* high-water mark of valid data */
int cbufleft; /* write: space left to fill */
};
int
getchunkbuffer(struct ndz_chunk *chunk)
{
int psize = getpagesize();
uintptr_t ptr;
chunk->cbufbase = malloc(CHUNKSIZE + psize);
if (chunk->cbufbase == NULL)
return -1;
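	/* the buffer was over-allocated by one page above; round its start
	   address up to the next page boundary */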
ptr = (((uintptr_t)chunk->cbufbase + psize - 1) & ~(psize - 1));
chunk->cbufhdr = (blockhdr_t *)ptr;
chunk->cbufdata = (char *)(ptr + DEFAULTREGIONSIZE);
chunk->cbufsize = CHUNKSIZE - DEFAULTREGIONSIZE;
chunk->cbufoff = 0;
chunk->cbufleft = chunk->cbufsize;
return 0;
}
ndz_chunk_t
ndz_chunk_open(struct ndz_file *ndz, ndz_chunkno_t chunkno)
{
struct ndz_chunk *chunk = malloc(sizeof *chunk);
if (chunk == NULL)
return NULL;
chunk->cdatabuf = malloc(CINPUTSIZE);
if (chunk->cdatabuf == NULL) {
#ifdef CHUNK_DEBUG
fprintf(stderr, "%s: chunk_open called\n", ndz->fname);
#endif
if (getchunkbuffer(chunk)) {
free(chunk);
return NULL;
}
chunk->cbufsize = CINPUTSIZE;
chunk->cbufoff = 0;
chunk->ndz = ndz;
chunk->chunkno = chunkno;
@@ -80,9 +133,35 @@ ndz_chunk_open(struct ndz_file *ndz, ndz_chunkno_t chunkno)
}
chunk->foff = (off_t)chunkno * ndz->chunksize + DEFAULTREGIONSIZE;
/*
* XXX currently we use ndzfile::ndz_readchunkheader for read access to the
* chunk header. It is better for applications that don't need access to the
* decompressed data, since it won't drag in libz the way the functions in
* this file would.
*/
chunk->cbufhdr = NULL;
return (ndz_chunk_t)chunk;
}
int
ndz_chunk_rewind(ndz_chunk_t chobj)
{
struct ndz_chunk *chunk = (struct ndz_chunk *)chobj;
if (chunk == NULL || chunk->cbufbase == NULL)
return -1;
if (inflateReset(&chunk->z) != Z_OK) {
fprintf(stderr, "chunk_rewind: could not reset zlib state\n");
return -1;
}
chunk->z.next_in = (Bytef *)chunk->cbufdata;
chunk->z.avail_in = chunk->cbufoff;
return 0;
}
void
ndz_chunk_close(ndz_chunk_t chobj)
{
@@ -90,9 +169,13 @@ ndz_chunk_close(ndz_chunk_t chobj)
if (chunk == NULL)
return;
/* release any cache resources */
#ifdef CHUNK_DEBUG
fprintf(stderr, "%s: chunk_close called\n", chunk->ndz->fname);
#endif
inflateEnd(&chunk->z);
if (chunk->cbufbase)
free(chunk->cbufbase);
free(chunk);
}
@@ -106,6 +189,26 @@ ndz_chunk_chunkno(ndz_chunk_t chobj)
return chunk->chunkno;
}
blockhdr_t *
ndz_chunk_header(ndz_chunk_t chobj)
{
struct ndz_chunk *chunk = (struct ndz_chunk *)chobj;
if (chunk == NULL)
return NULL;
return chunk->cbufhdr;
}
ssize_t
ndz_chunk_datasize(ndz_chunk_t chobj)
{
struct ndz_chunk *chunk = (struct ndz_chunk *)chobj;
if (chunk == NULL)
return -1;
return chunk->cbufoff;
}
/*
* Sequentially read data from a chunk til there is no more to be read
*/
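/*
 * Editor's sketch (not part of this commit): typical sequential-read use of
 * the routine that follows. Assumes ndz_chunk_read() returns the number of
 * decompressed bytes, 0 at end of chunk, and a negative value on error.
 */
static int
read_whole_chunk(struct ndz_file *ndz, ndz_chunkno_t chunkno)
{
	char buf[64 * 1024];
	ssize_t cc;
	ndz_chunk_t chobj = ndz_chunk_open(ndz, chunkno);

	if (chobj == NULL)
		return -1;
	/* keep reading decompressed data until the chunk is exhausted */
	while ((cc = ndz_chunk_read(chobj, buf, sizeof buf)) > 0) {
		/* ... consume cc bytes of decompressed data here ... */
	}
	ndz_chunk_close(chobj);
	return (cc < 0) ? -1 : 0;
}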
@@ -119,17 +222,27 @@ ndz_chunk_read(ndz_chunk_t chobj, void *buf, size_t bytes)
if (chunk == NULL)
return -1;
#ifdef CHUNK_DEBUG
fprintf(stderr, "%s: chunk_read called\n", chunk->ndz->fname);
#endif
chunk->z.next_out = (Bytef *)buf;
chunk->z.avail_out = bytes;
while (chunk->z.avail_out > 0) {
/* read more compressed data from file if necessary */
if (chunk->z.avail_in == 0) {
cc = ndz_read(chunk->ndz, chunk->cdatabuf, chunk->cbufsize,
chunk->foff);
cc = ndz_read(chunk->ndz, chunk->cbufdata + chunk->cbufoff,
chunk->cbufsize - chunk->cbufoff, chunk->foff);
#ifdef CHUNK_DEBUG
fprintf(stderr, "chunk_read: reading %d bytes at %lu returns %ld\n",
chunk->cbufsize - chunk->cbufoff,
(unsigned long)chunk->foff, cc);
#endif
if (cc <= 0)
return cc;
chunk->z.next_in = (Bytef *)chunk->cdatabuf;
chunk->z.next_in = (Bytef *)(chunk->cbufdata + chunk->cbufoff);
chunk->z.avail_in = cc;
chunk->cbufoff += cc;
chunk->foff += cc;
}
assert(chunk->z.next_in != Z_NULL);
@@ -138,7 +251,7 @@ ndz_chunk_read(ndz_chunk_t chobj, void *buf, size_t bytes)
rv = inflate(&chunk->z, Z_SYNC_FLUSH);
if (rv == Z_STREAM_END) {
#ifdef DEBUG
#ifdef CHUNK_DEBUG
fprintf(stderr, "chunk_read hit STREAM_END at foff=%ld, avail_out=%d\n",
(unsigned long)chunk->foff, chunk->z.avail_out);
#endif
@@ -160,55 +273,122 @@ ndz_chunk_read(ndz_chunk_t chobj, void *buf, size_t bytes)
* til I decide if it is worthwhile to combine.
*/
ndz_chunk_t
ndz_chunk_create(struct ndz_file *ndz, ndz_chunkno_t chunkno)
ndz_chunk_create(struct ndz_file *ndz, ndz_chunkno_t chunkno, int clevel)
{
struct ndz_chunk *chunk = malloc(sizeof *chunk);
if (chunk == NULL)
return NULL;
/*
* We allocate a buffer large enough to hold the entire chunk
* since we need to manipulate it a bit while we are constructing it.
* Once finished, we flush the whole thing to the file.
*/
chunk->cdatabuf = malloc(CHUNKSIZE);
if (chunk->cdatabuf == NULL) {
#ifdef CHUNK_DEBUG
fprintf(stderr, "%s: chunk_create called\n", chunk->ndz->fname);
#endif
if (getchunkbuffer(chunk)) {
free(chunk);
return NULL;
}
chunk->cbufsize = CHUNKSIZE;
chunk->cbufoff = 0;
memset(chunk->cbufhdr, 0, DEFAULTREGIONSIZE);
chunk->clevel = clevel;
chunk->ndz = ndz;
chunk->chunkno = chunkno;
chunk->z.zalloc = Z_NULL;
chunk->z.zfree = Z_NULL;
chunk->z.opaque = Z_NULL;
chunk->z.next_in = Z_NULL;
chunk->z.avail_in = 0;
chunk->z.next_out = Z_NULL;
if (inflateInit(&chunk->z) != Z_OK) {
if (deflateInit(&chunk->z, chunk->clevel) != Z_OK) {
free(chunk);
return NULL;
}
chunk->foff = (off_t)chunkno * ndz->chunksize;
chunk->foff = (off_t)chunkno * ndz->chunksize + DEFAULTREGIONSIZE;
return (ndz_chunk_t)chunk;
}
void
ndz_chunk_flush(ndz_chunk_t chobj)
int
ndz_chunk_flush(ndz_chunk_t chobj, int withheader)
{
struct ndz_chunk *chunk = (struct ndz_chunk *)chobj;
char *buf = chunk->cbufdata;