Commit b22b7b72 authored by Anthony Liguori's avatar Anthony Liguori
Browse files

Merge remote branch 'kwolf/for-anthony' into staging

parents 4a2ba232 a5c062ed
......@@ -19,7 +19,7 @@ block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-nested-y += qed-check.o
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
......
......@@ -350,7 +350,12 @@ static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
}
}
progress = completed_sector_sum * 100 / block_mig_state.total_sector_sum;
if (block_mig_state.total_sector_sum != 0) {
progress = completed_sector_sum * 100 /
block_mig_state.total_sector_sum;
} else {
progress = 100;
}
if (progress != block_mig_state.prev_progress) {
block_mig_state.prev_progress = progress;
qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
......@@ -633,8 +638,10 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
int len, flags;
char device_name[256];
int64_t addr;
BlockDriverState *bs;
BlockDriverState *bs, *bs_prev = NULL;
uint8_t *buf;
int64_t total_sectors = 0;
int nr_sectors;
do {
addr = qemu_get_be64(f);
......@@ -656,10 +663,26 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
return -EINVAL;
}
if (bs != bs_prev) {
bs_prev = bs;
total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
if (total_sectors <= 0) {
error_report("Error getting length of block device %s\n",
device_name);
return -EINVAL;
}
}
if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
nr_sectors = total_sectors - addr;
} else {
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
buf = qemu_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, BDRV_SECTORS_PER_DIRTY_CHUNK);
ret = bdrv_write(bs, addr, buf, nr_sectors);
qemu_free(buf);
if (ret < 0) {
......
......@@ -2778,6 +2778,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
QEMUOptionParameter *backing_fmt, *backing_file;
BlockDriverState *bs = NULL;
BlockDriver *drv, *proto_drv;
BlockDriver *backing_drv = NULL;
int ret = 0;
/* Find driver and parse its options */
......@@ -2846,7 +2847,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
if (backing_fmt && backing_fmt->value.s) {
if (!bdrv_find_format(backing_fmt->value.s)) {
backing_drv = bdrv_find_format(backing_fmt->value.s);
if (!backing_drv) {
error_report("Unknown backing file format '%s'",
backing_fmt->value.s);
ret = -EINVAL;
......@@ -2863,9 +2865,9 @@ int bdrv_img_create(const char *filename, const char *fmt,
bs = bdrv_new("");
ret = bdrv_open(bs, backing_file->value.s, flags, drv);
ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
if (ret < 0) {
error_report("Could not open '%s'", filename);
error_report("Could not open '%s'", backing_file->value.s);
goto out;
}
bdrv_get_geometry(bs, &size);
......
/*
* L2/refcount table cache for the QCOW2 format
*
* Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
typedef struct Qcow2CachedTable {
void* table;
int64_t offset;
bool dirty;
int cache_hits;
int ref;
} Qcow2CachedTable;
struct Qcow2Cache {
int size;
Qcow2CachedTable* entries;
struct Qcow2Cache* depends;
bool depends_on_flush;
bool writethrough;
};
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
bool writethrough)
{
BDRVQcowState *s = bs->opaque;
Qcow2Cache *c;
int i;
c = qemu_mallocz(sizeof(*c));
c->size = num_tables;
c->entries = qemu_mallocz(sizeof(*c->entries) * num_tables);
c->writethrough = writethrough;
for (i = 0; i < c->size; i++) {
c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
}
return c;
}
int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
{
int i;
for (i = 0; i < c->size; i++) {
assert(c->entries[i].ref == 0);
qemu_vfree(c->entries[i].table);
}
qemu_free(c->entries);
qemu_free(c);
return 0;
}
static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
{
int ret;
ret = qcow2_cache_flush(bs, c->depends);
if (ret < 0) {
return ret;
}
c->depends = NULL;
c->depends_on_flush = false;
return 0;
}
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
BDRVQcowState *s = bs->opaque;
int ret = 0;
if (!c->entries[i].dirty || !c->entries[i].offset) {
return 0;
}
if (c->depends) {
ret = qcow2_cache_flush_dependency(bs, c);
} else if (c->depends_on_flush) {
ret = bdrv_flush(bs->file);
if (ret >= 0) {
c->depends_on_flush = false;
}
}
if (ret < 0) {
return ret;
}
if (c == s->refcount_block_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
} else if (c == s->l2_table_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
}
ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
s->cluster_size);
if (ret < 0) {
return ret;
}
c->entries[i].dirty = false;
return 0;
}
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
int result = 0;
int ret;
int i;
for (i = 0; i < c->size; i++) {
ret = qcow2_cache_entry_flush(bs, c, i);
if (ret < 0 && result != -ENOSPC) {
result = ret;
}
}
if (result == 0) {
ret = bdrv_flush(bs->file);
if (ret < 0) {
result = ret;
}
}
return result;
}
int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency)
{
int ret;
if (dependency->depends) {
ret = qcow2_cache_flush_dependency(bs, dependency);
if (ret < 0) {
return ret;
}
}
if (c->depends && (c->depends != dependency)) {
ret = qcow2_cache_flush_dependency(bs, c);
if (ret < 0) {
return ret;
}
}
c->depends = dependency;
return 0;
}
void qcow2_cache_depends_on_flush(Qcow2Cache *c)
{
c->depends_on_flush = true;
}
static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
{
int i;
int min_count = INT_MAX;
int min_index = -1;
for (i = 0; i < c->size; i++) {
if (c->entries[i].ref) {
continue;
}
if (c->entries[i].cache_hits < min_count) {
min_index = i;
min_count = c->entries[i].cache_hits;
}
/* Give newer hits priority */
/* TODO Check how to optimize the replacement strategy */
c->entries[i].cache_hits /= 2;
}
if (min_index == -1) {
/* This can't happen in current synchronous code, but leave the check
* here as a reminder for whoever starts using AIO with the cache */
abort();
}
return min_index;
}
static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
uint64_t offset, void **table, bool read_from_disk)
{
BDRVQcowState *s = bs->opaque;
int i;
int ret;
/* Check if the table is already cached */
for (i = 0; i < c->size; i++) {
if (c->entries[i].offset == offset) {
goto found;
}
}
/* If not, write a table back and replace it */
i = qcow2_cache_find_entry_to_replace(c);
if (i < 0) {
return i;
}
ret = qcow2_cache_entry_flush(bs, c, i);
if (ret < 0) {
return ret;
}
c->entries[i].offset = 0;
if (read_from_disk) {
if (c == s->l2_table_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
}
ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
if (ret < 0) {
return ret;
}
}
/* Give the table some hits for the start so that it won't be replaced
* immediately. The number 32 is completely arbitrary. */
c->entries[i].cache_hits = 32;
c->entries[i].offset = offset;
/* And return the right table */
found:
c->entries[i].cache_hits++;
c->entries[i].ref++;
*table = c->entries[i].table;
return 0;
}
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table)
{
return qcow2_cache_do_get(bs, c, offset, table, true);
}
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table)
{
return qcow2_cache_do_get(bs, c, offset, table, false);
}
int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
{
int i;
for (i = 0; i < c->size; i++) {
if (c->entries[i].table == *table) {
goto found;
}
}
return -ENOENT;
found:
c->entries[i].ref--;
*table = NULL;
assert(c->entries[i].ref >= 0);
if (c->writethrough) {
return qcow2_cache_entry_flush(bs, c, i);
} else {
return 0;
}
}
void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
{
int i;
for (i = 0; i < c->size; i++) {
if (c->entries[i].table == table) {
goto found;
}
}
abort();
found:
c->entries[i].dirty = true;
}
......@@ -67,7 +67,11 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
qemu_free(new_l1_table);
return new_l1_table_offset;
}
bdrv_flush(bs->file);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
return ret;
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
......@@ -81,7 +85,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
/* set new table */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
cpu_to_be32w((uint32_t*)data, new_l1_size);
cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
if (ret < 0) {
goto fail;
......@@ -98,63 +102,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
return ret;
}
void qcow2_l2_cache_reset(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
}
static inline int l2_cache_new_entry(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
uint32_t min_count;
int min_index, i;
/* find a new entry in the least used one */
min_index = 0;
min_count = 0xffffffff;
for(i = 0; i < L2_CACHE_SIZE; i++) {
if (s->l2_cache_counts[i] < min_count) {
min_count = s->l2_cache_counts[i];
min_index = i;
}
}
return min_index;
}
/*
* seek_l2_table
*
* seek l2_offset in the l2_cache table
* if not found, return NULL,
* if found,
* increments the l2 cache hit count of the entry,
* if counter overflow, divide by two all counters
* return the pointer to the l2 cache entry
*
*/
static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
{
int i, j;
for(i = 0; i < L2_CACHE_SIZE; i++) {
if (l2_offset == s->l2_cache_offsets[i]) {
/* increment the hit count */
if (++s->l2_cache_counts[i] == 0xffffffff) {
for(j = 0; j < L2_CACHE_SIZE; j++) {
s->l2_cache_counts[j] >>= 1;
}
}
return s->l2_cache + (i << s->l2_bits);
}
}
return NULL;
}
/*
* l2_load
*
......@@ -169,33 +116,11 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
uint64_t **l2_table)
{
BDRVQcowState *s = bs->opaque;
int min_index;
int ret;
/* seek if the table for the given offset is in the cache */
*l2_table = seek_l2_table(s, l2_offset);
if (*l2_table != NULL) {
return 0;
}
/* not found: load a new entry in the least used one */
min_index = l2_cache_new_entry(bs);
*l2_table = s->l2_cache + (min_index << s->l2_bits);
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
ret = bdrv_pread(bs->file, l2_offset, *l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
qcow2_l2_cache_reset(bs);
return ret;
}
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
return 0;
return ret;
}
/*
......@@ -238,7 +163,6 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index)
static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
BDRVQcowState *s = bs->opaque;
int min_index;
uint64_t old_l2_offset;
uint64_t *l2_table;
int64_t l2_offset;
......@@ -252,29 +176,48 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
if (l2_offset < 0) {
return l2_offset;
}
bdrv_flush(bs->file);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail;
}
/* allocate a new entry in the l2 cache */
min_index = l2_cache_new_entry(bs);
l2_table = s->l2_cache + (min_index << s->l2_bits);
ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
if (ret < 0) {
return ret;
}
l2_table = *table;
if (old_l2_offset == 0) {
/* if there was no old l2 table, clear the new table */
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
} else {
uint64_t* old_table;
/* if there was an old l2 table, read it from the disk */
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
ret = bdrv_pread(bs->file, old_l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset,
(void**) &old_table);
if (ret < 0) {
goto fail;
}
memcpy(l2_table, old_table, s->cluster_size);
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
if (ret < 0) {
goto fail;
}
}
/* write the l2 table to the file */
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
goto fail;
}
......@@ -286,17 +229,12 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
goto fail;
}
/* update the l2 cache entry */
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
*table = l2_table;
return 0;
fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
s->l1_table[l1_index] = old_l2_offset;
qcow2_l2_cache_reset(bs);
return ret;
}
......@@ -521,6 +459,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
&l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
}
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
nb_available = (c * s->cluster_sectors);
out:
if (nb_available > nb_needed)
......@@ -575,6 +515,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
return ret;
}
} else {
/* FIXME Order */
if (l2_offset)
qcow2_free_clusters<