Commit 964737ea authored by Anthony Liguori's avatar Anthony Liguori

Merge remote-tracking branch 'stefanha/block' into staging

# By Paolo Bonzini (21) and others
# Via Stefan Hajnoczi
* stefanha/block: (42 commits)
  qemu-iotests: Fixed test case 026
  qemu-iotests: Whitespace cleanup
  dataplane: Fix startup race.
  block: look for zero blocks in bs->file
  block: add default get_block_status implementation for protocols
  raw-posix: report unwritten extents as zero
  raw-posix: return get_block_status data and flags
  docs, qapi: document qemu-img map
  qemu-img: add a "map" subcommand
  block: return BDRV_BLOCK_ZERO past end of backing file
  block: use bdrv_has_zero_init to return BDRV_BLOCK_ZERO
  block: return get_block_status data and flags for formats
  block: define get_block_status return value
  block: introduce bdrv_get_block_status API
  block: make bdrv_has_zero_init return false for copy-on-write-images
  qemu-img: always probe the input image for allocated sectors
  block: expect errors from bdrv_co_is_allocated
  block: remove bdrv_is_allocated_above/bdrv_co_is_allocated_above distinction
  block: do not use ->total_sectors in bdrv_co_is_allocated
  block: make bdrv_co_is_allocated static
  ...

Message-id: 1378481953-23099-1-git-send-email-stefanha@redhat.com
Signed-off-by: default avatarAnthony Liguori <anthony@codemonkey.ws>
parents ce2b6941 8f94b077
......@@ -18,6 +18,28 @@ Example:
"data": { "actual": 944766976 },
"timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
BLOCK_IMAGE_CORRUPTED
---------------------
Emitted when a disk image is being marked corrupt.
Data:
- "device": Device name (json-string)
- "msg": Informative message (e.g., reason for the corruption) (json-string)
- "offset": If the corruption resulted from an image access, this is the access
offset into the image (json-int)
- "size": If the corruption resulted from an image access, this is the access
size (json-int)
Example:
{ "event": "BLOCK_IMAGE_CORRUPTED",
"data": { "device": "ide0-hd0",
"msg": "Prevented active L1 table overwrite", "offset": 196608,
"size": 65536 },
"timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
BLOCK_IO_ERROR
--------------
......
......@@ -336,8 +336,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
bmds->completed_sectors = 0;
bmds->shared_base = block_mig_state.shared_base;
alloc_aio_bitmap(bmds);
drive_get_ref(drive_get_by_blockdev(bs));
bdrv_set_in_use(bs, 1);
bdrv_ref(bs);
block_mig_state.total_sector_sum += sectors;
......@@ -575,7 +575,7 @@ static void blk_mig_cleanup(void)
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
bdrv_set_in_use(bmds->bs, 0);
drive_put_ref(drive_get_by_blockdev(bmds->bs));
bdrv_unref(bmds->bs);
g_free(bmds->aio_bitmap);
g_free(bmds);
}
......
This diff is collapsed.
......@@ -289,14 +289,14 @@ static void coroutine_fn backup_run(void *opaque)
* backing file. */
for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
/* bdrv_co_is_allocated() only returns true/false based
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
* For that reason we must verify each sector in the
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
bdrv_co_is_allocated(bs,
bdrv_is_allocated(bs,
start * BACKUP_SECTORS_PER_CLUSTER + i,
BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n;
......@@ -338,7 +338,7 @@ static void coroutine_fn backup_run(void *opaque)
hbitmap_free(job->bitmap);
bdrv_iostatus_disable(target);
bdrv_delete(target);
bdrv_unref(target);
block_job_completed(&job->common, ret);
}
......
......@@ -155,7 +155,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags)
s->test_file = bdrv_new("");
ret = bdrv_open(s->test_file, filename, NULL, flags, NULL);
if (ret < 0) {
bdrv_delete(s->test_file);
bdrv_unref(s->test_file);
s->test_file = NULL;
goto fail;
}
......@@ -169,7 +169,7 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
bdrv_delete(s->test_file);
bdrv_unref(s->test_file);
s->test_file = NULL;
}
......
......@@ -108,9 +108,9 @@ wait:
break;
}
/* Copy if allocated above the base */
ret = bdrv_co_is_allocated_above(top, base, sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
ret = bdrv_is_allocated_above(top, base, sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
if (copy) {
......
......@@ -106,7 +106,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags)
* XXX(hch): right now these functions are extremely inefficient.
* We should just read the whole bitmap we'll need in one go instead.
*/
static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum, bool *first)
{
uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
uint8_t bitmap;
......@@ -117,27 +117,52 @@ static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
return ret;
}
if (bitmap & (1 << (bitnum % 8))) {
return 0;
}
if (*first) {
ret = bdrv_flush(bs->file);
if (ret < 0) {
return ret;
}
*first = false;
}
bitmap |= (1 << (bitnum % 8));
ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
return 0;
}
static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
#define BITS_PER_BITMAP_SECTOR (512 * 8)
/* Cannot use bitmap.c on big-endian machines. */
static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
{
uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
uint8_t bitmap;
int ret;
return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
}
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
{
int streak_value = value ? 0xFF : 0;
int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
int bitnum = start;
while (bitnum < last) {
if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
bitnum += 8;
continue;
}
if (cow_test_bit(bitnum, bitmap) == value) {
bitnum++;
continue;
}
break;
}
return !!(bitmap & (1 << (bitnum % 8)));
return MIN(bitnum, last) - start;
}
/* Return true if first block has been changed (ie. current version is
......@@ -146,24 +171,33 @@ static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
uint8_t bitmap[BDRV_SECTOR_SIZE];
int ret;
int changed;
if (nb_sectors == 0) {
*num_same = nb_sectors;
return 0;
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
changed = is_bit_set(bs, sector_num);
if (changed < 0) {
return 0; /* XXX: how to return I/O errors? */
}
bitnum &= BITS_PER_BITMAP_SECTOR - 1;
changed = cow_test_bit(bitnum, bitmap);
*num_same = cow_find_streak(bitmap, changed, bitnum, nb_sectors);
return changed;
}
for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
if (is_bit_set(bs, sector_num + *num_same) != changed)
break;
static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
BDRVCowState *s = bs->opaque;
int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
if (ret < 0) {
return ret;
}
return changed;
return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
}
static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
......@@ -171,9 +205,10 @@ static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
{
int error = 0;
int i;
bool first = true;
for (i = 0; i < nb_sectors; i++) {
error = cow_set_bit(bs, sector_num + i);
error = cow_set_bit(bs, sector_num + i, &first);
if (error) {
break;
}
......@@ -189,7 +224,11 @@ static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
int ret, n;
while (nb_sectors > 0) {
if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
if (ret < 0) {
return ret;
}
if (ret) {
ret = bdrv_pread(bs->file,
s->cow_sectors_offset + sector_num * 512,
buf, n * 512);
......@@ -314,7 +353,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
}
exit:
bdrv_delete(cow_bs);
bdrv_unref(cow_bs);
return ret;
}
......@@ -344,7 +383,7 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_is_allocated = cow_co_is_allocated,
.bdrv_co_get_block_status = cow_co_get_block_status,
.create_options = cow_create_options,
};
......
......@@ -1241,11 +1241,11 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
{
int ret = 0;
int64_t total_size = 0;
BlockDriverState bs;
BlockDriverState *bs;
IscsiLun *iscsilun = NULL;
QDict *bs_options;
memset(&bs, 0, sizeof(BlockDriverState));
bs = bdrv_new("");
/* Read out options */
while (options && options->name) {
......@@ -1255,12 +1255,12 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
options++;
}
bs.opaque = g_malloc0(sizeof(struct IscsiLun));
iscsilun = bs.opaque;
bs->opaque = g_malloc0(sizeof(struct IscsiLun));
iscsilun = bs->opaque;
bs_options = qdict_new();
qdict_put(bs_options, "filename", qstring_from_str(filename));
ret = iscsi_open(&bs, bs_options, 0);
ret = iscsi_open(bs, bs_options, 0);
QDECREF(bs_options);
if (ret != 0) {
......@@ -1274,7 +1274,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
ret = -ENODEV;
goto out;
}
if (bs.total_sectors < total_size) {
if (bs->total_sectors < total_size) {
ret = -ENOSPC;
goto out;
}
......@@ -1284,7 +1284,9 @@ out:
if (iscsilun->iscsi != NULL) {
iscsi_destroy_context(iscsilun->iscsi);
}
g_free(bs.opaque);
g_free(bs->opaque);
bs->opaque = NULL;
bdrv_unref(bs);
return ret;
}
......
......@@ -338,8 +338,8 @@ static void coroutine_fn mirror_run(void *opaque)
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
for (sector_num = 0; sector_num < end; ) {
int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
ret = bdrv_co_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
ret = bdrv_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
if (ret < 0) {
goto immediate_exit;
......@@ -480,7 +480,7 @@ immediate_exit:
bdrv_swap(s->target, s->common.bs);
}
bdrv_close(s->target);
bdrv_delete(s->target);
bdrv_unref(s->target);
block_job_completed(&s->common, ret);
}
......
......@@ -223,18 +223,44 @@ void bdrv_query_info(BlockDriverState *bs,
info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
if (bs->io_limits_enabled) {
info->inserted->bps =
bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
info->inserted->bps_rd =
bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
info->inserted->bps_wr =
bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
info->inserted->iops =
bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
info->inserted->iops_rd =
bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
info->inserted->iops_wr =
bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
ThrottleConfig cfg;
throttle_get_config(&bs->throttle_state, &cfg);
info->inserted->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->inserted->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
info->inserted->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg;
info->inserted->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
info->inserted->has_bps_max =
cfg.buckets[THROTTLE_BPS_TOTAL].max;
info->inserted->bps_max =
cfg.buckets[THROTTLE_BPS_TOTAL].max;
info->inserted->has_bps_rd_max =
cfg.buckets[THROTTLE_BPS_READ].max;
info->inserted->bps_rd_max =
cfg.buckets[THROTTLE_BPS_READ].max;
info->inserted->has_bps_wr_max =
cfg.buckets[THROTTLE_BPS_WRITE].max;
info->inserted->bps_wr_max =
cfg.buckets[THROTTLE_BPS_WRITE].max;
info->inserted->has_iops_max =
cfg.buckets[THROTTLE_OPS_TOTAL].max;
info->inserted->iops_max =
cfg.buckets[THROTTLE_OPS_TOTAL].max;
info->inserted->has_iops_rd_max =
cfg.buckets[THROTTLE_OPS_READ].max;
info->inserted->iops_rd_max =
cfg.buckets[THROTTLE_OPS_READ].max;
info->inserted->has_iops_wr_max =
cfg.buckets[THROTTLE_OPS_WRITE].max;
info->inserted->iops_wr_max =
cfg.buckets[THROTTLE_OPS_WRITE].max;
info->inserted->has_iops_size = cfg.op_size;
info->inserted->iops_size = cfg.op_size;
}
bs0 = bs;
......
......@@ -395,7 +395,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
......@@ -410,7 +410,14 @@ static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
if (n > nb_sectors)
n = nb_sectors;
*pnum = n;
return (cluster_offset != 0);
if (!cluster_offset) {
return 0;
}
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
return BDRV_BLOCK_DATA;
}
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
......@@ -751,7 +758,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
g_free(tmp);
ret = 0;
exit:
bdrv_delete(qcow_bs);
bdrv_unref(qcow_bs);
return ret;
}
......@@ -896,7 +903,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_co_get_block_status = qcow_co_get_block_status,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
......
......@@ -688,24 +688,34 @@ static int qcow2_reopen_prepare(BDRVReopenState *state,
return 0;
}
static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_offset;
int ret;
int index_in_cluster, ret;
int64_t status = 0;
*pnum = nb_sectors;
/* FIXME We can get errors here, but the bdrv_co_is_allocated interface
* can't pass them on today */
qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
*pnum = 0;
return ret;
}
return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
!s->crypt_method) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
if (ret == QCOW2_CLUSTER_ZERO) {
status |= BDRV_BLOCK_ZERO;
} else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
status |= BDRV_BLOCK_DATA;
}
return status;
}
/* handle reading after the end of the backing file */
......@@ -1452,7 +1462,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
ret = 0;
out:
bdrv_delete(bs);
bdrv_unref(bs);
return ret;
}
......@@ -1868,7 +1878,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_reopen_prepare = qcow2_reopen_prepare,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_is_allocated = qcow2_co_is_allocated,
.bdrv_co_get_block_status = qcow2_co_get_block_status,
.bdrv_set_key = qcow2_set_key,
.bdrv_make_empty = qcow2_make_empty,
......
......@@ -599,7 +599,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
bdrv_delete(bs);
bdrv_unref(bs);
return ret;
}
......@@ -652,45 +652,66 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
}
typedef struct {
BlockDriverState *bs;
Coroutine *co;
int is_allocated;
uint64_t pos;
int64_t status;
int *pnum;
} QEDIsAllocatedCB;
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
{
QEDIsAllocatedCB *cb = opaque;
BDRVQEDState *s = cb->bs->opaque;
*cb->pnum = len / BDRV_SECTOR_SIZE;
cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
switch (ret) {
case QED_CLUSTER_FOUND:
offset |= qed_offset_into_cluster(s, cb->pos);
cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
break;
case QED_CLUSTER_ZERO:
cb->status = BDRV_BLOCK_ZERO;
break;
case QED_CLUSTER_L2:
case QED_CLUSTER_L1:
cb->status = 0;
break;
default:
assert(ret < 0);
cb->status = ret;
break;
}
if (cb->co) {
qemu_coroutine_enter(cb->co, NULL);
}
}
static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
BDRVQEDState *s = bs->opaque;
uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
QEDIsAllocatedCB cb = {
.is_allocated = -1,
.bs = bs,
.pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
.status = BDRV_BLOCK_OFFSET_MASK,
.pnum = pnum,
};
QEDRequest request = { .l2_table = NULL };
qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
/* Now sleep if the callback wasn't invoked immediately */
while (cb.is_allocated == -1) {
while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
cb.co = qemu_coroutine_self();
qemu_coroutine_yield();
}
qed_unref_l2_cache_entry(request.l2_table);
return cb.is_allocated;
return cb.status;
}
static int bdrv_qed_make_empty(BlockDriverState *bs)
......@@ -1575,7 +1596,7 @@ static BlockDriver bdrv_qed = {
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
......
......@@ -1084,12 +1084,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*/
static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
off_t start, data, hole;
int ret;
int64_t ret;
ret = fd_open(bs);