/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "config-host.h"
pbrook's avatar
pbrook committed
25
#include "qemu-common.h"
26
#include "trace.h"
27 28
#include "block/block_int.h"
#include "block/blockjob.h"
29
#include "qemu/module.h"
30
#include "qapi/qmp/qjson.h"
31
#include "sysemu/block-backend.h"
32
#include "sysemu/sysemu.h"
33
#include "qemu/notify.h"
34
#include "block/coroutine.h"
35
#include "block/qapi.h"
36
#include "qmp-commands.h"
37
#include "qemu/timer.h"
38
#include "qapi-event.h"
bellard's avatar
bellard committed
39

40
#ifdef CONFIG_BSD
bellard's avatar
bellard committed
41 42 43
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
44
#include <sys/queue.h>
45
#ifndef __DragonFly__
bellard's avatar
bellard committed
46 47
#include <sys/disk.h>
#endif
48
#endif
bellard's avatar
bellard committed
49

50 51 52 53
#ifdef _WIN32
#include <windows.h>
#endif

54 55 56 57 58
/*
 * One dirty bitmap record.  Records are chained through @list; presumably
 * the chain hangs off BlockDriverState.dirty_bitmaps, which bdrv_new()
 * initializes (TODO confirm against the users further down the file).
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;                    /* the underlying bitmap object */
    QLIST_ENTRY(BdrvDirtyBitmap) list;  /* linkage to sibling records */
};

59 60
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

61
static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
62
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
63
        BlockCompletionFunc *cb, void *opaque);
64
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
65
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66
        BlockCompletionFunc *cb, void *opaque);
67 68 69 70 71 72
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
73 74
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
75
    BdrvRequestFlags flags);
76 77
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
78
    BdrvRequestFlags flags);
79 80 81 82 83
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
84
                                         BlockCompletionFunc *cb,
85 86
                                         void *opaque,
                                         bool is_write);
87
static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf's avatar
Kevin Wolf committed
88
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
89
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellard's avatar
bellard committed
90

91 92
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
93

94 95 96
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

97 98
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellard's avatar
bellard committed
99

100 101 102
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
#ifdef _WIN32
/*
 * Return non-zero if @filename begins with an ASCII letter immediately
 * followed by ':' (e.g. "c:"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = letter >= 'a' && letter <= 'z';
    int is_upper = letter >= 'A' && letter <= 'Z';

    /* Only look at the second character once the first one is a letter,
     * so an empty string is never read past its terminator. */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if @filename names a Windows drive: either exactly "<letter>:"
 * or anything starting with the device prefix "\\.\" or "//./".
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
#endif

123
/* throttling disk I/O limits */
124 125
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
126
{
127
    int i;
128

129
    throttle_config(&bs->throttle_state, cfg);
130

131 132
    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
133
    }
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
}

/*
 * Drain all throttled I/O requests of @bs.
 *
 * io_limits_enabled is cleared while both queues are emptied and is
 * restored to its previous value afterwards.
 *
 * Returns true if qemu_co_enter_next() reported success at least once,
 * false if both queues were already empty.
 */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

156
/*
 * Turn I/O throttling off for @bs: clear the enabled flag, drain every
 * request still queued for throttling, then destroy the throttle state.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

165
static void bdrv_throttle_read_timer_cb(void *opaque)
166
{
167 168
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
169 170
}

171
static void bdrv_throttle_write_timer_cb(void *opaque)
172
{
173 174
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
175 176
}

177 178 179 180 181
/*
 * Enable I/O throttling on @bs.
 *
 * Should be called before bdrv_set_io_limits() if a limit is set.
 * Throttling must not already be enabled (asserted).  The throttle state
 * is initialized with the AioContext of @bs, QEMU_CLOCK_VIRTUAL and the
 * read/write timer callbacks defined in this file.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @nb_sectors: the number of sectors of the IO
 * @is_write:   is the IO a write
 */
195
static void bdrv_io_limits_intercept(BlockDriverState *bs,
196
                                     unsigned int bytes,
197
                                     bool is_write)
198
{
199 200
    /* does this io must wait */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
201

202 203 204 205
    /* if must wait or any request of this type throttled queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
206 207
    }

208
    /* the IO will be executed, do the accounting */
209 210
    throttle_account(&bs->throttle_state, is_write, bytes);

211

212 213 214
    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
215 216
    }

217 218
    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
219 220
}

221 222 223 224 225 226 227 228 229 230
/*
 * Return the optimal memory alignment recorded in the limits of @bs.
 * When @bs is NULL or has no driver attached, fall back to 4096.
 */
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (bs && bs->drv) {
        return bs->bl.opt_mem_alignment;
    }

    /* 4k should be on the safe side */
    return 4096;
}

231 232 233
/*
 * Check if the path starts with "<protocol>:".
 *
 * The path has a protocol when the first character out of ':' and the path
 * separators is a ':'.  On Windows, drive names ("c:", "\\.\...", "//./...")
 * are never treated as protocols and '\\' also counts as a separator.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

bellard's avatar
bellard committed
249
/*
 * Return non-zero if @path is absolute, zero otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\\'
 * and any drive name or drive prefix also count as absolute.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

bellard's avatar
bellard committed
262 263 264 265 266 267
/*
 * If @filename is absolute, just copy it to @dest.  Otherwise, build a
 * path to it by considering it is relative to @base_path.  URLs are
 * supported.
 *
 * The directory part of @base_path is everything up to and including the
 * last path separator, or up to and including the first ':' when that
 * comes later.  The result is truncated to fit in @dest_size bytes
 * including the terminating NUL; nothing is written if @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: just past the protocol separator, if there is one */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* p1: just past the last directory separator, if there is one */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        /* keep whichever prefix is longer */
        if (p1 > p) {
            p = p1;
        }
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

306 307 308 309 310 311 312 313 314
/*
 * Write the backing file name of @bs into @dest (at most @sz bytes).
 *
 * An empty backing file name, or one carrying a "<protocol>:" prefix, is
 * copied verbatim; anything else is combined with the file name of @bs
 * through path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    const char *backing = bs->backing_file;

    if (backing[0] != '\0' && !path_has_protocol(backing)) {
        path_combine(dest, sz, bs->filename, backing);
        return;
    }

    pstrcpy(dest, sz, backing);
}

315
/*
 * Register the block driver @bdrv by adding it to the bdrv_drivers list.
 *
 * Drivers that provide no bdrv_co_readv callback get the coroutine
 * emulation functions installed; if they also lack bdrv_aio_readv, the
 * AIO emulation functions are installed as well.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellard's avatar
bellard committed
334

335
BlockDriverState *bdrv_new_root(void)
bellard's avatar
bellard committed
336
{
337
    BlockDriverState *bs = bdrv_new();
338 339 340 341 342 343 344 345 346 347

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}

BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

348
    bs = g_new0(BlockDriverState, 1);
349
    QLIST_INIT(&bs->dirty_bitmaps);
350 351 352
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
353
    bdrv_iostatus_disable(bs);
354
    notifier_list_init(&bs->close_notifiers);
355
    notifier_with_return_list_init(&bs->before_write_notifiers);
356 357
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
358
    bs->refcnt = 1;
359
    bs->aio_context = qemu_get_aio_context();
360

bellard's avatar
bellard committed
361 362 363
    return bs;
}

364 365 366 367 368
/* Add @notify to the close_notifiers list of @bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

bellard's avatar
bellard committed
369 370 371
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
372 373
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
bellard's avatar
bellard committed
374
            return drv1;
375
        }
bellard's avatar
bellard committed
376 377 378 379
    }
    return NULL;
}

380
/*
 * Check @drv against the compile-time driver whitelists.
 *
 * Returns 1 if both whitelists are empty, if the driver's format name is on
 * the read-write whitelist, or if @read_only is set and the name is on the
 * read-only whitelist.  Returns 0 otherwise.
 *
 * Both lists are walked until a NULL entry is found.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

409 410
/*
 * Like bdrv_find_format(), but only return the driver if it also passes
 * bdrv_is_whitelisted() for the given @read_only mode; NULL otherwise.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);

    if (drv && bdrv_is_whitelisted(drv, read_only)) {
        return drv;
    }
    return NULL;
}

416 417 418
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
419
    QemuOpts *opts;
420
    int ret;
421
    Error *err;
422 423 424 425
} CreateCo;

/*
 * Coroutine entry point for bdrv_create(); @opaque is a CreateCo.
 *
 * Invokes the driver's bdrv_create callback, stores any reported error in
 * cco->err and writes the return value to cco->ret.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    /* bdrv_create() polls until ret is no longer NOT_DONE */
    cco->ret = ret;
}

439
int bdrv_create(BlockDriver *drv, const char* filename,
440
                QemuOpts *opts, Error **errp)
bellard's avatar
bellard committed
441
{
442 443 444 445 446 447
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
448
        .opts = opts,
449
        .ret = NOT_DONE,
450
        .err = NULL,
451 452
    };

453
    if (!drv->bdrv_create) {
454
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
455 456
        ret = -ENOTSUP;
        goto out;
457 458 459 460 461 462 463 464 465
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
466
            aio_poll(qemu_get_aio_context(), true);
467 468 469 470
        }
    }

    ret = cco.ret;
471
    if (ret < 0) {
472
        if (cco.err) {
473 474 475 476 477
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }
478

479 480
out:
    g_free(cco.filename);
481
    return ret;
bellard's avatar
bellard committed
482 483
}

484
/*
 * Create the file @filename using the protocol driver selected by
 * bdrv_find_protocol() (protocol prefixes allowed).
 *
 * Returns -ENOENT if no protocol driver is found, otherwise the result of
 * bdrv_create().  Errors are reported through @errp.
 */
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

503
/*
 * Recompute the I/O limits (bs->bl) of @bs.
 *
 * The limits are reset to zero first; without a driver nothing else
 * happens.  Defaults are then taken from bs->file (after refreshing it) and
 * merged with those of bs->backing_hd (after refreshing it), and finally
 * the driver's bdrv_refresh_limits callback, if any, may override them.
 *
 * An error from refreshing a child is propagated to @errp and stops the
 * refresh.
 */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

551 552 553 554 555
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer receiving the name of the created file
 * @size:     size of @filename in bytes
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code).  On POSIX hosts the file is created in
 * $TMPDIR, or in /var/tmp if TMPDIR is unset; -EOVERFLOW is returned when
 * the resulting name does not fit in @size bytes.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellard's avatar
bellard committed
586

587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver with a bdrv_probe_device callback scores
 * @filename; the driver with the highest strictly positive score is
 * returned (the first one seen wins a tie).  Returns NULL if no driver
 * scores above zero.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL;
    BlockDriver *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int candidate_score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }

        candidate_score = candidate->bdrv_probe_device(filename);
        if (candidate_score > best_score) {
            best_score = candidate_score;
            best = candidate;
        }
    }

    return best;
}

609 610
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
bellard's avatar
bellard committed
611 612 613
{
    BlockDriver *drv1;
    char protocol[128];
614
    int len;
bellard's avatar
bellard committed
615
    const char *p;
bellard's avatar
bellard committed
616

617 618
    /* TODO Drivers without bdrv_file_open must be specified explicitly */

619 620 621 622 623 624 625 626 627 628 629 630
    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

631
    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
632
        return bdrv_find_format("file");
633
    }
634

635 636
    p = strchr(filename, ':');
    assert(p != NULL);
637 638 639 640 641
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
642
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
643
        if (drv1->protocol_name &&
644
            !strcmp(drv1->protocol_name, protocol)) {
bellard's avatar
bellard committed
645
            return drv1;
646
        }
bellard's avatar
bellard committed
647 648 649 650
    }
    return NULL;
}

651
static int find_image_format(BlockDriverState *bs, const char *filename,
652
                             BlockDriver **pdrv, Error **errp)
653
{
654
    int score, score_max;
655 656
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
657
    int ret = 0;
658

659
    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
660
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
661 662
        drv = bdrv_find_format("raw");
        if (!drv) {
663
            error_setg(errp, "Could not find raw image format");
664 665 666 667
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
668
    }
669

bellard's avatar
bellard committed
670 671
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
672 673
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
674 675
        *pdrv = NULL;
        return ret;
bellard's avatar
bellard committed
676 677
    }

bellard's avatar
bellard committed
678
    score_max = 0;
679
    drv = NULL;
680
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard's avatar
bellard committed
681 682 683 684 685 686
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
bellard's avatar
bellard committed
687
        }
bellard's avatar
bellard committed
688
    }
689
    if (!drv) {
690 691
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
692 693 694 695
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
bellard's avatar
bellard committed
696 697
}

698 699
/**
 * Set the current 'total_sectors' value
 *
 * If the driver implements bdrv_getlength, the device length is queried and
 * rounded up to whole sectors; otherwise @hint is stored unchanged.
 * scsi-generic devices are left untouched.
 *
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
/**
 * Translate a discard mode string into open flags.
 *
 * BDRV_O_UNMAP is always cleared from *@flags first.  "on" and "unmap" set
 * it again; "off" and "ignore" leave it cleared.
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    bool want_unmap;

    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        want_unmap = false;
    } else if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        want_unmap = true;
    } else {
        return -1;
    }

    if (want_unmap) {
        *flags |= BDRV_O_UNMAP;
    }
    return 0;
}

743 744 745 746 747 748 749 750 751 752 753
/**
 * Set open flags for a given cache mode
 *
 * All BDRV_O_CACHE_MASK bits are cleared from *@flags first, then the bits
 * matching @mode ("off"/"none", "directsync", "writeback", "unsafe" or
 * "writethrough") are set.
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 *
 * This function takes one reference by incrementing bs->copy_on_read.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

/*
 * Drop one copy-on-read reference of @bs by decrementing bs->copy_on_read.
 * The counter must be positive on entry (asserted).
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

786 787 788 789 790 791 792 793 794 795
/*
 * Compute the flags for a temporary snapshot from the originally requested
 * @flags: BDRV_O_SNAPSHOT is removed and BDRV_O_TEMPORARY is added.  (The
 * originally requested image itself will have flags like a backing file.)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    int snapshot_flags = flags;

    snapshot_flags &= ~BDRV_O_SNAPSHOT;
    snapshot_flags |= BDRV_O_TEMPORARY;

    return snapshot_flags;
}

796 797 798 799 800 801 802 803 804 805 806 807 808 809
/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
810
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
811 812 813 814

    return flags;
}

815 816 817 818 819 820 821 822 823 824
/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
825
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
826 827 828 829

    return flags;
}

830 831 832 833 834 835 836 837
/*
 * Compute the flags that are passed to the driver's open callback from the
 * @flags requested for @bs.
 *
 * BDRV_O_CACHE_WB is always set, the block-layer-internal flags are
 * removed, and BDRV_O_RDWR is forced on for temporary snapshots.
 * @bs itself is not consulted.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

850 851 852
/*
 * Give @bs the node name @node_name and add it to graph_bdrv_states.
 *
 * A NULL @node_name is silently ignored.  An error is set in @errp, and
 * @bs is left unchanged, if the name is not well-formed, clashes with a
 * block backend name or is already used by another node.
 */
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespaces collisions */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicates node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

882 883
/*
 * Common part for opening disk images and files
884 885
 *
 * Removes all processed options from *options.
886
 */
887
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
888
    QDict *options, int flags, BlockDriver *drv, Error **errp)
889 890
{
    int ret, open_flags;
891
    const char *filename;
892
    const char *node_name = NULL;
893
    Error *local_err = NULL;
894 895

    assert(drv != NULL);
896
    assert(bs->file == NULL);
897
    assert(options != NULL && bs->options != options);
898

899 900 901 902 903 904
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

905 906 907 908 909 910
    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

911
    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
912

913
    node_name = qdict_get_try_str(options, "node-name");
914
    bdrv_assign_node_name(bs, node_name, &local_err);
915
    if (local_err) {
916 917
        error_propagate(errp, local_err);
        return -EINVAL;
918 919 920
    }
    qdict_del(options, "node-name");

921 922 923 924 925 926 927 928
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

929
    bs->open_flags = flags;
930
    bs->guest_block_size = 512;
931
    bs->request_alignment = 512;
932
    bs->zero_beyond_eof = true;
933 934
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
935
    bs->growable = !!(flags & BDRV_O_PROTOCOL);
936 937

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
938 939 940 941 942
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
943 944
        return -ENOTSUP;
    }
945

946
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
947 948 949 950 951 952 953
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
954 955
    }

956 957 958 959 960
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
961
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
962 963

    bs->drv = drv;
964
    bs->opaque = g_malloc0(drv->instance_size);
965

966
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
967

968 969
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
970
        assert(file == NULL);
971
        assert(!drv->bdrv_needs_filename || filename != NULL);
972
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
973
    } else {
974
        if (file == NULL) {
975 976
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
977 978 979
            ret = -EINVAL;
            goto free_and_fail;
        }
980
        bs->file = file;
981
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
982 983
    }

984
    if (ret < 0) {
985
        if (local_err) {
986
            error_propagate(errp, local_err);
987 988
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
989 990 991
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
992 993 994
        goto free_and_fail;
    }

995 996
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
997
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
998
        goto free_and_fail;
999
    }
1000

1001 1002 1003 1004 1005 1006 1007
    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

1008
    assert(bdrv_opt_mem_align(bs) != 0);
1009
    assert((bs->request_alignment != 0) || bs->sg);
1010 1011 1012
    return 0;

free_and_fail:
1013
    bs->file = NULL;
1014
    g_free(bs->opaque);
1015 1016 1017 1018 1019
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
/*
 * Turns a "json:"-prefixed pseudo filename into an options QDict.
 *
 * @filename must start with "json:" (this is asserted); everything after the
 * prefix is handed to the JSON parser.
 *
 * Returns the parsed dictionary after passing it through qdict_flatten(), or
 * NULL with @errp set if the text cannot be parsed or the top-level JSON
 * value is not a dictionary.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    const char *json_text;
    QObject *parsed;
    QDict *dict;
    int has_prefix;

    /* Callers only get here for "json:" filenames; skip past the prefix */
    has_prefix = strstart(filename, "json:", &json_text);
    assert(has_prefix);

    parsed = qobject_from_json(json_text);
    if (parsed == NULL) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(parsed) == QTYPE_QDICT) {
        dict = qobject_to_qdict(parsed);
        qdict_flatten(dict);
        return dict;
    }

    /* Valid JSON, but not an object: drop our reference and report it */
    qobject_decref(parsed);
    error_setg(errp, "Invalid JSON object given");
    return NULL;
}

Kevin Wolf's avatar
Kevin Wolf committed
1047
/*
1048 1049
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
Kevin Wolf's avatar
Kevin Wolf committed
1050
 */
1051
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
1052
                             BlockDriver *drv, Error **errp)
bellard's avatar
bellard committed
1053
{
1054
    const char *filename = *pfilename;
1055
    const char *drvname;
1056
    bool protocol = flags & BDRV_O_PROTOCOL;
1057
    bool parse_filename = false;
1058
    Error *local_err = NULL;
bellard's avatar
bellard committed
1059

1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

1075
    /* Fetch the file name from the options QDict if necessary */
1076
    if (protocol && filename) {
1077 1078 1079 1080 1081 1082 1083 1084
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
1085 1086
    }

1087
    /* Find the right block driver */
1088
    filename = qdict_get_try_str(*options, "filename");
1089
    drvname = qdict_get_try_str(*options, "driver");
1090

1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename);
                if (!drv) {
                    error_setg(errp, "Unknown protocol");
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
1111 1112
                return -EINVAL;
            }
1113 1114 1115 1116 1117 1118
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
1119
        }
1120 1121
    }

1122
    assert(drv || !protocol);
1123

1124
    /* Driver-specific filename parsing */
1125
    if (drv && drv->bdrv_parse_filename && parse_filename) {
1126
        drv->bdrv_parse_filename(filename, *options, &local_err);
1127
        if (local_err) {
1128
            error_propagate(errp, local_err);
1129
            return -EINVAL;
1130
        }
1131 1132 1133 1134

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
1135 1136
    }

1137 1138 1139
    return 0;
}

1140 1141 1142
/*
 * Installs @backing_hd as the backing BlockDriverState of @bs, or detaches
 * the current one when @backing_hd is NULL.
 *
 * Any previously attached backing node has the operations blocked through
 * bs->backing_blocker unblocked first. A new backing node gets all operations
 * blocked again except BLOCK_OP_TYPE_COMMIT, and bs->backing_file /
 * bs->backing_format are updated from it. bdrv_refresh_limits() is called on
 * @bs in every case.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
    if (bs->backing_hd != NULL) {
        /* Release the old backing node; the blocker object is kept for reuse */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd != NULL) {
        /* First backing node for this bs: create the blocker reason */
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bdrv_get_device_name(bs));
    }

    bs->backing_hd = backing_hd;

    if (backing_hd != NULL) {
        bs->open_flags &= ~BDRV_O_NO_BACKING;
        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
                backing_hd->filename);
        pstrcpy(bs->backing_format, sizeof(bs->backing_format),
                backing_hd->drv ? backing_hd->drv->format_name : "");

        bdrv_op_block_all(backing_hd, bs->backing_blocker);
        /* Otherwise we won't be able to commit due to check in bdrv_commit */
        bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT,
                        bs->backing_blocker);
    } else {
        /* Nothing attached any more, so the blocker is no longer needed */
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
    }

    bdrv_refresh_limits(bs, NULL);
}

1171 1172 1173 1174 1175 1176 1177 1178
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
1179
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1180
{
1181
    char *backing_filename = g_malloc0(PATH_MAX);
1182
    int ret = 0;
1183
    BlockDriver *back_drv = NULL;
1184
    BlockDriverState *backing_hd;
1185
    Error *local_err = NULL;
1186 1187

    if (bs->backing_hd != NULL) {
1188
        QDECREF(options);
1189
        goto free_exit;
1190 1191
    }

1192 1193 1194 1195 1196
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

1197
    bs->open_flags &= ~BDRV_O_NO_BACKING;
1198 1199 1200
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1201
        QDECREF(options);
1202
        goto free_exit;
1203
    } else {
1204
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
1205 1206
    }

1207 1208 1209 1210 1211 1212 1213
    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

1214
    backing_hd = bdrv_new();
1215

1216 1217 1218 1219
    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

1220
    assert(bs->backing_hd == NULL);
1221
    ret = bdrv_open(&backing_hd,
1222
                    *backing_filename ? backing_filename : NULL, NULL, options,
1223
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
1224
    if (ret < 0) {
1225 1226
        bdrv_unref(backing_hd);
        backing_hd = NULL;
1227
        bs->open_flags |= BDRV_O_NO_BACKING;
1228 1229 1230
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
1231
        goto free_exit;
1232
    }
1233
    bdrv_set_backing_hd(bs, backing_hd);
1234

1235 1236 1237
free_exit:
    g_free(backing_filename);
    return ret;
1238 1239
}

Max Reitz's avatar
Max Reitz committed
1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252
/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 * If allow_none is false, that situation is an error and -EINVAL is returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool allow_none, Error **errp)
{
    QDict *sub_options;
    const char *ref;
    char *prefix;
    int ret;

    assert(pbs);
    assert(*pbs == NULL);

    /* Split every "<bdref_key>.*" entry off into its own dictionary */
    prefix = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &sub_options, prefix);
    g_free(prefix);

    /* The BlockdevRef may instead be a plain string stored under the key */
    ref = qdict_get_try_str(options, bdref_key);

    if (filename || ref || qdict_size(sub_options)) {
        ret = bdrv_open(pbs, filename, ref, sub_options, flags, NULL, errp);
    } else {
        /* Nothing describes an image at all */
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        QDECREF(sub_options);
    }

    /* Done only after bdrv_open(): ref points at the value stored under
     * this key */
    qdict_del(options, bdref_key);
    return ret;
}

1292
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1293 1294
{
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1295
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
1296 1297
    int64_t total_size;
    BlockDriver *bdrv_qcow2;
1298
    QemuOpts *opts = NULL;
1299 1300 1301 1302 1303 1304 1305 1306 1307
    QDict *snapshot_options;
    BlockDriverState *bs_snapshot;
    Error *local_err;
    int ret;

    /* if snapshot, we create a temporary backing file and open it
       instead of opening 'filename' directly */

    /* Get the required size from the image */
1308 1309
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
1310
        ret = total_size;
1311
        error_setg_errno(errp, -total_size, "Could not get image size");
1312
        goto out;
1313
    }
1314 1315

    /* Create the temporary image */