All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

block.c 171 KB
Newer Older
bellard's avatar
bellard committed
1 2
/*
 * QEMU System Emulator block driver
3
 *
bellard's avatar
bellard committed
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
bellard's avatar
bellard committed
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "config-host.h"
pbrook's avatar
pbrook committed
25
#include "qemu-common.h"
26
#include "trace.h"
27 28
#include "block/block_int.h"
#include "block/blockjob.h"
29
#include "qemu/module.h"
30
#include "qapi/qmp/qjson.h"
31
#include "sysemu/block-backend.h"
32
#include "sysemu/sysemu.h"
33
#include "qemu/notify.h"
34
#include "block/coroutine.h"
35
#include "block/qapi.h"
Luiz Capitulino's avatar
Luiz Capitulino committed
36
#include "qmp-commands.h"
37
#include "qemu/timer.h"
38
#include "qapi-event.h"
bellard's avatar
bellard committed
39

Juan Quintela's avatar
Juan Quintela committed
40
#ifdef CONFIG_BSD
bellard's avatar
bellard committed
41 42 43
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
Blue Swirl's avatar
Blue Swirl committed
44
#include <sys/queue.h>
45
#ifndef __DragonFly__
bellard's avatar
bellard committed
46 47
#include <sys/disk.h>
#endif
48
#endif
bellard's avatar
bellard committed
49

50 51 52 53
#ifdef _WIN32
#include <windows.h>
#endif

Fam Zheng's avatar
Fam Zheng committed
54 55 56 57 58
/*
 * A dirty bitmap that can be chained into a list of bitmaps.
 * NOTE(review): presumably linked into BlockDriverState.dirty_bitmaps
 * (initialised in bdrv_new()) -- confirm against the users of this type.
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;                    /* the bitmap data itself */
    QLIST_ENTRY(BdrvDirtyBitmap) list;  /* list linkage to sibling bitmaps */
};

59 60
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

61
static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
62
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
63
        BlockCompletionFunc *cb, void *opaque);
64
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
65
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66
        BlockCompletionFunc *cb, void *opaque);
67 68 69 70 71 72
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
73 74
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
75
    BdrvRequestFlags flags);
76 77
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
78
    BdrvRequestFlags flags);
79 80 81 82 83
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
84
                                         BlockCompletionFunc *cb,
85 86
                                         void *opaque,
                                         bool is_write);
87
static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf's avatar
Kevin Wolf committed
88
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
89
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellard's avatar
bellard committed
90

91 92
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
93

94 95 96
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

97 98
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellard's avatar
bellard committed
99

100 101 102
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
#ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Only look at the second byte once the first is known to be a letter,
     * so an empty string is never read past its terminator. */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if @filename is exactly a drive specifier ("d:") or starts with
 * one of the prefixes "\\.\" or "//./"; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* bare "<letter>:" with nothing after it */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* backslash-style prefix */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }

    /* forward-slash-style prefix */
    return strstart(filename, "//./", NULL) ? 1 : 0;
}
#endif

123
/* throttling disk I/O limits */
124 125
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
126
{
127
    int i;
128

129
    throttle_config(&bs->throttle_state, cfg);
130

131 132
    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
133
    }
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
}

/*
 * Drain all throttled I/O requests of @bs.
 *
 * io_limits_enabled is cleared while the queues are being drained and
 * restored to its previous value afterwards.
 *
 * Returns true if at least one queued request was entered.
 */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

156
/*
 * Turn I/O throttling off for @bs: clear the enabled flag, restart every
 * request still waiting in the throttled queues, then destroy the throttle
 * state.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

165
static void bdrv_throttle_read_timer_cb(void *opaque)
166
{
167 168
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
169 170
}

171
static void bdrv_throttle_write_timer_cb(void *opaque)
172
{
173 174
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
175 176
}

177 178 179 180 181
/*
 * Initialise the throttle state of @bs and mark I/O limits as enabled.
 *
 * Should be called before bdrv_set_io_limits() if a limit is set.
 * Throttling must not already be enabled on @bs.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @nb_sectors: the number of sectors of the IO
 * @is_write:   is the IO a write
 */
195
static void bdrv_io_limits_intercept(BlockDriverState *bs,
196
                                     unsigned int bytes,
197
                                     bool is_write)
198
{
199 200
    /* does this io must wait */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
201

202 203 204 205
    /* if must wait or any request of this type throttled queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
206 207
    }

208
    /* the IO will be executed, do the accounting */
209 210
    throttle_account(&bs->throttle_state, is_write, bytes);

211

212 213 214
    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
215 216
    }

217 218
    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
219 220
}

221 222 223 224 225 226 227 228 229 230
/*
 * Return the optimal memory alignment recorded in @bs's limits, or 4096
 * when @bs is NULL or has no driver attached.
 */
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (bs != NULL && bs->drv != NULL) {
        return bs->bl.opt_mem_alignment;
    }

    /* 4k should be on the safe side */
    return 4096;
}

231 232 233
/*
 * Check if the path starts with "<protocol>:".
 *
 * The first ':' must come before any path separator ('/', and also '\\'
 * on Windows).  On Windows, drive specifiers such as "c:" are never
 * treated as a protocol.
 *
 * Returns 1 if @path has a protocol prefix, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    /* p points at the first ':' or separator, or at the terminating NUL */
    return *p == ':';
}

bellard's avatar
bellard committed
249
/*
 * Return non-zero if @path is absolute.
 *
 * On POSIX hosts that means it starts with '/'.  On Windows a leading '/'
 * or '\\' qualifies, as does anything accepted by is_windows_drive() or
 * is_windows_drive_prefix().
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

bellard's avatar
bellard committed
262 263 264 265 266 267
/*
 * If @filename is absolute, just copy it to @dest.  Otherwise, build a
 * path to it by considering it relative to @base_path.  URLs are
 * supported: everything up to and including the first ':' of @base_path
 * is always kept.
 *
 * @dest:      output buffer
 * @dest_size: size of @dest in bytes; nothing is written if it is <= 0
 *
 * The result is truncated to fit @dest_size.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: just past the first ':' of base_path, if there is one */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* p1: just past the last path separator of base_path */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        /* keep whichever prefix of base_path is longer */
        if (p1 > p) {
            p = p1;
        }
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

306 307 308 309 310 311 312 313 314
/*
 * Write the backing file name of @bs into @dest (at most @sz bytes).
 *
 * An empty name or one carrying a "<protocol>:" prefix is copied verbatim;
 * any other name is combined with bs->filename via path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    const char *backing = bs->backing_file;

    if (backing[0] != '\0' && !path_has_protocol(backing)) {
        path_combine(dest, sz, bs->filename, backing);
        return;
    }

    pstrcpy(dest, sz, backing);
}

315
/*
 * Register the block driver @bdrv.
 *
 * Missing coroutine (and, if needed, AIO) read/write callbacks are filled
 * in with the emulation helpers before the driver is inserted at the head
 * of the driver list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellard's avatar
bellard committed
334

335
BlockDriverState *bdrv_new_root(void)
bellard's avatar
bellard committed
336
{
337
    BlockDriverState *bs = bdrv_new();
338 339 340 341 342 343 344 345 346 347

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}

BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

348
    bs = g_new0(BlockDriverState, 1);
Fam Zheng's avatar
Fam Zheng committed
349
    QLIST_INIT(&bs->dirty_bitmaps);
350 351 352
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
353
    bdrv_iostatus_disable(bs);
Paolo Bonzini's avatar
Paolo Bonzini committed
354
    notifier_list_init(&bs->close_notifiers);
355
    notifier_with_return_list_init(&bs->before_write_notifiers);
356 357
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
358
    bs->refcnt = 1;
359
    bs->aio_context = qemu_get_aio_context();
Paolo Bonzini's avatar
Paolo Bonzini committed
360

bellard's avatar
bellard committed
361 362 363
    return bs;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
364 365 366 367 368
/*
 * Add @notify to the close_notifiers list of @bs.
 * NOTE(review): presumably these notifiers fire when @bs is closed --
 * confirm against the code that walks bs->close_notifiers.
 */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

bellard's avatar
bellard committed
369 370 371
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
372 373
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
bellard's avatar
bellard committed
374
            return drv1;
375
        }
bellard's avatar
bellard committed
376 377 378 379
    }
    return NULL;
}

380
/*
 * Check @drv against the build-time driver whitelists.
 *
 * Returns 1 if both whitelists are empty, if @drv is on the read-write
 * whitelist, or if @read_only is set and @drv is on the read-only
 * whitelist; returns 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

409 410
/*
 * Like bdrv_find_format(), but return NULL as well when the driver is not
 * whitelisted for the requested access mode (@read_only).
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

416 417 418
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
419
    QemuOpts *opts;
420
    int ret;
421
    Error *err;
422 423 424 425
} CreateCo;

/*
 * Coroutine entry point for bdrv_create(): call the driver's bdrv_create
 * callback and store its return value and error in the CreateCo passed as
 * @opaque.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    /* written last: bdrv_create() polls until ret leaves NOT_DONE */
    cco->ret = ret;
}

439
int bdrv_create(BlockDriver *drv, const char* filename,
440
                QemuOpts *opts, Error **errp)
bellard's avatar
bellard committed
441
{
442 443 444 445 446 447
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
448
        .opts = opts,
449
        .ret = NOT_DONE,
450
        .err = NULL,
451 452
    };

Chunyan Liu's avatar
Chunyan Liu committed
453
    if (!drv->bdrv_create) {
454
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
455 456
        ret = -ENOTSUP;
        goto out;
457 458 459 460 461 462 463 464 465
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
466
            aio_poll(qemu_get_aio_context(), true);
467 468 469 470
        }
    }

    ret = cco.ret;
471
    if (ret < 0) {
472
        if (cco.err) {
473 474 475 476 477
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }
478

479 480
out:
    g_free(cco.filename);
481
    return ret;
bellard's avatar
bellard committed
482 483
}

Chunyan Liu's avatar
Chunyan Liu committed
484
/*
 * Create @filename with the protocol driver selected by
 * bdrv_find_protocol() (protocol prefixes allowed).
 *
 * Returns -ENOENT if no protocol driver is found, otherwise the result of
 * bdrv_create().  Errors are reported through @errp.
 */
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

503
/*
 * Recompute bs->bl, the I/O limits of @bs.
 *
 * The limits are first reset to zero.  If @bs has a driver, defaults are
 * taken from bs->file and merged with bs->backing_hd (both refreshed
 * recursively), and finally the driver's own bdrv_refresh_limits callback
 * may override them.  An error from a child refresh is propagated to
 * @errp and stops the update.
 */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

551 552 553 554 555
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer receiving the name of the created file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp if TMPDIR
 * is not set.
 *
 * Return 0 upon success, otherwise a negative errno value
 * (-EOVERFLOW if the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    /* NOTE(review): GetLastError() yields a Win32 error code, not an errno
     * value -- callers should not interpret it as one. */
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellard's avatar
bellard committed
586

587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver that implements bdrv_probe_device is asked to
 * score @filename; the driver with the highest strictly positive score
 * wins (the first one seen on ties).  Returns NULL if no driver scores
 * above zero.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL;
    BlockDriver *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int candidate_score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }

        candidate_score = candidate->bdrv_probe_device(filename);
        if (candidate_score > best_score) {
            best_score = candidate_score;
            best = candidate;
        }
    }

    return best;
}

609 610
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
bellard's avatar
bellard committed
611 612 613
{
    BlockDriver *drv1;
    char protocol[128];
614
    int len;
bellard's avatar
bellard committed
615
    const char *p;
bellard's avatar
bellard committed
616

617 618
    /* TODO Drivers without bdrv_file_open must be specified explicitly */

619 620 621 622 623 624 625 626 627 628 629 630
    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

631
    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
632
        return bdrv_find_format("file");
633
    }
634

635 636
    p = strchr(filename, ':');
    assert(p != NULL);
637 638 639 640 641
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
642
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
643
        if (drv1->protocol_name &&
644
            !strcmp(drv1->protocol_name, protocol)) {
bellard's avatar
bellard committed
645
            return drv1;
646
        }
bellard's avatar
bellard committed
647 648 649 650
    }
    return NULL;
}

651
static int find_image_format(BlockDriverState *bs, const char *filename,
652
                             BlockDriver **pdrv, Error **errp)
653
{
654
    int score, score_max;
655 656
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
657
    int ret = 0;
658

659
    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
660
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
661 662
        drv = bdrv_find_format("raw");
        if (!drv) {
663
            error_setg(errp, "Could not find raw image format");
664 665 666 667
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
668
    }
669

bellard's avatar
bellard committed
670 671
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
672 673
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
674 675
        *pdrv = NULL;
        return ret;
bellard's avatar
bellard committed
676 677
    }

bellard's avatar
bellard committed
678
    score_max = 0;
679
    drv = NULL;
680
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard's avatar
bellard committed
681 682 683 684 685 686
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
bellard's avatar
bellard committed
687
        }
bellard's avatar
bellard committed
688
    }
689
    if (!drv) {
690 691
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
692 693 694 695
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
bellard's avatar
bellard committed
696 697
}

698 699
/**
 * Set the current 'total_sectors' value
 *
 * If the driver implements bdrv_getlength, the length it reports is
 * rounded up to whole sectors and used; otherwise @hint is stored as is.
 * Nothing is changed for scsi-generic devices.
 *
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
/**
 * Translate a discard mode string into open flags.
 *
 * BDRV_O_UNMAP is always cleared from *@flags first; "on" and "unmap" set
 * it again, "off" and "ignore" leave it cleared.
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        /* discard requests are not passed on: nothing more to set */
        return 0;
    }

    if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        *flags |= BDRV_O_UNMAP;
        return 0;
    }

    return -1;
}

743 744 745 746 747 748 749 750 751 752 753
/**
 * Set open flags for a given cache mode
 *
 * The bits covered by BDRV_O_CACHE_MASK are cleared from *@flags before
 * the bits for @mode are set.  Accepted modes: "off"/"none",
 * "directsync", "writeback", "unsafe" and "writethrough".
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 *
 * Each call increments the count by one and is expected to be balanced by
 * a later bdrv_disable_copy_on_read().
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

/*
 * Drop one copy-on-read reference on @bs.  The count must be positive,
 * i.e. there must be an earlier bdrv_enable_copy_on_read() to undo.
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    /* an unbalanced disable would drive the counter negative */
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

786 787 788 789 790 791 792 793 794 795
/*
 * Compute the flags for a temporary snapshot from the originally
 * requested flags: BDRV_O_SNAPSHOT is removed and BDRV_O_TEMPORARY added.
 * (The originally requested image itself will get flags like a backing
 * file.)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    int snapshot_flags = flags;

    snapshot_flags &= ~BDRV_O_SNAPSHOT;
    snapshot_flags |= BDRV_O_TEMPORARY;

    return snapshot_flags;
}

796 797 798 799 800 801 802 803 804 805 806 807 808 809
/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
810
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
811 812 813 814

    return flags;
}

815 816 817 818 819 820 821 822 823 824
/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
825
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
826 827 828 829

    return flags;
}

Kevin Wolf's avatar
Kevin Wolf committed
830 831 832 833 834 835 836 837
/*
 * Compute the flags handed to a driver's open callback from the @flags
 * requested for the BlockDriverState.
 *
 * BDRV_O_CACHE_WB is always set, flags internal to the block layer are
 * removed, and temporary snapshots are forced writable.  @bs is not used.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

850 851 852
/*
 * Give @bs the node name @node_name and add it to the graph node list.
 *
 * A NULL @node_name is silently accepted and leaves @bs untouched.  An
 * ill-formed name, a name that clashes with a block backend name, or a
 * name already used by another node sets @errp instead.
 */
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

882 883
/*
 * Common part for opening disk images and files
884 885
 *
 * Removes all processed options from *options.
886
 */
887
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
888
    QDict *options, int flags, BlockDriver *drv, Error **errp)
889 890
{
    int ret, open_flags;
Kevin Wolf's avatar
Kevin Wolf committed
891
    const char *filename;
892
    const char *node_name = NULL;
893
    Error *local_err = NULL;
894 895

    assert(drv != NULL);
896
    assert(bs->file == NULL);
897
    assert(options != NULL && bs->options != options);
898

899 900 901 902 903 904
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

905 906 907 908 909 910
    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

911
    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
912

913
    node_name = qdict_get_try_str(options, "node-name");
914
    bdrv_assign_node_name(bs, node_name, &local_err);
915
    if (local_err) {
916 917
        error_propagate(errp, local_err);
        return -EINVAL;
918 919 920
    }
    qdict_del(options, "node-name");

921 922 923 924 925 926 927 928
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

929
    bs->open_flags = flags;
930
    bs->guest_block_size = 512;
931
    bs->request_alignment = 512;
932
    bs->zero_beyond_eof = true;
933 934
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
935
    bs->growable = !!(flags & BDRV_O_PROTOCOL);
936 937

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
938 939 940 941 942
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
943 944
        return -ENOTSUP;
    }
945

946
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
947 948 949 950 951 952 953
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
954 955
    }

956 957 958 959 960
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
Max Reitz's avatar
Max Reitz committed
961
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
962 963

    bs->drv = drv;
964
    bs->opaque = g_malloc0(drv->instance_size);
965

966
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
967

968 969
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
970
        assert(file == NULL);
971
        assert(!drv->bdrv_needs_filename || filename != NULL);
972
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
973
    } else {
974
        if (file == NULL) {
975 976
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
977 978 979
            ret = -EINVAL;
            goto free_and_fail;
        }
980
        bs->file = file;
981
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
982 983
    }

984
    if (ret < 0) {
985
        if (local_err) {
986
            error_propagate(errp, local_err);
987 988
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
989 990 991
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
992 993 994
        goto free_and_fail;
    }

995 996
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
997
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
998
        goto free_and_fail;
999
    }
1000

1001 1002 1003 1004 1005 1006 1007
    bdrv_refresh_limits(bs,