block.c 124 KB
Newer Older
bellard's avatar
bellard committed
1
2
/*
 * QEMU System Emulator block driver
3
 *
bellard's avatar
bellard committed
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
bellard's avatar
bellard committed
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "config-host.h"
pbrook's avatar
pbrook committed
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor/monitor.h"
28
29
#include "block/block_int.h"
#include "block/blockjob.h"
30
#include "qemu/module.h"
31
#include "qapi/qmp/qjson.h"
32
#include "sysemu/sysemu.h"
33
#include "qemu/notify.h"
34
#include "block/coroutine.h"
Luiz Capitulino's avatar
Luiz Capitulino committed
35
#include "qmp-commands.h"
36
#include "qemu/timer.h"
bellard's avatar
bellard committed
37

Juan Quintela's avatar
Juan Quintela committed
38
#ifdef CONFIG_BSD
bellard's avatar
bellard committed
39
40
41
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
Blue Swirl's avatar
Blue Swirl committed
42
#include <sys/queue.h>
43
#ifndef __DragonFly__
bellard's avatar
bellard committed
44
45
#include <sys/disk.h>
#endif
46
#endif
bellard's avatar
bellard committed
47

48
49
50
51
#ifdef _WIN32
#include <windows.h>
#endif

52
53
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

54
55
typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
56
    BDRV_REQ_ZERO_WRITE   = 0x2,
57
58
} BdrvRequestFlags;

59
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
60
61
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62
        BlockDriverCompletionFunc *cb, void *opaque);
63
64
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65
        BlockDriverCompletionFunc *cb, void *opaque);
66
67
68
69
70
71
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
72
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
73
74
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
75
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
76
77
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
78
79
80
81
82
83
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
84
                                               bool is_write);
85
static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf's avatar
Kevin Wolf committed
86
87
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);
bellard's avatar
bellard committed
88

89
90
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
91

92
93
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellard's avatar
bellard committed
94

95
96
97
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if @filename is exactly a drive specification such as "d:", or
 * starts with the "\\.\" (or "//./") prefix; 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* exactly two characters: drive letter plus colon */
    if (is_windows_drive_prefix(filename) && !filename[2]) {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
#endif

118
/* throttling disk I/O limits */
119
120
/*
 * Apply the throttling configuration @cfg to @bs.
 *
 * After the new configuration is installed, one queued request from each of
 * the two throttled queues (index 0 and index 1) is re-entered so that it is
 * re-evaluated under the new limits.
 */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/*
 * Drain all the throttled I/Os of @bs.
 *
 * io_limits_enabled is cleared while the queued requests are re-entered and
 * restored to its previous value afterwards.
 *
 * Returns true if at least one queued request was restarted.
 */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

151
/*
 * Turn I/O throttling off for @bs: clear the enabled flag, restart every
 * request still waiting in the throttled queues, then destroy the throttle
 * state.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

160
static void bdrv_throttle_read_timer_cb(void *opaque)
161
{
162
163
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
164
165
}

166
static void bdrv_throttle_write_timer_cb(void *opaque)
167
{
168
169
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
170
171
}

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/*
 * Enable I/O throttling on @bs.
 *
 * Must be called before bdrv_set_io_limits() if a limit is set, and only
 * while throttling is not already enabled (asserted).
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);

    /* timers run on the virtual clock; @bs is handed to both callbacks */
    throttle_init(&bs->throttle_state, QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb, bdrv_throttle_write_timer_cb,
                  bs);

    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @nb_sectors: the number of sectors of the IO
 * @is_write:   is the IO a write
 */
189
static void bdrv_io_limits_intercept(BlockDriverState *bs,
190
191
                                     int nb_sectors,
                                     bool is_write)
192
{
193
194
    /* does this io must wait */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
195

196
197
198
199
    /* if must wait or any request of this type throttled queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
200
201
    }

202
203
204
205
    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state,
                     is_write,
                     nb_sectors * BDRV_SECTOR_SIZE);
206

207
208
209
    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
210
211
    }

212
213
    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
214
215
}

216
217
218
/*
 * Check if the path starts with "<protocol>:".
 *
 * The path has a protocol when the first character out of ':' and '/'
 * (plus '\\' on Windows) that occurs in it is a ':'. On Windows, drive
 * specifications such as "c:..." are never treated as a protocol.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

bellard's avatar
bellard committed
234
/*
 * Return non-zero if @path is absolute.
 *
 * On Windows a path is absolute when it is or starts with a drive
 * specification, starts with the "\\.\" prefix, or begins with '/' or '\\'.
 * Elsewhere it is absolute when it begins with '/'.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

bellard's avatar
bellard committed
247
248
249
250
251
252
/*
 * If filename is absolute, just copy it to dest. Otherwise, build a
 * path to it by considering it is relative to base_path. URL are
 * supported.
 *
 * @dest:      output buffer
 * @dest_size: size of @dest in bytes; nothing is written if it is <= 0
 * @base_path: path whose directory part (everything up to and including the
 *             last '/' -- or '\\' on Windows -- or, failing that, the first
 *             ':') is prepended to a relative @filename
 * @filename:  path to resolve
 *
 * The result is truncated to fit into @dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* end of the "<protocol>:" part, if any */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* end of the directory part, if any */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            /* only compare the two pointers when both are valid */
            if (p2 && (!p1 || p2 > p1)) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        /* keep whichever prefix is longer */
        if (p1 > p) {
            p = p1;
        }
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

291
292
293
294
295
296
297
298
299
/*
 * Write the backing file name of @bs into @dest (at most @sz bytes).
 *
 * An empty backing file name, or one carrying a "<protocol>:" prefix, is
 * copied unchanged; any other name is combined with bs->filename through
 * path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    const char *backing = bs->backing_file;

    if (backing[0] != '\0' && !path_has_protocol(backing)) {
        path_combine(dest, sz, bs->filename, backing);
        return;
    }

    pstrcpy(dest, sz, backing);
}

300
/*
 * Register the block driver @bdrv by inserting it at the head of the
 * driver list.
 *
 * A driver without bdrv_co_readv gets the coroutine emulation functions
 * installed for both read and write; if it also lacks bdrv_aio_readv, the
 * AIO emulation functions are installed as well.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellard's avatar
bellard committed
319
320
321
322

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
323
    BlockDriverState *bs;
bellard's avatar
bellard committed
324

325
    bs = g_malloc0(sizeof(BlockDriverState));
bellard's avatar
bellard committed
326
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellard's avatar
bellard committed
327
    if (device_name[0] != '\0') {
328
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellard's avatar
bellard committed
329
    }
330
    bdrv_iostatus_disable(bs);
Paolo Bonzini's avatar
Paolo Bonzini committed
331
    notifier_list_init(&bs->close_notifiers);
332
    notifier_with_return_list_init(&bs->before_write_notifiers);
333
334
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
335
    bs->refcnt = 1;
Paolo Bonzini's avatar
Paolo Bonzini committed
336

bellard's avatar
bellard committed
337
338
339
    return bs;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
340
341
342
343
344
/* Append @notify to the close-notifier list of @bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&(bs->close_notifiers), notify);
}

bellard's avatar
bellard committed
345
346
347
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
348
349
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
bellard's avatar
bellard committed
350
            return drv1;
351
        }
bellard's avatar
bellard committed
352
353
354
355
    }
    return NULL;
}

356
/*
 * Check @drv against the build-time driver whitelists.
 *
 * @read_only: whether the image is to be opened read-only; only then is the
 *             read-only whitelist consulted in addition to the read-write one
 *
 * Returns 1 if both whitelists are empty or the driver's format name is
 * found in an applicable whitelist, 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

385
386
/*
 * Like bdrv_find_format(), but additionally require the driver to pass
 * bdrv_is_whitelisted() for the given @read_only mode.
 *
 * Returns the driver, or NULL if it is unknown or not whitelisted.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
/* Arguments and result slot handed to bdrv_create_co_entry() */
typedef struct CreateCo {
    BlockDriver *drv;               /* driver whose bdrv_create is invoked */
    char *filename;                 /* first argument for drv->bdrv_create */
    QEMUOptionParameter *options;   /* second argument for drv->bdrv_create */
    int ret;                        /* receives the result of drv->bdrv_create */
} CreateCo;

/*
 * Coroutine entry point used by bdrv_create().
 *
 * @opaque: a CreateCo; its driver's bdrv_create callback is invoked with the
 *          stored filename and options, and the result is stored in ->ret
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *args = opaque;

    assert(args->drv);
    args->ret = args->drv->bdrv_create(args->filename, args->options);
}

407
408
/*
 * Create an image called @filename using the driver @drv.
 *
 * The driver's bdrv_create callback runs in coroutine context: directly if
 * the caller is already in a coroutine, otherwise in a newly created
 * coroutine while this function waits (qemu_aio_wait()) until a result
 * has been stored.
 *
 * Returns the callback's result, or -ENOTSUP if the driver has no
 * bdrv_create callback.
 */
int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
    };

    if (!drv->bdrv_create) {
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;

out:
    /* the filename was duplicated for the coroutine; release the copy */
    g_free(cco.filename);
    return ret;
}

443
444
445
446
/*
 * Create @filename using the protocol driver selected by
 * bdrv_find_protocol() (protocol prefixes allowed).
 *
 * Returns the result of bdrv_create(), or -ENOENT if no driver was found.
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

455
456
457
458
459
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer receiving the name of the created file
 * @size:     size of @filename in bytes (must be >= MAX_PATH on Windows)
 *
 * On non-Windows hosts the file is created in $TMPDIR, or in /tmp when
 * TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename). On Windows the failure value is
 * the negated GetLastError() code.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno; report the close() failure */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellard's avatar
bellard committed
489

490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver that has a bdrv_probe_device callback is asked to
 * score @filename; the driver with the highest strictly positive score wins
 * (the first one encountered on a tie). Returns NULL when no driver scores
 * above zero.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL;
    BlockDriver *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }

        score = candidate->bdrv_probe_device(filename);
        if (score > best_score) {
            best_score = score;
            best = candidate;
        }
    }

    return best;
}

512
513
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
bellard's avatar
bellard committed
514
515
516
{
    BlockDriver *drv1;
    char protocol[128];
517
    int len;
bellard's avatar
bellard committed
518
    const char *p;
bellard's avatar
bellard committed
519

520
521
    /* TODO Drivers without bdrv_file_open must be specified explicitly */

522
523
524
525
526
527
528
529
530
531
532
533
    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

534
    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
535
        return bdrv_find_format("file");
536
    }
537

538
539
    p = strchr(filename, ':');
    assert(p != NULL);
540
541
542
543
544
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
545
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
546
        if (drv1->protocol_name &&
547
            !strcmp(drv1->protocol_name, protocol)) {
bellard's avatar
bellard committed
548
            return drv1;
549
        }
bellard's avatar
bellard committed
550
551
552
553
    }
    return NULL;
}

554
555
/*
 * Probe the image format of @bs.
 *
 * @filename: passed through to each driver's bdrv_probe callback
 * @pdrv:     receives the selected driver, or NULL on failure
 *
 * scsi-generic devices, drives without medium and zero-length images get
 * the "raw" driver. Otherwise the first 2048 bytes are read and handed to
 * every driver that has a bdrv_probe callback; the highest strictly positive
 * score wins.
 *
 * Returns a non-negative value on success, the negative bdrv_pread() result
 * if reading failed, or -ENOENT if no driver was found.
 */
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            /* ret is the bdrv_pread() result, passed as the buffer length */
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

596
597
598
599
600
601
602
/**
 * Set the current 'total_sectors' value
 *
 * @hint: sector count to use when the driver has no bdrv_getlength callback
 *
 * Nothing is changed for scsi-generic devices. Returns 0 on success or the
 * negative value reported by the driver's bdrv_getlength callback.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
/**
 * Translate a discard mode name into open flags.
 *
 * BDRV_O_UNMAP is always cleared from *flags first; "on"/"unmap" then set it
 * again, while "off"/"ignore" leave it cleared.
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        /* nothing to set */
        return 0;
    }

    if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        *flags |= BDRV_O_UNMAP;
        return 0;
    }

    return -1;
}

640
641
642
643
644
645
646
647
648
649
650
/**
 * Set open flags for a given cache mode
 *
 * The bits in BDRV_O_CACHE_MASK are cleared from *flags first, then set
 * according to @mode ("off"/"none", "directsync", "writeback", "unsafe" or
 * "writethrough").
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
/**
 * Take one reference on the copy-on-read feature of @bs.
 *
 * The copy-on-read flag is actually a reference count, so several users may
 * enable the feature independently; it stays enabled until each of them has
 * called bdrv_disable_copy_on_read().
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read += 1;
}

/*
 * Drop one reference on the copy-on-read feature of @bs. The count must be
 * positive on entry (asserted).
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read -= 1;
}

Kevin Wolf's avatar
Kevin Wolf committed
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
/*
 * Compute the flags passed to the driver's open callback from the flags the
 * caller supplied.
 *
 * BDRV_O_CACHE_WB is always set, the flags internal to the block layer
 * (BDRV_O_SNAPSHOT, BDRV_O_NO_BACKING) are cleared, and temporary images are
 * forced writable.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    /* flags that are internal to the block layer */
    const int internal_flags = BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING;
    int result = (flags | BDRV_O_CACHE_WB) & ~internal_flags;

    /* Snapshots should be writable. */
    if (bs->is_temporary) {
        result |= BDRV_O_RDWR;
    }

    return result;
}

703
704
/*
 * Common part for opening disk images and files
705
706
 *
 * Removes all processed options from *options.
707
 */
708
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
Kevin Wolf's avatar
Kevin Wolf committed
709
    QDict *options, int flags, BlockDriver *drv)
710
711
{
    int ret, open_flags;
Kevin Wolf's avatar
Kevin Wolf committed
712
    const char *filename;
713
714

    assert(drv != NULL);
715
    assert(bs->file == NULL);
716
    assert(options != NULL && bs->options != options);
717

718
719
720
721
722
723
724
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
725

726
727
728
729
730
731
732
733
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

734
735
    bs->open_flags = flags;
    bs->buffer_alignment = 512;
736
    bs->zero_beyond_eof = true;
737
738
739
740
741
742
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        return -ENOTSUP;
    }
743

744
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
745
    if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
746
747
748
        bdrv_enable_copy_on_read(bs);
    }

749
750
751
752
753
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
754
755

    bs->drv = drv;
756
    bs->opaque = g_malloc0(drv->instance_size);
757

758
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
759

760
761
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
762
763
        assert(file == NULL);
        assert(drv->bdrv_parse_filename || filename != NULL);
764
        ret = drv->bdrv_file_open(bs, options, open_flags);
765
    } else {
766
767
768
769
770
771
772
        if (file == NULL) {
            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
                          "block driver for the protocol level",
                          drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
773
        bs->file = file;
774
        ret = drv->bdrv_open(bs, options, open_flags);
775
776
    }

777
778
779
780
    if (ret < 0) {
        goto free_and_fail;
    }

781
782
783
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
784
    }
785

786
787
#ifndef _WIN32
    if (bs->is_temporary) {
788
        assert(filename != NULL);
789
790
791
792
793
794
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
795
    bs->file = NULL;
796
    g_free(bs->opaque);
797
798
799
800
801
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

Kevin Wolf's avatar
Kevin Wolf committed
802
803
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
804
805
806
807
808
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
Kevin Wolf's avatar
Kevin Wolf committed
809
 */
810
811
int bdrv_file_open(BlockDriverState **pbs, const char *filename,
                   QDict *options, int flags)
bellard's avatar
bellard committed
812
{
bellard's avatar
bellard committed
813
    BlockDriverState *bs;
814
    BlockDriver *drv;
815
    const char *drvname;
816
    bool allow_protocol_prefix = false;
bellard's avatar
bellard committed
817
818
    int ret;

819
820
821
822
823
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

bellard's avatar
bellard committed
824
    bs = bdrv_new("");
825
826
827
    bs->options = options;
    options = qdict_clone_shallow(options);

Kevin Wolf's avatar
Kevin Wolf committed
828
829
830
831
832
    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
833
        allow_protocol_prefix = true;
Kevin Wolf's avatar
Kevin Wolf committed
834
835
836
837
838
839
840
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
                      "'filename' options at the same time");
        ret = -EINVAL;
        goto fail;
    }

841
842
843
    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
844
        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
845
846
        qdict_del(options, "driver");
    } else if (filename) {
847
848
849
850
        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
        if (!drv) {
            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
        }
851
852
853
854
855
856
857
858
859
860
861
862
863
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "Must specify either driver or file");
        drv = NULL;
    }

    if (!drv) {
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
864
865
866
867
868
869
870
871
        Error *local_err = NULL;
        drv->bdrv_parse_filename(filename, options, &local_err);
        if (error_is_set(&local_err)) {
            qerror_report_err(local_err);
            error_free(local_err);
            ret = -EINVAL;
            goto fail;
        }
872
        qdict_del(options, "filename");
873
874
875
876
877
878
    } else if (!drv->bdrv_parse_filename && !filename) {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "The '%s' block driver requires a file name",
                      drv->format_name);
        ret = -EINVAL;
        goto fail;
879
880
    }

Kevin Wolf's avatar
Kevin Wolf committed
881
    ret = bdrv_open_common(bs, NULL, options, flags, drv);
bellard's avatar
bellard committed
882
    if (ret < 0) {
883
884
885
886
887
888
889
890
891
892
893
        goto fail;
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
                      "support the option '%s'",
                      drv->format_name, entry->key);
        ret = -EINVAL;
        goto fail;
894
    }
895
896
    QDECREF(options);

897
    bs->growable = 1;
bellard's avatar
bellard committed
898
899
    *pbs = bs;
    return 0;
900
901
902
903
904
905

fail:
    QDECREF(options);
    if (!bs->drv) {
        QDECREF(bs->options);
    }
Fam Zheng's avatar
Fam Zheng committed
906
    bdrv_unref(bs);
907
    return ret;
bellard's avatar
bellard committed
908
909
}

910
911
912
913
914
915
916
917
918
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
 *
 * Returns 0 if the backing file was opened, was already open, or there is
 * nothing to open (no backing file name and no options); otherwise the
 * negative value returned by bdrv_open(). On failure bs->backing_hd is reset
 * to NULL and BDRV_O_NO_BACKING is set in bs->open_flags.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        return 0;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* the options name the file; do not fall back to bs->backing_file */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        return 0;
    } else {
        /*
         * Only derive the name from the image when the options did not
         * override it; doing so unconditionally would discard the override.
         */
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));
    }

    bs->backing_hd = bdrv_new("");

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);

    ret = bdrv_open(bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, options,
                    back_flags, back_drv);
    if (ret < 0) {
        bdrv_unref(bs->backing_hd);
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        return ret;
    }
    return 0;
}

965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
/*
 * Splits the entries of src whose key begins with the prefix 'start' off into
 * a newly created QDict, which is stored in *dst.
 *
 * For every matching entry the value gains a reference and is inserted into
 * *dst under the key pointer that strstart() hands back (presumably the part
 * of the key following the prefix); the entry is then deleted from src.
 * Entries that do not match are left in src untouched.
 */
static void extract_subqdict(QDict *src, QDict **dst, const char *start)
{
    const QDictEntry *cur;
    const QDictEntry *following;
    const char *suffix;

    *dst = qdict_new();

    for (cur = qdict_first(src); cur != NULL; cur = following) {
        /* Look up the successor first: cur may be deleted from src below */
        following = qdict_next(src, cur);

        if (!strstart(cur->key, start, &suffix)) {
            continue;
        }

        /* Take the extra reference before the entry is deleted from src */
        qobject_incref(cur->value);
        qdict_put_obj(*dst, suffix, cur->value);
        qdict_del(src, cur->key);
    }
}

Kevin Wolf's avatar
Kevin Wolf committed
984
985
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
986
987
988
989
990
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Kevin Wolf's avatar
Kevin Wolf committed
991
 */
992
993
int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
              int flags, BlockDriver *drv)
bellard's avatar
bellard committed
994
{
Kevin Wolf's avatar
Kevin Wolf committed
995
    int ret;
996
997
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char tmp_filename[PATH_MAX + 1];
998
    BlockDriverState *file = NULL;
999
    QDict *file_options = NULL;
1000
    const char *drvname;
bellard's avatar
bellard committed
1001

1002
1003
1004
1005
1006
1007
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->options = options;
1008
    options = qdict_clone_shallow(options);
1009
1010

    /* For snapshot=on, create a temporary qcow2 overlay */
bellard's avatar
bellard committed
1011
    if (flags & BDRV_O_SNAPSHOT) {
bellard's avatar
bellard committed
1012
1013
        BlockDriverState *bs1;
        int64_t total_size;
Kevin Wolf's avatar
Kevin Wolf committed
1014
        BlockDriver *bdrv_qcow2;
1015
        QEMUOptionParameter *create_options;
Kevin Wolf's avatar
Kevin Wolf committed
1016
        char backing_filename[PATH_MAX];
1017

1018
1019
1020
1021
1022
1023
1024
        if (qdict_size(options) != 0) {
            error_report("Can't use snapshot=on with driver-specific options");
            ret = -EINVAL;
            goto fail;
        }
        assert(filename != NULL);

bellard's avatar
bellard committed
1025
1026
        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */
1027

bellard's avatar
bellard committed
1028
1029
        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
1030
        ret = bdrv_open(bs1, filename, NULL, 0, drv);
1031
        if (ret < 0) {
Fam Zheng's avatar
Fam Zheng committed
1032
            bdrv_unref(bs1);
1033
            goto fail;
bellard's avatar
bellard committed
1034
        }
1035
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori's avatar
aliguori committed
1036

Fam Zheng's avatar
Fam Zheng committed
1037
        bdrv_unref(bs1);
1038

1039
1040
        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
1041
            goto fail;
1042
        }
aliguori's avatar
aliguori committed
1043
1044

        /* Real path is meaningless for protocols */
1045
        if (path_has_protocol(filename)) {
aliguori's avatar
aliguori committed
1046
1047
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
1048
1049
1050
1051
        } else if (!realpath(filename, backing_filename)) {
            ret = -errno;
            goto fail;
        }
aliguori's avatar
aliguori committed
1052

Kevin Wolf's avatar
Kevin Wolf committed
1053
        bdrv_qcow2 = bdrv_find_format("qcow2");
1054
1055
        create_options = parse_option_parameters("", bdrv_qcow2->create_options,
                                                 NULL);
Kevin Wolf's avatar
Kevin Wolf committed
1056

1057
1058
1059
        set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
                             backing_filename);
Kevin Wolf's avatar
Kevin Wolf committed
1060
        if (drv) {
1061
            set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
Kevin Wolf's avatar
Kevin Wolf committed
1062
1063
1064
                drv->format_name);
        }

1065
1066
        ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
        free_option_parameters(create_options);
1067
        if (ret < 0) {
1068
            goto fail;
bellard's avatar
bellard committed
1069
        }
Kevin Wolf's avatar
Kevin Wolf committed
1070

bellard's avatar
bellard committed
1071
        filename = tmp_filename;
Kevin Wolf's avatar
Kevin Wolf committed
1072
        drv = bdrv_qcow2;
bellard's avatar
bellard committed
1073
1074
        bs->is_temporary = 1;
    }
bellard's avatar
bellard committed
1075

1076
1077
1078
1079
1080
    /* Open image file without format layer */
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

1081
1082
1083
    extract_subqdict(options, &file_options, "file.");

    ret = bdrv_file_open(&file, filename, file_options,
1084
                         bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
1085
    if (ret < 0) {
1086
        goto fail;
1087
1088
    }

Kevin Wolf's avatar
Kevin Wolf committed
1089
    /* Find the right image format driver */
1090
1091
1092
1093
1094
1095
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
        qdict_del(options, "driver");
    }

1096
    if (!drv) {
1097
        ret = find_image_format(file, filename, &drv);
1098
    }
1099

1100
1101
    if (!drv) {
        goto unlink_and_fail;
bellard's avatar
bellard committed
1102
    }
Kevin Wolf's avatar
Kevin Wolf committed
1103
1104

    /* Open the image */
Kevin Wolf's avatar
Kevin Wolf committed
1105
    ret = bdrv_open_common(bs, file, options, flags, drv);
Kevin Wolf's avatar
Kevin Wolf committed
1106
    if (ret < 0) {
1107
1108
1109
        goto unlink_and_fail;
    }

1110
    if (bs->file != file) {
Fam Zheng's avatar
Fam Zheng committed
1111
        bdrv_unref(file);
1112
1113
1114
        file = NULL;
    }

Kevin Wolf's avatar
Kevin Wolf committed
1115
    /* If there is a backing file, use it */
1116
    if ((flags & BDRV_O_NO_BACKING) == 0) {
1117
1118
1119
1120
        QDict *backing_options;

        extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options);
Kevin Wolf's avatar
Kevin Wolf committed
1121
        if (ret < 0) {
1122
            goto close_and_fail;
Kevin Wolf's avatar
Kevin Wolf committed
1123
1124
1125
        }
    }

1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
            "device '%s' doesn't support the option '%s'",
            drv->format_name, bs->device_name, entry->key);

        ret = -EINVAL;
        goto close_and_fail;
    }
    QDECREF(options);

Kevin Wolf's avatar
Kevin Wolf committed
1138
    if (!bdrv_key_required(bs)) {
1139
        bdrv_dev_change_media_cb(bs, true);
Kevin Wolf's avatar
Kevin Wolf committed
1140
1141
1142
1143
1144
    }

    return 0;

unlink_and_fail:
1145
    if (file != NULL) {
Fam Zheng's avatar
Fam Zheng committed
1146
        bdrv_unref(file);
1147
    }
Kevin Wolf's avatar
Kevin Wolf committed
1148
1149
1150
    if (bs->is_temporary