block.c 124 KB
Newer Older
bellard's avatar
bellard committed
1
2
/*
 * QEMU System Emulator block driver
3
 *
bellard's avatar
bellard committed
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
bellard's avatar
bellard committed
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
#include "config-host.h"
pbrook's avatar
pbrook committed
25
#include "qemu-common.h"
26
#include "trace.h"
27
#include "monitor/monitor.h"
28
29
#include "block/block_int.h"
#include "block/blockjob.h"
30
#include "qemu/module.h"
31
#include "qapi/qmp/qjson.h"
32
#include "sysemu/sysemu.h"
33
#include "qemu/notify.h"
34
#include "block/coroutine.h"
Luiz Capitulino's avatar
Luiz Capitulino committed
35
#include "qmp-commands.h"
36
#include "qemu/timer.h"
bellard's avatar
bellard committed
37

Juan Quintela's avatar
Juan Quintela committed
38
#ifdef CONFIG_BSD
bellard's avatar
bellard committed
39
40
41
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
Blue Swirl's avatar
Blue Swirl committed
42
#include <sys/queue.h>
43
#ifndef __DragonFly__
bellard's avatar
bellard committed
44
45
#include <sys/disk.h>
#endif
46
#endif
bellard's avatar
bellard committed
47

48
49
50
51
#ifdef _WIN32
#include <windows.h>
#endif

52
53
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

54
55
/*
 * Per-request flag bits accepted by bdrv_co_do_readv() and
 * bdrv_co_do_writev() through their 'flags' parameter.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE   = 0x2,
} BdrvRequestFlags;

59
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
60
61
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62
        BlockDriverCompletionFunc *cb, void *opaque);
63
64
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65
        BlockDriverCompletionFunc *cb, void *opaque);
66
67
68
69
70
71
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
72
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
73
74
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
75
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
76
77
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
78
79
80
81
82
83
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
84
                                               bool is_write);
85
static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf's avatar
Kevin Wolf committed
86
87
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);
bellard's avatar
bellard committed
88

89
90
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);
91

92
93
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellard's avatar
bellard committed
94

95
96
97
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#ifdef _WIN32
/*
 * Return non-zero if filename starts with an ASCII letter immediately
 * followed by ':' (for example "c:" or "D:\\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int lower = (letter >= 'a' && letter <= 'z');
    const int upper = (letter >= 'A' && letter <= 'Z');

    if (!lower && !upper) {
        /* do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if filename is exactly a drive specification ("d:") or starts
 * with one of the device prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    static const char *const device_prefixes[] = { "\\\\.\\", "//./" };
    unsigned int i;

    /* bare drive letter: two characters, nothing after the colon */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    for (i = 0; i < sizeof(device_prefixes) / sizeof(device_prefixes[0]); i++) {
        if (strstart(filename, device_prefixes[i], NULL)) {
            return 1;
        }
    }
    return 0;
}
#endif

118
/* throttling disk I/O limits */
119
120
/*
 * Apply the throttling configuration @cfg to @bs.
 *
 * After the new configuration is installed, qemu_co_enter_next() is called
 * once on each of the two throttled request queues.
 */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/*
 * Drain all throttled requests of @bs.
 *
 * io_limits_enabled is cleared while the two queues are emptied and is
 * restored to its previous value afterwards.
 *
 * Returns true if at least one queued request was entered.
 */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

151
/*
 * Turn I/O throttling off for @bs: clear the enabled flag, restart every
 * request still waiting in the throttled queues, then destroy the
 * throttle state.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

160
static void bdrv_throttle_read_timer_cb(void *opaque)
161
{
162
163
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
164
165
}

166
static void bdrv_throttle_write_timer_cb(void *opaque)
167
{
168
169
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
170
171
}

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/*
 * Enable I/O throttling on @bs.
 *
 * Should be called before bdrv_set_io_limits() if a limit is set.
 * Throttling must not already be enabled (asserted). The throttle state is
 * initialised on QEMU_CLOCK_VIRTUAL with the read and write timer callbacks
 * of this file, with @bs as their opaque argument.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @nb_sectors: the number of sectors of the IO
 * @is_write:   is the IO a write
 */
189
static void bdrv_io_limits_intercept(BlockDriverState *bs,
190
191
                                     int nb_sectors,
                                     bool is_write)
192
{
193
194
    /* does this io must wait */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
195

196
197
198
199
    /* if must wait or any request of this type throttled queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
200
201
    }

202
203
204
205
    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state,
                     is_write,
                     nb_sectors * BDRV_SECTOR_SIZE);
206

207
208
209
    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
210
211
    }

212
213
    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
214
215
}

216
217
218
/*
 * Check if the path starts with "<protocol>:".
 *
 * A ':' only counts as a protocol separator when it occurs before the
 * first '/' (and, on Windows, before the first '\\'). On Windows, drive
 * specifications such as "c:" are never treated as protocols.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

bellard's avatar
bellard committed
234
/*
 * Return non-zero if @path is absolute.
 *
 * On POSIX hosts that means it starts with '/'. On Windows a leading '\\',
 * a drive prefix ("c:") or a device name accepted by is_windows_drive()
 * also counts as absolute.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

bellard's avatar
bellard committed
247
248
249
250
251
252
/*
 * If filename is absolute, just copy it to dest. Otherwise, build a
 * path to it by considering it is relative to base_path. URLs are
 * supported.
 *
 * The directory part taken from base_path ends after its last '/' (or
 * '\\' on Windows), or after its first ':' if that lies further right.
 * The result is truncated to fit dest_size bytes including the
 * terminating NUL; nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: just past the first ':' of base_path, if any */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* p1: just past the last directory separator, if any */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        if (p1 > p) {
            p = p1;
        }
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

291
292
293
294
295
296
297
298
299
/*
 * Write the backing file name of @bs into @dest (at most @sz bytes).
 *
 * An empty backing file name, or one carrying a protocol prefix, is copied
 * unchanged; any other name is combined with bs->filename through
 * path_combine().
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    const char *backing = bs->backing_file;
    bool needs_base = backing[0] != '\0' && !path_has_protocol(backing);

    if (needs_base) {
        path_combine(dest, sz, bs->filename, backing);
        return;
    }
    pstrcpy(dest, sz, backing);
}

300
/*
 * Register the block driver @bdrv by inserting it at the head of the
 * driver list.
 *
 * Drivers without bdrv_co_readv get the coroutine emulation callbacks
 * installed; if they also lack bdrv_aio_readv, the AIO emulation callbacks
 * are installed as well.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellard's avatar
bellard committed
319
320
321
322

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
323
    BlockDriverState *bs;
bellard's avatar
bellard committed
324

325
    bs = g_malloc0(sizeof(BlockDriverState));
bellard's avatar
bellard committed
326
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellard's avatar
bellard committed
327
    if (device_name[0] != '\0') {
328
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellard's avatar
bellard committed
329
    }
330
    bdrv_iostatus_disable(bs);
Paolo Bonzini's avatar
Paolo Bonzini committed
331
    notifier_list_init(&bs->close_notifiers);
332
    notifier_with_return_list_init(&bs->before_write_notifiers);
333
334
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
Paolo Bonzini's avatar
Paolo Bonzini committed
335

bellard's avatar
bellard committed
336
337
338
    return bs;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
339
340
341
342
343
/* Add @notify to the close_notifiers list of @bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

bellard's avatar
bellard committed
344
345
346
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
347
348
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
bellard's avatar
bellard committed
349
            return drv1;
350
        }
bellard's avatar
bellard committed
351
352
353
354
    }
    return NULL;
}

355
/*
 * Check @drv against the build-time driver whitelists.
 *
 * Returns 1 if both whitelists are empty, if the driver is on the
 * read-write whitelist, or if @read_only is set and the driver is on the
 * read-only whitelist; returns 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

384
385
/*
 * Like bdrv_find_format(), but only returns the driver if it passes
 * bdrv_is_whitelisted() for the requested access mode; NULL otherwise.
 */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
/* Arguments and result slot handed to bdrv_create_co_entry() */
typedef struct CreateCo {
    BlockDriver *drv;               /* driver whose bdrv_create is invoked */
    char *filename;                 /* image file name passed to the driver */
    QEMUOptionParameter *options;   /* creation options passed to the driver */
    int ret;                        /* result; NOT_DONE until the call returns */
} CreateCo;

/*
 * Coroutine entry point for bdrv_create(): calls the driver's bdrv_create
 * callback and stores its return value in the CreateCo passed as @opaque.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}

406
407
/*
 * Create an image named @filename with driver @drv and creation @options.
 *
 * The driver callback runs in coroutine context: directly when the caller
 * is already in a coroutine, otherwise in a new coroutine while this
 * function polls qemu_aio_wait() until a result is available.
 *
 * Returns the driver's result, or -ENOTSUP if the driver has no
 * bdrv_create callback.
 */
int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
    };

    if (!drv->bdrv_create) {
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;

out:
    /* the private copy of the file name is released on every path */
    g_free(cco.filename);
    return ret;
}

442
443
444
445
/*
 * Create @filename using the protocol driver selected by
 * bdrv_find_protocol() (protocol prefixes allowed).
 *
 * Returns -ENOENT if no protocol driver is found, otherwise the result of
 * bdrv_create().
 */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

454
455
456
457
458
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 *
 * @filename: output buffer that receives the path of the created file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR (default "/tmp");
 * -EOVERFLOW is returned when the path does not fit into @filename.
 * On Windows @size must be at least MAX_PATH (asserted), and the failure
 * value is the negated GetLastError() code.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellard's avatar
bellard committed
488

489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver that implements bdrv_probe_device is asked to
 * score @filename. The driver with the highest strictly positive score is
 * returned (the earliest one in the list on ties), or NULL if none scored
 * above zero.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL;
    BlockDriver *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int candidate_score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }
        candidate_score = candidate->bdrv_probe_device(filename);
        if (candidate_score > best_score) {
            best_score = candidate_score;
            best = candidate;
        }
    }

    return best;
}

511
512
/*
 * Select the protocol driver for @filename.
 *
 * Host device probing is tried first. Otherwise, if @filename has no
 * "<protocol>:" prefix or @allow_protocol_prefix is false, the "file"
 * driver is returned. Otherwise the prefix (truncated to 127 characters)
 * is matched against each driver's protocol_name.
 *
 * Returns the driver, or NULL if the protocol is unknown.
 */
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    /* path_has_protocol() succeeded, so a ':' must be present */
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1) {
        len = sizeof(protocol) - 1;
    }
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

553
554
/*
 * Probe the image format of @bs.
 *
 * For scsi-generic devices, drives without medium and zero-length images
 * the "raw" driver is chosen without probing. Otherwise up to 2048 bytes
 * are read from offset 0 and handed to every driver's bdrv_probe callback;
 * the driver with the highest strictly positive score wins.
 *
 * @pdrv receives the selected driver, or NULL on failure.
 *
 * Returns a non-negative value on success (0 on the raw shortcut, else the
 * bdrv_pread() result), -ENOENT if no driver matched, or the negative
 * bdrv_pread() error.
 */
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            /* ret is passed as the buffer size to the probe callback */
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

595
596
597
598
599
600
601
/**
 * Set the current 'total_sectors' value
 *
 * @hint: sector count to use when the driver has no bdrv_getlength callback
 *
 * Nothing is changed for scsi-generic devices. Returns 0 on success or the
 * negative value reported by the driver's bdrv_getlength callback.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

Paolo Bonzini's avatar
Paolo Bonzini committed
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
/**
 * Set open flags for a given discard mode
 *
 * "off"/"ignore" leave BDRV_O_UNMAP cleared, "on"/"unmap" set it.
 * BDRV_O_UNMAP is cleared from *flags even when the mode is invalid.
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    bool want_unmap;

    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        want_unmap = false;
    } else if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        want_unmap = true;
    } else {
        return -1;
    }

    if (want_unmap) {
        *flags |= BDRV_O_UNMAP;
    }
    return 0;
}

639
640
641
642
643
644
645
646
647
648
649
/**
 * Set open flags for a given cache mode
 *
 * The bits in BDRV_O_CACHE_MASK are cleared first (also when the mode turns
 * out to be invalid), then the bits for the requested mode are set:
 *   "off", "none"   BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *   "directsync"    BDRV_O_NOCACHE
 *   "writeback"     BDRV_O_CACHE_WB
 *   "unsafe"        BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 *   "writethrough"  none
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
/**
 * The copy-on-read flag is really a reference count, so several users can
 * turn the feature on independently without clobbering each other's state.
 * Copy-on-read remains enabled until every user has called
 * bdrv_disable_copy_on_read().
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read += 1;
}

/*
 * Drop one copy-on-read reference taken by bdrv_enable_copy_on_read().
 * The count must be positive on entry (asserted).
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read -= 1;
}

Kevin Wolf's avatar
Kevin Wolf committed
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
/*
 * Derive the flags handed to the driver's open callback from the
 * caller-supplied @flags:
 *  - BDRV_O_CACHE_WB is always set;
 *  - BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING, which are internal to the block
 *    layer, are removed;
 *  - BDRV_O_RDWR is added for temporary images, as snapshots should be
 *    writable.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    const int block_layer_only = BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING;
    int result = (flags | BDRV_O_CACHE_WB) & ~block_layer_only;

    if (bs->is_temporary) {
        result |= BDRV_O_RDWR;
    }
    return result;
}

702
703
/*
 * Common part for opening disk images and files
704
705
 *
 * Removes all processed options from *options.
706
 */
707
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
Kevin Wolf's avatar
Kevin Wolf committed
708
    QDict *options, int flags, BlockDriver *drv)
709
710
{
    int ret, open_flags;
Kevin Wolf's avatar
Kevin Wolf committed
711
    const char *filename;
712
713

    assert(drv != NULL);
714
    assert(bs->file == NULL);
715
    assert(options != NULL && bs->options != options);
716

717
718
719
720
721
722
723
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
724

725
726
727
728
729
730
731
732
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

733
734
    bs->open_flags = flags;
    bs->buffer_alignment = 512;
735
    bs->zero_beyond_eof = true;
736
737
738
739
740
741
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        return -ENOTSUP;
    }
742

743
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
744
    if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
745
746
747
        bdrv_enable_copy_on_read(bs);
    }

748
749
750
751
752
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
753
754

    bs->drv = drv;
755
    bs->opaque = g_malloc0(drv->instance_size);
756

757
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
758

759
760
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
761
762
        assert(file == NULL);
        assert(drv->bdrv_parse_filename || filename != NULL);
763
        ret = drv->bdrv_file_open(bs, options, open_flags);
764
    } else {
765
766
767
768
769
770
771
        if (file == NULL) {
            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
                          "block driver for the protocol level",
                          drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
772
        bs->file = file;
773
        ret = drv->bdrv_open(bs, options, open_flags);
774
775
    }

776
777
778
779
    if (ret < 0) {
        goto free_and_fail;
    }

780
781
782
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
783
    }
784

785
786
#ifndef _WIN32
    if (bs->is_temporary) {
787
        assert(filename != NULL);
788
789
790
791
792
793
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
794
    bs->file = NULL;
795
    g_free(bs->opaque);
796
797
798
799
800
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

Kevin Wolf's avatar
Kevin Wolf committed
801
802
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
803
804
805
806
807
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
Kevin Wolf's avatar
Kevin Wolf committed
808
 */
809
810
int bdrv_file_open(BlockDriverState **pbs, const char *filename,
                   QDict *options, int flags)
bellard's avatar
bellard committed
811
{
bellard's avatar
bellard committed
812
    BlockDriverState *bs;
813
    BlockDriver *drv;
814
    const char *drvname;
815
    bool allow_protocol_prefix = false;
bellard's avatar
bellard committed
816
817
    int ret;

818
819
820
821
822
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

bellard's avatar
bellard committed
823
    bs = bdrv_new("");
824
825
826
    bs->options = options;
    options = qdict_clone_shallow(options);

Kevin Wolf's avatar
Kevin Wolf committed
827
828
829
830
831
    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
832
        allow_protocol_prefix = true;
Kevin Wolf's avatar
Kevin Wolf committed
833
834
835
836
837
838
839
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
                      "'filename' options at the same time");
        ret = -EINVAL;
        goto fail;
    }

840
841
842
    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
843
        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
844
845
        qdict_del(options, "driver");
    } else if (filename) {
846
847
848
849
        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
        if (!drv) {
            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
        }
850
851
852
853
854
855
856
857
858
859
860
861
862
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "Must specify either driver or file");
        drv = NULL;
    }

    if (!drv) {
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && filename) {
863
864
865
866
867
868
869
870
        Error *local_err = NULL;
        drv->bdrv_parse_filename(filename, options, &local_err);
        if (error_is_set(&local_err)) {
            qerror_report_err(local_err);
            error_free(local_err);
            ret = -EINVAL;
            goto fail;
        }
871
        qdict_del(options, "filename");
872
873
874
875
876
877
    } else if (!drv->bdrv_parse_filename && !filename) {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "The '%s' block driver requires a file name",
                      drv->format_name);
        ret = -EINVAL;
        goto fail;
878
879
    }

Kevin Wolf's avatar
Kevin Wolf committed
880
    ret = bdrv_open_common(bs, NULL, options, flags, drv);
bellard's avatar
bellard committed
881
    if (ret < 0) {
882
883
884
885
886
887
888
889
890
891
892
        goto fail;
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
                      "support the option '%s'",
                      drv->format_name, entry->key);
        ret = -EINVAL;
        goto fail;
893
    }
894
895
    QDECREF(options);

896
    bs->growable = 1;
bellard's avatar
bellard committed
897
898
    *pbs = bs;
    return 0;
899
900
901
902
903
904
905
906

fail:
    QDECREF(options);
    if (!bs->drv) {
        QDECREF(bs->options);
    }
    bdrv_delete(bs);
    return ret;
bellard's avatar
bellard committed
907
908
}

909
910
911
912
913
914
915
916
917
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
 *
 * Returns 0 on success, including when a backing file is already open or
 * when there is none to open. On failure the negative value from bdrv_open()
 * is returned and BDRV_O_NO_BACKING is set in bs->open_flags.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
{
    char backing_filename[PATH_MAX];
    int back_flags, ret;
    BlockDriver *back_drv = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        return 0;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* the caller's file name overrides the one stored in the image */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        return 0;
    } else {
        /* only fall back to the image's backing file name when the caller
         * did not supply one; otherwise the override would be lost */
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));
    }

    bs->backing_hd = bdrv_new("");

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    /* backing files always opened read-only */
    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);

    ret = bdrv_open(bs->backing_hd,
                    *backing_filename ? backing_filename : NULL, options,
                    back_flags, back_drv);
    if (ret < 0) {
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        return ret;
    }
    return 0;
}

964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
/*
 * Splits the entries whose key begins with start out of src.
 *
 * A newly created QDict is always stored in *dst. Every entry of src whose
 * key matches the prefix (as decided by strstart()) is added to *dst under
 * the remainder of the key that strstart() reports, and is then deleted from
 * src. All other entries are left in src.
 */
static void extract_subqdict(QDict *src, QDict **dst, const char *start)
{
    const QDictEntry *cur, *following;
    const char *suffix;

    *dst = qdict_new();

    for (cur = qdict_first(src); cur != NULL; cur = following) {
        /* Fetch the successor first: cur may be deleted from src below */
        following = qdict_next(src, cur);

        if (!strstart(cur->key, start, &suffix)) {
            continue;
        }

        /*
         * Take an extra reference for *dst before removing the entry from
         * src (presumably qdict_del() drops the reference held by src).
         */
        qobject_incref(cur->value);
        qdict_put_obj(*dst, suffix, cur->value);
        qdict_del(src, cur->key);
    }
}

Kevin Wolf's avatar
Kevin Wolf committed
983
984
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
985
986
987
988
989
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Kevin Wolf's avatar
Kevin Wolf committed
990
 */
991
992
int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
              int flags, BlockDriver *drv)
bellard's avatar
bellard committed
993
{
Kevin Wolf's avatar
Kevin Wolf committed
994
    int ret;
995
996
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char tmp_filename[PATH_MAX + 1];
997
    BlockDriverState *file = NULL;
998
    QDict *file_options = NULL;
999
    const char *drvname;
bellard's avatar
bellard committed
1000

1001
1002
1003
1004
1005
1006
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->options = options;
1007
    options = qdict_clone_shallow(options);
1008
1009

    /* For snapshot=on, create a temporary qcow2 overlay */
bellard's avatar
bellard committed
1010
    if (flags & BDRV_O_SNAPSHOT) {
bellard's avatar
bellard committed
1011
1012
        BlockDriverState *bs1;
        int64_t total_size;
Kevin Wolf's avatar
Kevin Wolf committed
1013
        BlockDriver *bdrv_qcow2;
1014
        QEMUOptionParameter *create_options;
Kevin Wolf's avatar
Kevin Wolf committed
1015
        char backing_filename[PATH_MAX];
1016

1017
1018
1019
1020
1021
1022
1023
        if (qdict_size(options) != 0) {
            error_report("Can't use snapshot=on with driver-specific options");
            ret = -EINVAL;
            goto fail;
        }
        assert(filename != NULL);

bellard's avatar
bellard committed
1024
1025
        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */
1026

bellard's avatar
bellard committed
1027
1028
        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
1029
        ret = bdrv_open(bs1, filename, NULL, 0, drv);
1030
        if (ret < 0) {
bellard's avatar
bellard committed
1031
            bdrv_delete(bs1);
1032
            goto fail;
bellard's avatar
bellard committed
1033
        }
1034
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori's avatar
aliguori committed
1035

bellard's avatar
bellard committed
1036
        bdrv_delete(bs1);
1037

1038
1039
        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
1040
            goto fail;
1041
        }
aliguori's avatar
aliguori committed
1042
1043

        /* Real path is meaningless for protocols */
1044
        if (path_has_protocol(filename)) {
aliguori's avatar
aliguori committed
1045
1046
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
1047
1048
1049
1050
        } else if (!realpath(filename, backing_filename)) {
            ret = -errno;
            goto fail;
        }
aliguori's avatar
aliguori committed
1051

Kevin Wolf's avatar
Kevin Wolf committed
1052
        bdrv_qcow2 = bdrv_find_format("qcow2");
1053
1054
        create_options = parse_option_parameters("", bdrv_qcow2->create_options,
                                                 NULL);
Kevin Wolf's avatar
Kevin Wolf committed
1055

1056
1057
1058
        set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
                             backing_filename);
Kevin Wolf's avatar
Kevin Wolf committed
1059
        if (drv) {
1060
            set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
Kevin Wolf's avatar
Kevin Wolf committed
1061
1062
1063
                drv->format_name);
        }

1064
1065
        ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
        free_option_parameters(create_options);
1066
        if (ret < 0) {
1067
            goto fail;
bellard's avatar
bellard committed
1068
        }
Kevin Wolf's avatar
Kevin Wolf committed
1069

bellard's avatar
bellard committed
1070
        filename = tmp_filename;
Kevin Wolf's avatar
Kevin Wolf committed
1071
        drv = bdrv_qcow2;
bellard's avatar
bellard committed
1072
1073
        bs->is_temporary = 1;
    }
bellard's avatar
bellard committed
1074

1075
1076
1077
1078
1079
    /* Open image file without format layer */
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

1080
1081
1082
    extract_subqdict(options, &file_options, "file.");

    ret = bdrv_file_open(&file, filename, file_options,
1083
                         bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
1084
    if (ret < 0) {
1085
        goto fail;
1086
1087
    }

Kevin Wolf's avatar
Kevin Wolf committed
1088
    /* Find the right image format driver */
1089
1090
1091
1092
1093
1094
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
        qdict_del(options, "driver");
    }

1095
    if (!drv) {
1096
        ret = find_image_format(file, filename, &drv);
1097
    }
1098

1099
1100
    if (!drv) {
        goto unlink_and_fail;
bellard's avatar
bellard committed
1101
    }
Kevin Wolf's avatar
Kevin Wolf committed
1102
1103

    /* Open the image */
Kevin Wolf's avatar
Kevin Wolf committed
1104
    ret = bdrv_open_common(bs, file, options, flags, drv);
Kevin Wolf's avatar
Kevin Wolf committed
1105
    if (ret < 0) {
1106
1107
1108
        goto unlink_and_fail;
    }

1109
1110
1111
1112
1113
    if (bs->file != file) {
        bdrv_delete(file);
        file = NULL;
    }

Kevin Wolf's avatar
Kevin Wolf committed
1114
    /* If there is a backing file, use it */
1115
    if ((flags & BDRV_O_NO_BACKING) == 0) {
1116
1117
1118
1119
        QDict *backing_options;

        extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options);
Kevin Wolf's avatar
Kevin Wolf committed
1120
        if (ret < 0) {
1121
            goto close_and_fail;
Kevin Wolf's avatar
Kevin Wolf committed
1122
1123
1124
        }
    }

1125
1126
11