Commit dc92b1f9 authored by Linus Torvalds
Browse files

Merge branch 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux

Pull virtio changes from Rusty Russell:
 "New workflow: same git trees pulled by linux-next get sent straight to
  Linus.  Git is awkward at shuffling patches compared with quilt or mq,
  but that doesn't happen often once things get into my -next branch."

* 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (24 commits)
  lguest: fix occasional crash in example launcher.
  virtio-blk: Disable callback in virtblk_done()
  virtio_mmio: Don't attempt to create empty virtqueues
  virtio_mmio: fix off by one error allocating queue
  drivers/virtio/virtio_pci.c: fix error return code
  virtio: don't crash when device is buggy
  virtio: remove CONFIG_VIRTIO_RING
  virtio: add help to CONFIG_VIRTIO option.
  virtio: support reserved vqs
  virtio: introduce an API to set affinity for a virtqueue
  virtio-ring: move queue_index to vring_virtqueue
  virtio_balloon: not EXPERIMENTAL any more.
  virtio-balloon: dependency fix
  virtio-blk: fix NULL checking in virtblk_alloc_req()
  virtio-blk: Add REQ_FLUSH and REQ_FUA support to bio path
  virtio-blk: Add bio-based IO path for virtio-blk
  virtio: console: fix error handling in init() function
  tools: Fix pthread flag for Makefile of trace-agent used by virtio-trace
  tools: Add guest trace agent as a user tool
  virtio/console: Allocate scatterlist according to the current pipe size
  ...
parents 5e090ed7 ca16f580
......@@ -656,7 +656,6 @@ config S390_GUEST
depends on 64BIT && EXPERIMENTAL
select VIRTUALIZATION
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
help
Enabling this option adds support for virtio based paravirtual device
......
......@@ -4,7 +4,6 @@ config LGUEST_GUEST
depends on X86_32
select VIRTUALIZATION
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
help
Lguest is a tiny in-kernel hypervisor. Selecting this will
......
......@@ -14,6 +14,9 @@
#define PART_BITS 4
static bool use_bio;
module_param(use_bio, bool, S_IRUGO);
static int major;
static DEFINE_IDA(vd_index_ida);
......@@ -23,6 +26,7 @@ struct virtio_blk
{
struct virtio_device *vdev;
struct virtqueue *vq;
wait_queue_head_t queue_wait;
/* The disk structure for the kernel. */
struct gendisk *disk;
......@@ -51,53 +55,244 @@ struct virtio_blk
/*
 * Per-command bookkeeping handed to the virtqueue as the buffer token.
 * One of these is allocated from vblk->pool for every request or bio
 * that is submitted to the device.
 */
struct virtblk_req
{
/* Block-layer request; set by do_req() for the request-queue path. */
struct request *req;
/*
 * Originating bio; set by virtblk_make_request() and left NULL by do_req().
 * virtblk_done() uses it to pick the bio or the request completion path.
 */
struct bio *bio;
/* Command header placed in the first (device-readable) sg entry. */
struct virtio_blk_outhdr out_hdr;
/* Values copied back into the request for REQ_TYPE_BLOCK_PC commands. */
struct virtio_scsi_inhdr in_hdr;
/* Used to hand a follow-up data/flush submission to virtblk_wq. */
struct work_struct work;
/* Owning device, filled in by virtblk_alloc_req(). */
struct virtio_blk *vblk;
/* Bitmask of VBLK_* values (bio path). */
int flags;
/* Completion status; translated to an errno by virtblk_result(). */
u8 status;
/*
 * Per-request scatterlist. Only initialised (and only given backing
 * storage by virtblk_probe()) when use_bio is set.
 */
struct scatterlist sg[];
};
/* State bits stored in virtblk_req.flags on the bio path. */
enum {
	VBLK_IS_FLUSH	= 1 << 0,	/* command currently in flight is a flush */
	VBLK_REQ_FLUSH	= 1 << 1,	/* bio was submitted with REQ_FLUSH */
	VBLK_REQ_DATA	= 1 << 2,	/* bio has a non-zero bi_size */
	VBLK_REQ_FUA	= 1 << 3,	/* bio was submitted with REQ_FUA */
};
static void blk_done(struct virtqueue *vq)
static inline int virtblk_result(struct virtblk_req *vbr)
{
switch (vbr->status) {
case VIRTIO_BLK_S_OK:
return 0;
case VIRTIO_BLK_S_UNSUPP:
return -ENOTTY;
default:
return -EIO;
}
}
/*
 * Allocate a virtblk_req from the device's mempool.
 *
 * @vblk:     device the request belongs to; stored in vbr->vblk.
 * @gfp_mask: allocation flags passed straight to mempool_alloc().
 *
 * Returns the new request, or NULL if the mempool allocation failed.
 * The trailing scatterlist is initialised only when use_bio is set,
 * since virtblk_probe() only adds scatterlist space to the pool
 * element size in that mode.
 */
static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
						    gfp_t gfp_mask)
{
	struct virtblk_req *vbr;

	vbr = mempool_alloc(vblk->pool, gfp_mask);
	if (!vbr)
		return NULL;

	vbr->vblk = vblk;
	if (use_bio)
		sg_init_table(vbr->sg, vblk->sg_elems);

	return vbr;
}
/*
 * Slow path for queueing a request when the virtqueue had no room.
 *
 * Sleeps (exclusively, uninterruptibly) on vblk->queue_wait and retries
 * virtqueue_add_buf() under the queue lock each time it is woken, until
 * the buffer is accepted; the device is then kicked.  virtblk_done()
 * wakes this wait queue after completing bio requests.
 *
 * @out / @in: number of device-readable / device-writable sg entries.
 */
static void virtblk_add_buf_wait(struct virtio_blk *vblk,
				 struct virtblk_req *vbr,
				 unsigned long out,
				 unsigned long in)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* Register on the wait queue before testing for space. */
		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
					  TASK_UNINTERRUPTIBLE);

		spin_lock_irq(vblk->disk->queue->queue_lock);
		if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
				      GFP_ATOMIC) < 0) {
			spin_unlock_irq(vblk->disk->queue->queue_lock);
			io_schedule();
		} else {
			virtqueue_kick(vblk->vq);
			spin_unlock_irq(vblk->disk->queue->queue_lock);
			break;
		}
	}

	finish_wait(&vblk->queue_wait, &wait);
}
/*
 * Queue @vbr on the virtqueue with @out readable and @in writable sg
 * entries.  On success the device is kicked while the queue lock is
 * still held; if the ring has no room the lock is dropped and the
 * request is handed to virtblk_add_buf_wait().
 */
static inline void virtblk_add_req(struct virtblk_req *vbr,
				   unsigned int out, unsigned int in)
{
	struct virtio_blk *vblk = vbr->vblk;
	int rc;

	spin_lock_irq(vblk->disk->queue->queue_lock);
	rc = virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr, GFP_ATOMIC);
	if (rc >= 0)
		virtqueue_kick(vblk->vq);
	spin_unlock_irq(vblk->disk->queue->queue_lock);

	if (unlikely(rc < 0))
		virtblk_add_buf_wait(vblk, vbr, out, in);
}
/*
 * Submit a VIRTIO_BLK_T_FLUSH command for @vbr.
 *
 * Marks the request as "flush in flight" and queues two sg entries:
 * the out header followed by the status byte.  Always returns 0.
 */
static int virtblk_bio_send_flush(struct virtblk_req *vbr)
{
	vbr->flags |= VBLK_IS_FLUSH;

	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
	vbr->out_hdr.sector = 0;
	vbr->out_hdr.ioprio = 0;

	/* sg[0]: header read by the device, sg[1]: status written by it. */
	sg_set_buf(&vbr->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
	sg_set_buf(&vbr->sg[1], &vbr->status, sizeof(vbr->status));

	virtblk_add_req(vbr, 1, 1);

	return 0;
}
/*
 * Submit the data portion of the bio attached to @vbr.
 *
 * Layout of vbr->sg: [0] out header, [1..nsegs] the bio's segments as
 * mapped by blk_bio_map_sg(), [nsegs + 1] the status byte.  The mapped
 * segments count as device-readable for writes and device-writable for
 * reads.  Always returns 0.
 */
static int virtblk_bio_send_data(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	struct bio *bio = vbr->bio;
	/* Header and status byte are always present. */
	unsigned int nr_out = 1, nr_in = 1, nsegs;

	vbr->flags &= ~VBLK_IS_FLUSH;

	vbr->out_hdr.type = 0;
	vbr->out_hdr.sector = bio->bi_sector;
	vbr->out_hdr.ioprio = bio_prio(bio);

	sg_set_buf(&vbr->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
	nsegs = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + 1);
	sg_set_buf(&vbr->sg[nsegs + 1], &vbr->status, sizeof(vbr->status));

	if (nsegs) {
		if (bio->bi_rw & REQ_WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			nr_out += nsegs;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			nr_in += nsegs;
		}
	}

	virtblk_add_req(vbr, nr_out, nr_in);

	return 0;
}
/* Work handler: submit the data part of the request embedding @work. */
static void virtblk_bio_send_data_work(struct work_struct *work)
{
	virtblk_bio_send_data(container_of(work, struct virtblk_req, work));
}
/* Work handler: submit a flush for the request embedding @work. */
static void virtblk_bio_send_flush_work(struct work_struct *work)
{
	virtblk_bio_send_flush(container_of(work, struct virtblk_req, work));
}
/*
 * Complete a request-path command: copy the result fields back into
 * the struct request, end the request with the translated status and
 * return @vbr to the mempool.
 */
static inline void virtblk_request_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	struct request *req = vbr->req;
	int error = virtblk_result(vbr);

	switch (req->cmd_type) {
	case REQ_TYPE_BLOCK_PC:
		req->resid_len = vbr->in_hdr.residual;
		req->sense_len = vbr->in_hdr.sense_len;
		req->errors = vbr->in_hdr.errors;
		break;
	case REQ_TYPE_SPECIAL:
		req->errors = (error != 0);
		break;
	default:
		break;
	}

	__blk_end_request_all(req, error);
	mempool_free(vbr, vblk->pool);
}
/*
 * Completion of a flush command on the bio path.  If the bio also
 * carries data, the data submission is deferred to virtblk_wq;
 * otherwise the bio is finished here and @vbr is freed.
 */
static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (!(vbr->flags & VBLK_REQ_DATA)) {
		/* Flush-only bio: nothing more to send. */
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
		return;
	}

	/* Send out the actual write data */
	INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
	queue_work(virtblk_wq, &vbr->work);
}
/*
 * Completion of a data command on the bio path.  A FUA bio needs a
 * trailing flush, which is deferred to virtblk_wq; any other bio is
 * finished here and @vbr is returned to the mempool.
 */
static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
		/* Send out a flush before ending the bio */
		/*
		 * Clear the data bit so that virtblk_bio_flush_done() ends
		 * the bio instead of resubmitting the data.
		 */
		vbr->flags &= ~VBLK_REQ_DATA;
		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
		queue_work(virtblk_wq, &vbr->work);
	} else {
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
	}
}
/* Route a completed bio-path command to its flush or data handler. */
static inline void virtblk_bio_done(struct virtblk_req *vbr)
{
	if (unlikely(vbr->flags & VBLK_IS_FLUSH)) {
		virtblk_bio_flush_done(vbr);
		return;
	}

	virtblk_bio_data_done(vbr);
}
/*
 * Virtqueue callback: reap every completed buffer and finish it on the
 * bio or the request path, depending on whether vbr->bio is set.
 *
 * Callbacks are disabled while the ring is drained; the drain is
 * repeated for as long as virtqueue_enable_cb() returns false
 * (presumably because more buffers completed in the meantime).
 * Afterwards the request queue is restarted only if a request-path
 * command completed, and bio submitters sleeping in
 * virtblk_add_buf_wait() are woken only if a bio-path command
 * completed.
 */
static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool bio_done = false, req_done = false;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
			if (vbr->bio) {
				virtblk_bio_done(vbr);
				bio_done = true;
			} else {
				virtblk_request_done(vbr);
				req_done = true;
			}
		}
	} while (!virtqueue_enable_cb(vq));
	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);

	/* Wake submitters outside the queue lock. */
	if (bio_done)
		wake_up(&vblk->queue_wait);
}
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
......@@ -106,13 +301,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
unsigned long num, out = 0, in = 0;
struct virtblk_req *vbr;
vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
if (!vbr)
/* When another request finishes we'll try again. */
return false;
vbr->req = req;
vbr->bio = NULL;
if (req->cmd_flags & REQ_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
......@@ -172,7 +367,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
}
}
if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
GFP_ATOMIC) < 0) {
mempool_free(vbr, vblk->pool);
return false;
}
......@@ -180,7 +376,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
return true;
}
static void do_virtblk_request(struct request_queue *q)
static void virtblk_request(struct request_queue *q)
{
struct virtio_blk *vblk = q->queuedata;
struct request *req;
......@@ -203,6 +399,34 @@ static void do_virtblk_request(struct request_queue *q)
virtqueue_kick(vblk->vq);
}
/*
 * make_request entry point used when use_bio is set.
 *
 * Allocates a virtblk_req for @bio, records which of flush / FUA / data
 * the bio asks for, and starts with the flush if one was requested,
 * otherwise with the data.  The bio is failed with -ENOMEM if no
 * request can be allocated.
 */
static void virtblk_make_request(struct request_queue *q, struct bio *bio)
{
	struct virtio_blk *vblk = q->queuedata;
	struct virtblk_req *vbr;
	int state = 0;

	/* Header and status byte each need an sg entry besides the data. */
	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);

	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
	if (!vbr) {
		bio_endio(bio, -ENOMEM);
		return;
	}

	if (bio->bi_rw & REQ_FLUSH)
		state |= VBLK_REQ_FLUSH;
	if (bio->bi_rw & REQ_FUA)
		state |= VBLK_REQ_FUA;
	if (bio->bi_size)
		state |= VBLK_REQ_DATA;

	vbr->bio = bio;
	vbr->flags = state;

	if (unlikely(state & VBLK_REQ_FLUSH))
		virtblk_bio_send_flush(vbr);
	else
		virtblk_bio_send_data(vbr);
}
/* return id (s/n) string for *disk to *id_str
*/
static int virtblk_get_id(struct gendisk *disk, char *id_str)
......@@ -360,7 +584,7 @@ static int init_vq(struct virtio_blk *vblk)
int err = 0;
/* We expect one virtqueue, for output. */
vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
if (IS_ERR(vblk->vq))
err = PTR_ERR(vblk->vq);
......@@ -477,6 +701,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
struct virtio_blk *vblk;
struct request_queue *q;
int err, index;
int pool_size;
u64 cap;
u32 v, blk_size, sg_elems, opt_io_size;
u16 min_io_size;
......@@ -506,10 +732,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
init_waitqueue_head(&vblk->queue_wait);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
mutex_init(&vblk->config_lock);
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
vblk->config_enable = true;
......@@ -517,7 +745,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_free_vblk;
vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
pool_size = sizeof(struct virtblk_req);
if (use_bio)
pool_size += sizeof(struct scatterlist) * sg_elems;
vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
if (!vblk->pool) {
err = -ENOMEM;
goto out_free_vq;
......@@ -530,12 +761,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_mempool;
}
q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
}
if (use_bio)
blk_queue_make_request(q, virtblk_make_request);
q->queuedata = vblk;
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
......@@ -620,7 +853,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (!err && opt_io_size)
blk_queue_io_opt(q, blk_size * opt_io_size);
add_disk(vblk->disk);
err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
if (err)
......
......@@ -24,6 +24,8 @@
#include <linux/err.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/splice.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/poll.h>
......@@ -474,26 +476,53 @@ static ssize_t send_control_msg(struct port *port, unsigned int event,
return 0;
}
/*
 * Token handed to virtqueue_add_buf() for every outgoing buffer and
 * returned by virtqueue_get_buf() in reclaim_consumed_buffers(), where
 * it tells the reclaim code how to release the payload before the
 * token itself is freed.
 */
struct buffer_token {
union {
/* Linear buffer queued by send_buf(); released with kfree(). */
void *buf;
/* Page-backed sg array queued by send_pages(); see reclaim_sg_pages(). */
struct scatterlist *sg;
} u;
/* If sgpages == 0 then buf is used, else sg is used */
unsigned int sgpages;
};
/*
 * Release a page-backed scatterlist array.
 *
 * Drops one reference on the page behind each of the first @nrpages
 * entries of @sg, stopping early at the first entry that has no page,
 * and then frees the array itself.
 */
static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages)
{
	unsigned int i;	/* unsigned to match @nrpages */
	struct page *page;

	for (i = 0; i < nrpages; i++) {
		page = sg_page(&sg[i]);
		if (!page)
			break;
		put_page(page);
	}
	kfree(sg);
}
/*
 * Free every buffer the host has finished with on the port's output
 * virtqueue.  Each returned token is a struct buffer_token: its payload
 * is released according to tok->sgpages, then the token itself is
 * freed and the port is marked as no longer full.
 *
 * Callers must take the port->outvq_lock
 */
static void reclaim_consumed_buffers(struct port *port)
{
	struct buffer_token *tok;
	unsigned int len;

	if (!port->portdev) {
		/* Device has been unplugged.  vqs are already gone. */
		return;
	}
	while ((tok = virtqueue_get_buf(port->out_vq, &len))) {
		if (tok->sgpages)
			reclaim_sg_pages(tok->u.sg, tok->sgpages);
		else
			kfree(tok->u.buf);
		kfree(tok);
		port->outvq_full = false;
	}
}
static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
bool nonblock)
static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
int nents, size_t in_count,
struct buffer_token *tok, bool nonblock)
{
struct scatterlist sg[1];
struct virtqueue *out_vq;
ssize_t ret;
unsigned long flags;
......@@ -505,8 +534,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
reclaim_consumed_buffers(port);
sg_init_one(sg, in_buf, in_count);
ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC);
/* Tell Host to go! */
virtqueue_kick(out_vq);
......@@ -544,6 +572,37 @@ done:
return in_count;
}
/*
 * Send a linear buffer to the port.
 *
 * Wraps @in_buf in a buffer_token (sgpages == 0) and a single-entry
 * scatterlist, then hands both to __send_to_port().  Returns -ENOMEM if
 * the token cannot be allocated, otherwise whatever __send_to_port()
 * returns.
 */
static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
			bool nonblock)
{
	struct buffer_token *tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
	struct scatterlist sg[1];

	if (!tok)
		return -ENOMEM;

	tok->u.buf = in_buf;
	tok->sgpages = 0;

	sg_init_one(sg, in_buf, in_count);

	return __send_to_port(port, sg, 1, in_count, tok, nonblock);
}
/*
 * Send a page-backed scatterlist of @nents entries to the port.
 *
 * Records @sg and @nents in a buffer_token so the pages can be released
 * by reclaim_consumed_buffers(), then hands everything to
 * __send_to_port().  Returns -ENOMEM if the token cannot be allocated,
 * otherwise whatever __send_to_port() returns.
 */
static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents,
			  size_t in_count, bool nonblock)
{
	struct buffer_token *tok = kmalloc(sizeof(*tok), GFP_ATOMIC);

	if (!tok)
		return -ENOMEM;

	tok->u.sg = sg;
	tok->sgpages = nents;

	return __send_to_port(port, sg, nents, in_count, tok, nonblock);
}
/*
* Give out the data that's requested from the buffer that we have
* queued up.
......@@ -665,6 +724,26 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
return fill_readbuf(port, ubuf, count, true);
}
/*
 * Wait until a write to @port would not block.
 *
 * Returns -EAGAIN if the write would block and @nonblock is set, a
 * negative value from wait_event_freezable() if the wait failed,
 * -ENODEV if the guest is not connected afterwards, and 0 otherwise.
 */
static int wait_port_writable(struct port *port, bool nonblock)
{
	if (will_write_block(port)) {
		int err;

		if (nonblock)
			return -EAGAIN;

		err = wait_event_freezable(port->waitqueue,
					   !will_write_block(port));
		if (err < 0)
			return err;
	}

	/* Port got hot-unplugged. */
	return port->guest_connected ? 0 : -ENODEV;
}
static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *offp)
{
......@@ -681,18 +760,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
nonblock = filp->f_flags & O_NONBLOCK;
if (will_write_block(port)) {
if (nonblock)
return -EAGAIN;
ret = wait_event_freezable(port->waitqueue,
!will_write_block(port));
if (ret < 0)
return ret;
}
/* Port got hot-unplugged. */
if (!port->guest_connected)
return -ENODEV;
ret = wait_port_writable(port, nonblock);
if (ret < 0)
return ret;
count = min((size_t)(32 * 1024), count);
......@@ -725,6 +795,93 @@ out:
return ret;
}
struct sg_list {
unsigned int n;
unsigned int size;
size_t len;
struct scatterlist *sg;