Commit 8c194f3b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfio-v4.1-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - VFIO platform bus driver support (Baptiste Reynal, Antonios Motakis,
   testing and review by Eric Auger)

 - Split VFIO irqfd support to separate module (Alex Williamson)

 - vfio-pci VGA arbiter client (Alex Williamson)

 - New vfio-pci.ids= module option (Alex Williamson)

 - vfio-pci D3 power state support for idle devices (Alex Williamson)

* tag 'vfio-v4.1-rc1' of git://github.com/awilliam/linux-vfio: (30 commits)
  vfio-pci: Fix use after free
  vfio-pci: Move idle devices to D3hot power state
  vfio-pci: Remove warning if try-reset fails
  vfio-pci: Allow PCI IDs to be specified as module options
  vfio-pci: Add VGA arbiter client
  vfio-pci: Add module option to disable VGA region access
  vgaarb: Stub vga_set_legacy_decoding()
  vfio: Split virqfd into a separate module for vfio bus drivers
  vfio: virqfd_lock can be static
  vfio: put off the allocation of "minor" in vfio_create_group
  vfio/platform: implement IRQ masking/unmasking via an eventfd
  vfio: initialize the virqfd workqueue in VFIO generic code
  vfio: move eventfd support code for VFIO_PCI to a separate file
  vfio: pass an opaque pointer on virqfd initialization
  vfio: add local lock for virqfd instead of depending on VFIO PCI
  vfio: virqfd: rename vfio_pci_virqfd_init and vfio_pci_virqfd_exit
  vfio: add a vfio_ prefix to virqfd_enable and virqfd_disable and export
  vfio/platform: support for level sensitive interrupts
  vfio/platform: trigger an interrupt via eventfd
  vfio/platform: initial interrupts support code
  ...
parents 07e492eb 5a0ff177
...@@ -13,6 +13,11 @@ config VFIO_SPAPR_EEH ...@@ -13,6 +13,11 @@ config VFIO_SPAPR_EEH
depends on EEH && VFIO_IOMMU_SPAPR_TCE depends on EEH && VFIO_IOMMU_SPAPR_TCE
default n default n
config VFIO_VIRQFD
tristate
depends on VFIO && EVENTFD
default n
menuconfig VFIO menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework" tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API depends on IOMMU_API
...@@ -27,3 +32,4 @@ menuconfig VFIO ...@@ -27,3 +32,4 @@ menuconfig VFIO
If you don't know what to do here, say N. If you don't know what to do here, say N.
source "drivers/vfio/pci/Kconfig" source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
vfio_virqfd-y := virqfd.o
obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO) += vfio.o
obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
obj-$(CONFIG_VFIO_PCI) += pci/ obj-$(CONFIG_VFIO_PCI) += pci/
obj-$(CONFIG_VFIO_PLATFORM) += platform/
config VFIO_PCI config VFIO_PCI
tristate "VFIO support for PCI devices" tristate "VFIO support for PCI devices"
depends on VFIO && PCI && EVENTFD depends on VFIO && PCI && EVENTFD
select VFIO_VIRQFD
help help
Support for the PCI VFIO bus driver. This is required to make Support for the PCI VFIO bus driver. This is required to make
use of PCI drivers using the VFIO framework. use of PCI drivers using the VFIO framework.
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
* Author: Tom Lyon, pugs@cisco.com * Author: Tom Lyon, pugs@cisco.com
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/device.h> #include <linux/device.h>
#include <linux/eventfd.h> #include <linux/eventfd.h>
#include <linux/file.h> #include <linux/file.h>
...@@ -25,6 +27,7 @@ ...@@ -25,6 +27,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/vfio.h> #include <linux/vfio.h>
#include <linux/vgaarb.h>
#include "vfio_pci_private.h" #include "vfio_pci_private.h"
...@@ -32,13 +35,81 @@ ...@@ -32,13 +35,81 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "VFIO PCI - User Level meta-driver" #define DRIVER_DESC "VFIO PCI - User Level meta-driver"
static char ids[1024] __initdata;
module_param_string(ids, ids, sizeof(ids), 0);
MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");
static bool nointxmask; static bool nointxmask;
module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask, MODULE_PARM_DESC(nointxmask,
"Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
#ifdef CONFIG_VFIO_PCI_VGA
static bool disable_vga;
module_param(disable_vga, bool, S_IRUGO);
MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci");
#endif
static bool disable_idle_d3;
module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_idle_d3,
"Disable using the PCI D3 low power state for idle, unused devices");
static DEFINE_MUTEX(driver_lock); static DEFINE_MUTEX(driver_lock);
static inline bool vfio_vga_disabled(void)
{
#ifdef CONFIG_VFIO_PCI_VGA
return disable_vga;
#else
return true;
#endif
}
/*
* Our VGA arbiter participation is limited since we don't know anything
* about the device itself. However, if the device is the only VGA device
* downstream of a bridge and VFIO VGA support is disabled, then we can
* safely return legacy VGA IO and memory as not decoded since the user
* has no way to get to it and routing can be disabled externally at the
* bridge.
*/
static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)
{
struct vfio_pci_device *vdev = opaque;
struct pci_dev *tmp = NULL, *pdev = vdev->pdev;
unsigned char max_busnr;
unsigned int decodes;
if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
max_busnr = pci_bus_max_busnr(pdev->bus);
decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
if (tmp == pdev ||
pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
pci_is_root_bus(tmp->bus))
continue;
if (tmp->bus->number >= pdev->bus->number &&
tmp->bus->number <= max_busnr) {
pci_dev_put(tmp);
decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
break;
}
}
return decodes;
}
static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
{
return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
}
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static int vfio_pci_enable(struct vfio_pci_device *vdev) static int vfio_pci_enable(struct vfio_pci_device *vdev)
...@@ -48,6 +119,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) ...@@ -48,6 +119,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
u16 cmd; u16 cmd;
u8 msix_pos; u8 msix_pos;
pci_set_power_state(pdev, PCI_D0);
/* Don't allow our initial saved state to include busmaster */ /* Don't allow our initial saved state to include busmaster */
pci_clear_master(pdev); pci_clear_master(pdev);
...@@ -93,10 +166,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) ...@@ -93,10 +166,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
} else } else
vdev->msix_bar = 0xFF; vdev->msix_bar = 0xFF;
#ifdef CONFIG_VFIO_PCI_VGA if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vdev->has_vga = true; vdev->has_vga = true;
#endif
return 0; return 0;
} }
...@@ -153,20 +224,17 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) ...@@ -153,20 +224,17 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
* Try to reset the device. The success of this is dependent on * Try to reset the device. The success of this is dependent on
* being able to lock the device, which is not always possible. * being able to lock the device, which is not always possible.
*/ */
if (vdev->reset_works) { if (vdev->reset_works && !pci_try_reset_function(pdev))
int ret = pci_try_reset_function(pdev); vdev->needs_reset = false;
if (ret)
pr_warn("%s: Failed to reset device %s (%d)\n",
__func__, dev_name(&pdev->dev), ret);
else
vdev->needs_reset = false;
}
pci_restore_state(pdev); pci_restore_state(pdev);
out: out:
pci_disable_device(pdev); pci_disable_device(pdev);
vfio_pci_try_bus_reset(vdev); vfio_pci_try_bus_reset(vdev);
if (!disable_idle_d3)
pci_set_power_state(pdev, PCI_D3hot);
} }
static void vfio_pci_release(void *device_data) static void vfio_pci_release(void *device_data)
...@@ -885,6 +953,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -885,6 +953,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret) { if (ret) {
iommu_group_put(group); iommu_group_put(group);
kfree(vdev); kfree(vdev);
return ret;
}
if (vfio_pci_is_vga(pdev)) {
vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
vga_set_legacy_decoding(pdev,
vfio_pci_set_vga_decode(vdev, false));
}
if (!disable_idle_d3) {
/*
* pci-core sets the device power state to an unknown value at
* bootup and after being removed from a driver. The only
* transition it allows from this unknown state is to D0, which
* typically happens when a driver calls pci_enable_device().
* We're not ready to enable the device yet, but we do want to
* be able to get to D3. Therefore first do a D0 transition
* before going to D3.
*/
pci_set_power_state(pdev, PCI_D0);
pci_set_power_state(pdev, PCI_D3hot);
} }
return ret; return ret;
...@@ -895,10 +984,21 @@ static void vfio_pci_remove(struct pci_dev *pdev) ...@@ -895,10 +984,21 @@ static void vfio_pci_remove(struct pci_dev *pdev)
struct vfio_pci_device *vdev; struct vfio_pci_device *vdev;
vdev = vfio_del_group_dev(&pdev->dev); vdev = vfio_del_group_dev(&pdev->dev);
if (vdev) { if (!vdev)
iommu_group_put(pdev->dev.iommu_group); return;
kfree(vdev);
iommu_group_put(pdev->dev.iommu_group);
kfree(vdev);
if (vfio_pci_is_vga(pdev)) {
vga_client_register(pdev, NULL, NULL, NULL);
vga_set_legacy_decoding(pdev,
VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
} }
if (!disable_idle_d3)
pci_set_power_state(pdev, PCI_D0);
} }
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
...@@ -1017,10 +1117,13 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) ...@@ -1017,10 +1117,13 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
put_devs: put_devs:
for (i = 0; i < devs.cur_index; i++) { for (i = 0; i < devs.cur_index; i++) {
if (!ret) { tmp = vfio_device_data(devs.devices[i]);
tmp = vfio_device_data(devs.devices[i]); if (!ret)
tmp->needs_reset = false; tmp->needs_reset = false;
}
if (!tmp->refcnt && !disable_idle_d3)
pci_set_power_state(tmp->pdev, PCI_D3hot);
vfio_device_put(devs.devices[i]); vfio_device_put(devs.devices[i]);
} }
...@@ -1030,10 +1133,50 @@ put_devs: ...@@ -1030,10 +1133,50 @@ put_devs:
static void __exit vfio_pci_cleanup(void) static void __exit vfio_pci_cleanup(void)
{ {
pci_unregister_driver(&vfio_pci_driver); pci_unregister_driver(&vfio_pci_driver);
vfio_pci_virqfd_exit();
vfio_pci_uninit_perm_bits(); vfio_pci_uninit_perm_bits();
} }
static void __init vfio_pci_fill_ids(void)
{
char *p, *id;
int rc;
/* no ids passed actually */
if (ids[0] == '\0')
return;
/* add ids specified in the module parameter */
p = ids;
while ((id = strsep(&p, ","))) {
unsigned int vendor, device, subvendor = PCI_ANY_ID,
subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
int fields;
if (!strlen(id))
continue;
fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
&vendor, &device, &subvendor, &subdevice,
&class, &class_mask);
if (fields < 2) {
pr_warn("invalid id string \"%s\"\n", id);
continue;
}
rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
subvendor, subdevice, class, class_mask, 0);
if (rc)
pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n",
vendor, device, subvendor, subdevice,
class, class_mask, rc);
else
pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n",
vendor, device, subvendor, subdevice,
class, class_mask);
}
}
static int __init vfio_pci_init(void) static int __init vfio_pci_init(void)
{ {
int ret; int ret;
...@@ -1043,21 +1186,16 @@ static int __init vfio_pci_init(void) ...@@ -1043,21 +1186,16 @@ static int __init vfio_pci_init(void)
if (ret) if (ret)
return ret; return ret;
/* Start the virqfd cleanup handler */
ret = vfio_pci_virqfd_init();
if (ret)
goto out_virqfd;
/* Register and scan for devices */ /* Register and scan for devices */
ret = pci_register_driver(&vfio_pci_driver); ret = pci_register_driver(&vfio_pci_driver);
if (ret) if (ret)
goto out_driver; goto out_driver;
vfio_pci_fill_ids();
return 0; return 0;
out_driver: out_driver:
vfio_pci_virqfd_exit();
out_virqfd:
vfio_pci_uninit_perm_bits(); vfio_pci_uninit_perm_bits();
return ret; return ret;
} }
......
...@@ -19,230 +19,19 @@ ...@@ -19,230 +19,19 @@
#include <linux/msi.h> #include <linux/msi.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfio.h> #include <linux/vfio.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/slab.h> #include <linux/slab.h>
#include "vfio_pci_private.h" #include "vfio_pci_private.h"
/*
* IRQfd - generic
*/
struct virqfd {
struct vfio_pci_device *vdev;
struct eventfd_ctx *eventfd;
int (*handler)(struct vfio_pci_device *, void *);
void (*thread)(struct vfio_pci_device *, void *);
void *data;
struct work_struct inject;
wait_queue_t wait;
poll_table pt;
struct work_struct shutdown;
struct virqfd **pvirqfd;
};
static struct workqueue_struct *vfio_irqfd_cleanup_wq;
int __init vfio_pci_virqfd_init(void)
{
vfio_irqfd_cleanup_wq =
create_singlethread_workqueue("vfio-irqfd-cleanup");
if (!vfio_irqfd_cleanup_wq)
return -ENOMEM;
return 0;
}
void vfio_pci_virqfd_exit(void)
{
destroy_workqueue(vfio_irqfd_cleanup_wq);
}
static void virqfd_deactivate(struct virqfd *virqfd)
{
queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
}
static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
unsigned long flags = (unsigned long)key;
if (flags & POLLIN) {
/* An event has been signaled, call function */
if ((!virqfd->handler ||
virqfd->handler(virqfd->vdev, virqfd->data)) &&
virqfd->thread)
schedule_work(&virqfd->inject);
}
if (flags & POLLHUP) {
unsigned long flags;
spin_lock_irqsave(&virqfd->vdev->irqlock, flags);
/*
* The eventfd is closing, if the virqfd has not yet been
* queued for release, as determined by testing whether the
* vdev pointer to it is still valid, queue it now. As
* with kvm irqfds, we know we won't race against the virqfd
* going away because we hold wqh->lock to get here.
*/
if (*(virqfd->pvirqfd) == virqfd) {
*(virqfd->pvirqfd) = NULL;
virqfd_deactivate(virqfd);
}
spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
}
return 0;
}
static void virqfd_ptable_queue_proc(struct file *file,
wait_queue_head_t *wqh, poll_table *pt)
{
struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
add_wait_queue(wqh, &virqfd->wait);
}
static void virqfd_shutdown(struct work_struct *work)
{
struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
u64 cnt;
eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
flush_work(&virqfd->inject);
eventfd_ctx_put(virqfd->eventfd);
kfree(virqfd);
}
static void virqfd_inject(struct work_struct *work)
{
struct virqfd *virqfd = container_of(work, struct virqfd, inject);
if (virqfd->thread)
virqfd->thread(virqfd->vdev, virqfd->data);
}
static int virqfd_enable(struct vfio_pci_device *vdev,
int (*handler)(struct vfio_pci_device *, void *),
void (*thread)(struct vfio_pci_device *, void *),
void *data, struct virqfd **pvirqfd, int fd)
{
struct fd irqfd;
struct eventfd_ctx *ctx;
struct virqfd *virqfd;
int ret = 0;
unsigned int events;
virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
if (!virqfd)
return -ENOMEM;
virqfd->pvirqfd = pvirqfd;
virqfd->vdev = vdev;
virqfd->handler = handler;
virqfd->thread = thread;
virqfd->data = data;
INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
INIT_WORK(&virqfd->inject, virqfd_inject);
irqfd = fdget(fd);
if (!irqfd.file) {
ret = -EBADF;
goto err_fd;
}
ctx = eventfd_ctx_fileget(irqfd.file);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto err_ctx;
}
virqfd->eventfd = ctx;
/*
* virqfds can be released by closing the eventfd or directly
* through ioctl. These are both done through a workqueue, so
* we update the pointer to the virqfd under lock to avoid
* pushing multiple jobs to release the same virqfd.
*/
spin_lock_irq(&vdev->irqlock);
if (*pvirqfd) {
spin_unlock_irq(&vdev->irqlock);
ret = -EBUSY;
goto err_busy;
}
*pvirqfd = virqfd;
spin_unlock_irq(&vdev->irqlock);
/*
* Install our own custom wake-up handling so we are notified via
* a callback whenever someone signals the underlying eventfd.
*/
init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
/*
* Check if there was an event already pending on the eventfd
* before we registered and trigger it as if we didn't miss it.
*/
if (events & POLLIN) {
if ((!handler || handler(vdev, data)) && thread)
schedule_work(&virqfd->inject);
}
/*
* Do not drop the file until the irqfd is fully initialized,
* otherwise we might race against the POLLHUP.
*/
fdput(irqfd);
return 0;
err_busy:
eventfd_ctx_put(ctx);
err_ctx:
fdput(irqfd);
err_fd:
kfree(virqfd);
return ret;
}
static void virqfd_disable(struct vfio_pci_device *vdev,
struct virqfd **pvirqfd)
{