Commit cd6aea9f authored by Vikram Narayanan's avatar Vikram Narayanan

lcd/iommu: Add IOMMU support

Introduce four lcd_syscalls for programming iommu for a pci device from LCD.
PCI devices can now be assigned to an LCD. During probe, one can assign a
device to a particular LCD. During device assignment, all pages inside the heap
and rammap regions are mapped in the iommu. During data plane operation, one
can map/unmap a page by means of lcd_syscall's.
Signed-off-by: Vikram Narayanan <vikram186@gmail.com>
parent 2b0bc478
......@@ -206,6 +206,26 @@ static inline int lcd_syscall_sync_reply(void)
return lcd_syscall_no_args(LCD_SYSCALL_SYNC_REPLY);
}
/*
 * Issue LCD_SYSCALL_ASSIGN_DEVICE for the PCI device identified by
 * (domain, bus, devfn) and hand back the syscall's result.
 */
static inline int lcd_syscall_assign_device(int domain, int bus, int devfn)
{
	int rc;

	rc = lcd_syscall_three_args(LCD_SYSCALL_ASSIGN_DEVICE,
				    domain, bus, devfn);
	return rc;
}
/*
 * Issue LCD_SYSCALL_DEASSIGN_DEVICE for the PCI device identified by
 * (domain, bus, devfn) and hand back the syscall's result.
 */
static inline int lcd_syscall_deassign_device(int domain, int bus, int devfn)
{
	int rc;

	rc = lcd_syscall_three_args(LCD_SYSCALL_DEASSIGN_DEVICE,
				    domain, bus, devfn);
	return rc;
}
/*
 * Issue LCD_SYSCALL_IOMMU_MAP_PAGE.
 *
 * The raw value of gpa, the order and the force flag are passed as the
 * three syscall arguments, in that order. The syscall's result is
 * returned unchanged.
 */
static inline int lcd_syscall_iommu_map_page(gpa_t gpa, unsigned int order, bool force)
{
	int rc;

	rc = lcd_syscall_three_args(LCD_SYSCALL_IOMMU_MAP_PAGE,
				    gpa_val(gpa), order, force);
	return rc;
}
/*
 * Issue LCD_SYSCALL_IOMMU_UNMAP_PAGE with the raw value of gpa as the
 * only argument and hand back the syscall's result.
 */
static inline int lcd_syscall_iommu_unmap_page(gpa_t gpa)
{
	int rc;

	rc = lcd_syscall_one_arg(LCD_SYSCALL_IOMMU_UNMAP_PAGE, gpa_val(gpa));
	return rc;
}
static inline int lcd_syscall_create_sync_ep(cptr_t slot)
{
return lcd_syscall_one_arg(LCD_SYSCALL_CREATE_SYNC_EP, cptr_val(slot));
......
#ifndef _LCD_IOMMU_H
#define _LCD_IOMMU_H
#include <linux/iommu.h>
#include <asm/lcd_domains/ept.h>
#include <asm/liblcd/address_spaces.h>
#include <lcd_domains/microkernel.h>
/**
 * Map the whole heap/rammap region of an LCD in the IOMMU hardware.
 *
 * There are a couple of things to note here:
 * 1) Not all pages in the heap region are backed by physical pages
 * 2) The liblcd allocator supports on-demand allocation
 *
 * We should therefore allocate the set of pages needed for recv skb's
 * during boot and only then invoke the IOMMU map, so that all the
 * necessary gpa pages already have a valid hpa. Once we have a
 * (gpa, hpa) pair, we can map it in the IOMMU hardware.
 *
 * Volunteered memory gets mapped inside the LCD, in the RAM map region.
 * Tx buffers are in a shared region which is volunteered by the KLCD to
 * a particular LCD (and eventually mapped using mmap).
 *
 * @param lcd pointer to the lcd structure
 * @return 0 on success and -errno on failure
 */
int lcd_iommu_map_memory(struct lcd *lcd);
/**
 * Program the gpa:hpa pairs of a block of pages into the iommu
 *
 * @param lcd pointer to the lcd structure
 * @param gpa guest physical address of the first page to be mapped
 * @param order log2 of the number of pages to map, starting at gpa
 * @param force whether to force a remap if already mapped
 * @return 0 on success and -errno on failure
 */
int lcd_iommu_map_page(struct lcd *lcd, gpa_t gpa, unsigned int order,
		       bool force);

/**
 * Remove the iommu mapping of a single page
 *
 * @param lcd pointer to the lcd structure
 * @param gpa guest physical address of the page to be unmapped
 * @return 0 on success (also when nothing was mapped at gpa) and
 *         -EINVAL when the lcd has no iommu domain
 */
int lcd_iommu_unmap_page(struct lcd *lcd, gpa_t gpa);
/**
 * Unmap the heap/rammap mappings from the iommu. The logic mirrors
 * lcd_iommu_map_memory(), except that the mappings are removed from
 * the iommu instead of being added.
 *
 * @param lcd pointer to the lcd structure
 * @return 0 on success and -errno on failure
 */
int lcd_iommu_unmap_memory(struct lcd *lcd);
/**
 * Top level function for mapping a domain: allocates an iommu domain
 * for the lcd, maps the lcd's memory into it and attaches pdev to it.
 *
 * @param lcd pointer to the lcd structure
 * @param pdev pointer to the pci_dev structure
 * @return 0 on success and -errno on failure
 */
int lcd_iommu_map_domain(struct lcd *lcd, struct pci_dev *pdev);

/**
 * Top level function for unmapping a domain: detaches pdev from the
 * lcd's iommu domain and frees that domain.
 *
 * @param lcd pointer to the lcd structure
 * @param pdev pointer to the pci_dev structure
 * @return 0 on success, -EINVAL when the lcd has no iommu domain
 */
int lcd_iommu_unmap_domain(struct lcd *lcd, struct pci_dev *pdev);
/**
 * Region start and end in the guest physical layout to be mapped.
 *
 * Each *_END_ADDR is the address of the LAST byte of its 1GB region
 * (start + 1GB - 1), i.e. the end is inclusive.
 */
#define HEAP_START_ADDR LCD_HEAP_OFFSET
#define HEAP_END_ADDR (HEAP_START_ADDR + (1 << 30) - 1)
/* XXX: There is a hole of 1GB in between the heap and rammap. */
#define RAMMAP_START_ADDR LCD_RAM_MAP_OFFSET
#define RAMMAP_END_ADDR (RAMMAP_START_ADDR + (1 << 30) - 1)
/* Convert a raw guest physical address into its guest frame number */
#define gpa_to_gfn(gpa) ((gpa) >> PAGE_SHIFT)
/* Convert a guest frame number into the gpa_t of the frame's first byte */
static inline gpa_t gfn_to_gpa(u64 gfn)
{
	u64 addr = gfn << PAGE_SHIFT;

	return __gpa(addr);
}
#endif /* _LCD_IOMMU_H */
......@@ -177,6 +177,12 @@ struct lcd {
* ==============
*/
int (*klcd_main)(void);
/* FIXME: If we need to assign more than one device to an LCD,
* this won't work. We need sophisticated mechanisms to handle
* that. For now, I hope this should be enough.
*/
struct iommu_domain *domain;
};
/* similar to task structs */
......
......@@ -30,6 +30,10 @@ enum lcd_syscall {
LCD_SYSCALL_DUMP_STACK,
LCD_SYSCALL_IRQ_ENABLE,
LCD_SYSCALL_IRQ_DISABLE,
LCD_SYSCALL_ASSIGN_DEVICE,
LCD_SYSCALL_DEASSIGN_DEVICE,
LCD_SYSCALL_IOMMU_MAP_PAGE,
LCD_SYSCALL_IOMMU_UNMAP_PAGE,
};
#endif /* LCD_DOMAINS_SYSCALL_H */
#include <lcd_domains/lcd_iommu.h>
/* Map the whole of heap and rammap regions for now.
 * Once the allocation logic is figured out, one can even introduce
 * a new region and map only that region inside the iommu.
 *
 * start_gfn is the first guest frame of a region; end_gfn is the frame
 * that contains the region's last byte (see the *_END_ADDR macros).
 *
 * The table is read-only and private to this file, so it is declared
 * static const to keep the generic name `regions' out of the kernel's
 * global namespace.
 */
struct region {
	const char *name;
	u64 start_gfn;
	u64 end_gfn;
};

static const struct region regions[] = {
	{
		.name = "heap",
		.start_gfn = gpa_to_gfn(HEAP_START_ADDR),
		.end_gfn = gpa_to_gfn(HEAP_END_ADDR),
	},
	{
		.name = "rammap",
		.start_gfn = gpa_to_gfn(RAMMAP_START_ADDR),
		.end_gfn = gpa_to_gfn(RAMMAP_END_ADDR),
	},
};
/* XXX: We are indeed walking the ept *without* a lock!
 * HINT: A mutex is initialized during ept init and never used.
 * What are the consequences?
 * - Be prepared for some stale entries and bugs
 */
/*
 * Program the gpa:hpa pairs of 2^order pages, starting at gpa, into the
 * lcd's iommu domain.
 *
 * Pages that have no EPT mapping are skipped silently. Pages whose
 * iommu entry already matches the EPT are left alone. When an entry
 * exists but points elsewhere and force is set, the old entry is
 * unmapped before the new one is programmed.
 *
 * Every page is attempted even if an earlier one failed; the first
 * iommu_map() error encountered is the one reported.
 *
 * Returns 0 on success, -EINVAL when the lcd has no iommu domain or
 * order is out of range, or the first iommu_map() error.
 */
int lcd_iommu_map_page(struct lcd *lcd, gpa_t gpa, unsigned int order,
		       bool force)
{
	u64 gfn_start = gpa_to_gfn(gpa_val(gpa));
	u64 nr_pages;
	u64 i;
	int first_err = 0;

	/* If domain pointer is null */
	if (!lcd->domain)
		return -EINVAL;

	/*
	 * order is supplied by the LCD. Reject values for which the page
	 * count (and hence the shift below) is not representable.
	 */
	if (order >= 8 * sizeof(nr_pages) - PAGE_SHIFT)
		return -EINVAL;
	nr_pages = 1ULL << order;

	for (i = 0; i < nr_pages; ++i) {
		gpa_t ga = gfn_to_gpa(gfn_start + i);
		hpa_t hpa;
		phys_addr_t phys;
		size_t unmapped;
		int ret;

		/* no ept mapping for this page is not an error, skip it */
		if (lcd_arch_ept_gpa_to_hpa(lcd->lcd_arch, ga, &hpa, false))
			continue;

		/* check for an existing mapping */
		phys = iommu_iova_to_phys(lcd->domain, gpa_val(ga));

		/* old == new, no change is required */
		if (phys && phys == hpa_val(hpa))
			continue;

		/*
		 * An entry is present but differs from the requested one.
		 * With force set, delete the old mapping first.
		 *
		 * NOTE(review): without force the stale entry is left in
		 * place and iommu_map() is still attempted below; confirm
		 * whether that case should fail with -EEXIST instead.
		 */
		if (phys && force) {
			/* XXX: This won't work as expected as unmapped entries are
			 * not immediately flushed to the hardware. It is just queued
			 * for flushing. There might be a race if we need to remove
			 * an old mapping and program a new one immediately. If that
			 * occurs, dmar will warn us.
			 */
			printk("%s, already mapped? old %llx | new %lx\n",
			       __func__, (unsigned long long)phys,
			       hpa_val(hpa));
			/* unmap the old mapping of *this* page, not the first one */
			unmapped = iommu_unmap(lcd->domain, gpa_val(ga),
					       PAGE_SIZE);
			BUG_ON(unmapped != PAGE_SIZE);
		}

		printk("%s, mapping gpa:hpa %lx:%lx pair\n", __func__,
		       gpa_val(ga), hpa_val(hpa));

		/* map */
		ret = iommu_map(lcd->domain, gpa_val(ga), hpa_val(hpa),
				PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
		if (ret) {
			LIBLCD_ERR("%s: iommu_map failed for hpa:gpa %lx:%lx",
				   __func__, hpa_val(hpa), gpa_val(ga));
			/* keep the first failure, later successes must not hide it */
			if (!first_err)
				first_err = ret;
		}
	}

	return first_err;
}
/*
 * Remove the iommu mapping of the single page at gpa.
 *
 * A gpa without an EPT mapping is treated as a spurious request: a
 * warning is logged and 0 is returned. If the iommu holds no entry for
 * gpa there is nothing to do and 0 is returned as well.
 *
 * Returns 0 on success, -EINVAL when the lcd has no iommu domain.
 */
int lcd_iommu_unmap_page(struct lcd *lcd, gpa_t gpa)
{
	hpa_t hpa;
	size_t unmapped;

	/* no mapping. Just return */
	if (lcd_arch_ept_gpa_to_hpa(lcd->lcd_arch, gpa, &hpa, true)) {
		/* hpa is not valid on this path, so only gpa is logged */
		LIBLCD_WARN("%s, spurious unmap to gpa %lx",
			    __func__, gpa_val(gpa));
		return 0;
	}

	if (!lcd->domain)
		return -EINVAL;

	/*
	 * Test the phys_addr_t directly: storing it in an int would make
	 * an address whose low 32 bits are zero look like "not mapped".
	 */
	if (iommu_iova_to_phys(lcd->domain, gpa_val(gpa))) {
		/* A valid mapping is present */
		unmapped = iommu_unmap(lcd->domain, gpa_val(gpa), PAGE_SIZE);
		/* bug if not a page */
		BUG_ON(unmapped != PAGE_SIZE);
	}

	/* return success */
	return 0;
}
int lcd_iommu_map_memory(struct lcd *lcd)
{
int i, ret;
u64 p, gfn_end;
for (i = 0; i < ARRAY_SIZE(regions); i++) {
p = regions[i].start_gfn;
gfn_end = regions[i].end_gfn;
for (; p < gfn_end; ++p) {
gpa_t ga = gfn_to_gpa(p);
hpa_t hpa;
ret =
lcd_arch_ept_gpa_to_hpa(lcd->lcd_arch, ga, &hpa,
false);
/* no entry. Just continue */
if (ret)
continue;
/* A valid entry is present.
* Program its gpa:hpa into our IOMMU domain.
* For now, let's have flags as IOMMU_RW.
* TODO: Separate READ/WRITE
*/
ret = iommu_map(lcd->domain, gpa_val(ga), hpa_val(hpa),
PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
if (ret) {
LIBLCD_ERR
("%s: iommu_map failed for hpa:gpa %p:%p",
__func__, hpa_val(hpa), gpa_val(ga));
goto fail;
}
}
}
return 0;
fail:
lcd_iommu_unmap_memory(lcd);
return ret;
}
int lcd_iommu_unmap_memory(struct lcd *lcd)
{
int i, ret;
u64 p, gfn_end;
for (i = 0; i < ARRAY_SIZE(regions); i++) {
p = regions[i].start_gfn;
gfn_end = regions[i].end_gfn;
for (; p < gfn_end; ++p) {
gpa_t ga = gfn_to_gpa(p);
ret = iommu_iova_to_phys(lcd->domain, gpa_val(ga));
/* no entry. Just continue */
if (!ret)
continue;
/* A valid mapping is present */
ret = iommu_unmap(lcd->domain, gpa_val(ga), PAGE_SIZE);
/* bug if not a page */
BUG_ON(ret != PAGE_SIZE);
}
}
/* return success */
return 0;
}
int lcd_iommu_map_domain(struct lcd *lcd, struct pci_dev *pdev)
{
int ret;
/* step 0. Check if iommu is present */
if (!iommu_present(&pci_bus_type)) {
ret = -ENODEV;
goto out;
}
if (lcd->domain) {
LIBLCD_ERR("Domain mapping %p already found!",
__func__, lcd->domain);
ret = -EEXIST;
goto out;
}
/* step 1. alloc domain */
lcd->domain = iommu_domain_alloc(&pci_bus_type);
if (!lcd->domain) {
ret = -ENOMEM;
goto out;
}
/* We might not use IRQ remapping in the near future, atleast until
* someone makes it work inside LCDs. It's ok to continue.
*/
if (!iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
printk(KERN_WARNING "%s: No interrupt remapping support\n",
__func__);
}
/* step 2. map memory */
ret = lcd_iommu_map_memory(lcd);
if (ret) {
LIBLCD_ERR("%s: assign device failed! ret %d", __func__, ret);
goto fail_map;
}
/* step 3. attach domain */
ret = iommu_attach_device(lcd->domain, &pdev->dev);
if (ret) {
LIBLCD_ERR("%s: attach device failed! ret %d", __func__, ret);
goto fail_attach;
}
return ret;
fail_attach:
lcd_iommu_unmap_memory(lcd);
fail_map:
iommu_domain_free(lcd->domain);
out:
return ret;
}
EXPORT_SYMBOL(lcd_iommu_map_domain);
int lcd_iommu_unmap_domain(struct lcd *lcd, struct pci_dev *pdev)
{
if (!lcd->domain) {
LIBLCD_ERR("%s Domain mapping not found!", __func__);
return -EINVAL;
}
/* step 1. detach device */
iommu_detach_device(lcd->domain, &pdev->dev);
/* step 2. free domain */
iommu_domain_free(lcd->domain);
return 0;
}
EXPORT_SYMBOL(lcd_iommu_unmap_domain);
......@@ -7,7 +7,9 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/pci.h>
#include <lcd_domains/microkernel.h>
#include <lcd_domains/lcd_iommu.h>
#include <asm/lcd_domains/run.h>
#include <asm/lcd_domains/create.h>
......@@ -302,6 +304,79 @@ static int handle_syscall_irq_enable(struct lcd *lcd)
return 0;
}
/*
 * LCD_SYSCALL_IOMMU_MAP_PAGE handler.
 *
 * Syscall arg0 carries the gpa, arg1 the order and arg2 the force flag;
 * they are forwarded to lcd_iommu_map_page() and its result is returned.
 */
static int handle_syscall_iommu_map_page(struct lcd *lcd)
{
	gpa_t target = __gpa(lcd_arch_get_syscall_arg0(lcd->lcd_arch));
	unsigned int page_order = lcd_arch_get_syscall_arg1(lcd->lcd_arch);
	bool do_force = lcd_arch_get_syscall_arg2(lcd->lcd_arch);

	return lcd_iommu_map_page(lcd, target, page_order, do_force);
}
/*
 * LCD_SYSCALL_IOMMU_UNMAP_PAGE handler.
 *
 * Syscall arg0 carries the gpa; it is forwarded to
 * lcd_iommu_unmap_page() and its result is returned.
 */
static int handle_syscall_iommu_unmap_page(struct lcd *lcd)
{
	gpa_t target = __gpa(lcd_arch_get_syscall_arg0(lcd->lcd_arch));

	return lcd_iommu_unmap_page(lcd, target);
}
/*
 * LCD_SYSCALL_ASSIGN_DEVICE handler.
 *
 * Syscall arg0/arg1/arg2 carry the PCI domain, bus and devfn. The
 * device is looked up and, if found, handed to lcd_iommu_map_domain().
 *
 * pci_get_domain_bus_and_slot() returns the device with its reference
 * count raised. That reference is dropped again when the assignment
 * fails. On success it is kept while the device stays assigned.
 * NOTE(review): nothing visible here releases that reference on
 * deassign - confirm where it should be dropped.
 *
 * Returns the result of lcd_iommu_map_domain(), or -ENODEV when the
 * device does not exist.
 */
static int handle_syscall_assign_device(struct lcd *lcd)
{
	int domain, bus, devfn;
	struct pci_dev *dev;
	int ret;

	domain = lcd_arch_get_syscall_arg0(lcd->lcd_arch);
	bus = lcd_arch_get_syscall_arg1(lcd->lcd_arch);
	devfn = lcd_arch_get_syscall_arg2(lcd->lcd_arch);

	dev = pci_get_domain_bus_and_slot(domain, bus, devfn);
	if (!dev) {
		LCD_ERR("couldn't get device %x:%x:%x", domain, bus, devfn);
		return -ENODEV;
	}

	LCD_MSG("Device found %x:%x:%x. mapping into iommu domain",
		domain, bus, devfn);
	ret = lcd_iommu_map_domain(lcd, dev);
	LCD_MSG("lcd iommu map returned %d", ret);

	/* the device was not attached, so do not keep it pinned */
	if (ret)
		pci_dev_put(dev);

	return ret;
}
/*
 * LCD_SYSCALL_DEASSIGN_DEVICE handler.
 *
 * Syscall arg0/arg1/arg2 carry the PCI domain, bus and devfn. The
 * device is looked up and, if found, handed to
 * lcd_iommu_unmap_domain().
 *
 * pci_get_domain_bus_and_slot() returns the device with its reference
 * count raised; the reference taken by this lookup is dropped before
 * returning.
 *
 * Returns the result of lcd_iommu_unmap_domain(), or -ENODEV when the
 * device does not exist.
 */
static int handle_syscall_deassign_device(struct lcd *lcd)
{
	int domain, bus, devfn;
	struct pci_dev *dev;
	int ret;

	domain = lcd_arch_get_syscall_arg0(lcd->lcd_arch);
	bus = lcd_arch_get_syscall_arg1(lcd->lcd_arch);
	devfn = lcd_arch_get_syscall_arg2(lcd->lcd_arch);

	dev = pci_get_domain_bus_and_slot(domain, bus, devfn);
	if (!dev) {
		LCD_ERR("couldn't get device %x:%x:%x", domain, bus, devfn);
		return -ENODEV;
	}

	LCD_MSG("Device found %x:%x:%x. unmapping from iommu domain",
		domain, bus, devfn);
	ret = lcd_iommu_unmap_domain(lcd, dev);
	LCD_MSG("lcd iommu unmap returned %d", ret);

	/* drop the reference taken by the lookup above */
	pci_dev_put(dev);

	return ret;
}
static int handle_syscall(struct lcd *lcd, int *lcd_ret)
{
int syscall_id;
......@@ -379,6 +454,18 @@ static int handle_syscall(struct lcd *lcd, int *lcd_ret)
case LCD_SYSCALL_IRQ_ENABLE:
ret = handle_syscall_irq_enable(lcd);
break;
case LCD_SYSCALL_ASSIGN_DEVICE:
ret = handle_syscall_assign_device(lcd);
break;
case LCD_SYSCALL_DEASSIGN_DEVICE:
ret = handle_syscall_deassign_device(lcd);
break;
case LCD_SYSCALL_IOMMU_MAP_PAGE:
ret = handle_syscall_iommu_map_page(lcd);
break;
case LCD_SYSCALL_IOMMU_UNMAP_PAGE:
ret = handle_syscall_iommu_unmap_page(lcd);
break;
default:
LCD_ERR("unimplemented syscall %d", syscall_id);
ret = -ENOSYS;
......
......@@ -21,6 +21,7 @@ lcd_domains-y += $(addprefix microkernel/, \
ipc.o \
main.o \
mem.o \
iommu.o \
mem_itree.o \
run.o \
ksym.o \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment