NOTE(review): the header names of all "#include" lines were lost in this
copy of the patch (only the bare directive survived). They are kept as found
and have to be restored from the original commit before the patch applies.

diff --git a/lcd-domains/arch/x86/include/asm/lcd_domains/liblcd.h b/lcd-domains/arch/x86/include/asm/lcd_domains/liblcd.h
index 2dbd08a371c421cc2eba72e0aea6fb514772138d..3c4a161b6ac8fb16733ef9e6796a4e73acfef420 100644
--- a/lcd-domains/arch/x86/include/asm/lcd_domains/liblcd.h
+++ b/lcd-domains/arch/x86/include/asm/lcd_domains/liblcd.h
@@ -206,6 +206,30 @@ static inline int lcd_syscall_sync_reply(void)
 	return lcd_syscall_no_args(LCD_SYSCALL_SYNC_REPLY);
 }
 
+/* Ask the microkernel to assign PCI device domain:bus:devfn to this LCD. */
+static inline int lcd_syscall_assign_device(int domain, int bus, int devfn)
+{
+	return lcd_syscall_three_args(LCD_SYSCALL_ASSIGN_DEVICE, domain, bus, devfn);
+}
+
+/* Ask the microkernel to take PCI device domain:bus:devfn away again. */
+static inline int lcd_syscall_deassign_device(int domain, int bus, int devfn)
+{
+	return lcd_syscall_three_args(LCD_SYSCALL_DEASSIGN_DEVICE, domain, bus, devfn);
+}
+
+/* Map 2^order pages starting at gpa into the iommu domain of this LCD. */
+static inline int lcd_syscall_iommu_map_page(gpa_t gpa, unsigned int order, bool force)
+{
+	return lcd_syscall_three_args(LCD_SYSCALL_IOMMU_MAP_PAGE, gpa_val(gpa), order, force);
+}
+
+/* Remove the iommu mapping of the single page at gpa. */
+static inline int lcd_syscall_iommu_unmap_page(gpa_t gpa)
+{
+	return lcd_syscall_one_arg(LCD_SYSCALL_IOMMU_UNMAP_PAGE, gpa_val(gpa));
+}
+
 static inline int lcd_syscall_create_sync_ep(cptr_t slot)
 {
 	return lcd_syscall_one_arg(LCD_SYSCALL_CREATE_SYNC_EP, cptr_val(slot));
diff --git a/lcd-domains/include/lcd_domains/lcd_iommu.h b/lcd-domains/include/lcd_domains/lcd_iommu.h
new file mode 100644
index 0000000000000000000000000000000000000000..8a3e88fbc9709d8a04b34dc7a37624a64125b8ab
--- /dev/null
+++ b/lcd-domains/include/lcd_domains/lcd_iommu.h
@@ -0,0 +1,93 @@
+#ifndef _LCD_IOMMU_H
+#define _LCD_IOMMU_H
+
+#include
+#include
+#include
+#include
+
+/**
+ * Map the whole heap/rammap region of LCDs in IOMMU hardware
+ * There are a couple of things to note here,
+ * 1) Not all pages in the heap region are backed by physical pages
+ * 2) liblcd allocator supports on-demand allocation
+ *
+ * We should allocate a set of pages for recv skb's during boot,
+ * and then invoke IOMMU map as all the necessary gpa pages would have
+ * a valid hpa. As we got a (gpa, hpa) pair, we can map it in the
+ * IOMMU hardware.
+ *
+ * Volunteered memory gets mapped inside the LCDs, in the RAM map
+ * region. Tx buffers are in a shared region which is volunteered
+ * by the KLCD to a particular LCD (and eventually mapped using mmap).
+ *
+ * @param lcd pointer to the lcd structure
+ * @return 0 on success and -errno on failure
+ */
+int lcd_iommu_map_memory(struct lcd *lcd);
+
+/**
+ * Program the gpa:hpa pairs of 2^order pages into the iommu
+ *
+ * @param lcd pointer to the lcd structure
+ * @param gpa guest physical address of the first page to be mapped
+ * @param order log2 of the number of pages to map, starting at gpa
+ * @param force whether to force a remap if already mapped
+ * @return 0 on success and -errno on failure
+ */
+int lcd_iommu_map_page(struct lcd *lcd, gpa_t gpa, unsigned int order,
+		       bool force);
+
+/**
+ * Remove the mapping of a single page from the iommu
+ *
+ * @param lcd pointer to the lcd structure
+ * @param gpa guest physical address of the page to be unmapped
+ * @return 0 on success and -errno on failure
+ */
+int lcd_iommu_unmap_page(struct lcd *lcd, gpa_t gpa);
+
+/**
+ * unmap the mappings from iommu. The logic is similar to mapping, except
+ * that the mappings are removed from the iommu.
+ * @param lcd pointer to the lcd structure
+ * @return 0 on success and -errno on failure
+ */
+int lcd_iommu_unmap_memory(struct lcd *lcd);
+
+/**
+ * Top level function for mapping a domain.
+ * @param lcd pointer to the lcd structure
+ * @param pdev pointer to the pci_dev structure
+ * @return 0 on success and -errno on failure
+ */
+int lcd_iommu_map_domain(struct lcd *lcd, struct pci_dev *pdev);
+
+/**
+ * Top level function for unmapping a domain: detaches pdev and frees
+ * the iommu domain of the lcd.
+ * @param lcd pointer to the lcd structure
+ * @param pdev pointer to the pci_dev structure
+ * @return 0 on success and -errno on failure
+ */
+int lcd_iommu_unmap_domain(struct lcd *lcd, struct pci_dev *pdev);
+
+/**
+ * Region start and end in the guest physical layout to be mapped.
+ * The *_END_ADDR values are inclusive (address of the last byte).
+ */
+#define HEAP_START_ADDR LCD_HEAP_OFFSET
+#define HEAP_END_ADDR (HEAP_START_ADDR + (1 << 30) - 1)
+
+/* XXX: There is a hole of 1GB in between the heap and rammap. */
+#define RAMMAP_START_ADDR LCD_RAM_MAP_OFFSET
+#define RAMMAP_END_ADDR (RAMMAP_START_ADDR + (1 << 30) - 1)
+
+#define gpa_to_gfn(gpa) ((gpa) >> PAGE_SHIFT)
+
+static inline gpa_t gfn_to_gpa(u64 gfn)
+{
+	return __gpa(gfn << PAGE_SHIFT);
+}
+
+#endif /* _LCD_IOMMU_H */
diff --git a/lcd-domains/include/lcd_domains/microkernel.h b/lcd-domains/include/lcd_domains/microkernel.h
index 05bb02a297f9f708a41981ccf7c9f26f02e6bedb..7dcaca6ceb4acc4c7c30921fb6a0a36247426cdd 100644
--- a/lcd-domains/include/lcd_domains/microkernel.h
+++ b/lcd-domains/include/lcd_domains/microkernel.h
@@ -177,6 +177,12 @@ struct lcd {
 	 * ==============
 	 */
 	int (*klcd_main)(void);
+
+	/* FIXME: If we need to assign more than one device to an LCD,
+	 * this won't work. We need sophisticated mechanisms to handle
+	 * that. For now, I hope this should be enough.
+	 */
+	struct iommu_domain *domain;
 };
 
 /* similar to task structs */
diff --git a/lcd-domains/include/liblcd/syscall.h b/lcd-domains/include/liblcd/syscall.h
index 2ab3a15aeeae3ae05e08f27e90fc84cb7198fdf7..7e5ff9a7bd976c6f998d0283b4573a7931b5e117 100644
--- a/lcd-domains/include/liblcd/syscall.h
+++ b/lcd-domains/include/liblcd/syscall.h
@@ -30,6 +30,10 @@ enum lcd_syscall {
 	LCD_SYSCALL_DUMP_STACK,
 	LCD_SYSCALL_IRQ_ENABLE,
 	LCD_SYSCALL_IRQ_DISABLE,
+	LCD_SYSCALL_ASSIGN_DEVICE,
+	LCD_SYSCALL_DEASSIGN_DEVICE,
+	LCD_SYSCALL_IOMMU_MAP_PAGE,
+	LCD_SYSCALL_IOMMU_UNMAP_PAGE,
 };
 
 #endif /* LCD_DOMAINS_SYSCALL_H */
diff --git a/lcd-domains/microkernel/iommu.c b/lcd-domains/microkernel/iommu.c
new file mode 100644
index 0000000000000000000000000000000000000000..b9f6a2e75d5aeb11fb2a0731b27e460dc35d3427
--- /dev/null
+++ b/lcd-domains/microkernel/iommu.c
@@ -0,0 +1,352 @@
+#include
+
+/* Map the whole of heap and rammap regions for now.
+ * Once the allocation logic is figured out, one can even introduce
+ * a new region and map only that region inside the iommu.
+ *
+ * start_gfn and end_gfn are both inclusive frame numbers.
+ */
+static const struct region {
+	const char *name;
+	u64 start_gfn;
+	u64 end_gfn;
+} regions[] = {
+	{
+		.name = "heap",
+		.start_gfn = gpa_to_gfn(HEAP_START_ADDR),
+		.end_gfn = gpa_to_gfn(HEAP_END_ADDR),
+	},
+	{
+		.name = "rammap",
+		.start_gfn = gpa_to_gfn(RAMMAP_START_ADDR),
+		.end_gfn = gpa_to_gfn(RAMMAP_END_ADDR),
+	}
+};
+
+/* Largest order lcd_iommu_map_page() accepts: one whole 1GB region.
+ * The order is supplied by the LCD, so it has to be bounded before it
+ * is used as a shift count or as a loop bound.
+ */
+#define LCD_IOMMU_MAX_ORDER	(30 - PAGE_SHIFT)
+
+/**
+ * Program the gpa:hpa pairs of 2^order pages, starting at gpa, into the
+ * iommu domain of the lcd. Pages without an ept mapping are skipped.
+ *
+ * XXX: We are indeed walking the ept *without* a lock!
+ * HINT: A mutex is initialized during ept init and never used.
+ * What are the consequences?
+ * - Be prepared for some stale entries and bugs
+ *
+ * @param lcd pointer to the lcd structure
+ * @param gpa guest physical address of the first page
+ * @param order log2 of the number of pages to map
+ * @param force replace a conflicting mapping instead of failing
+ * @return 0 on success; -EINVAL if no domain is set up or order is too
+ *         large; -EEXIST if a page is mapped elsewhere and force is not
+ *         set; -EIO if an old mapping could not be removed; otherwise
+ *         the error returned by iommu_map(). Pages mapped before the
+ *         failure stay mapped.
+ */
+int lcd_iommu_map_page(struct lcd *lcd, gpa_t gpa, unsigned int order,
+		       bool force)
+{
+	u64 gfn_start, nr_pages, i;
+
+	/* If domain pointer is null */
+	if (!lcd->domain)
+		return -EINVAL;
+
+	if (order > LCD_IOMMU_MAX_ORDER)
+		return -EINVAL;
+
+	gfn_start = gpa_to_gfn(gpa_val(gpa));
+	nr_pages = 1ULL << order;
+
+	for (i = 0; i < nr_pages; ++i) {
+		gpa_t ga = gfn_to_gpa(gfn_start + i);
+		phys_addr_t phys;
+		size_t unmapped;
+		hpa_t hpa;
+		int ret;
+
+		/* no ept mapping for this page, nothing to program */
+		if (lcd_arch_ept_gpa_to_hpa(lcd->lcd_arch, ga, &hpa, false))
+			continue;
+
+		/* check for an existing mapping */
+		phys = iommu_iova_to_phys(lcd->domain, gpa_val(ga));
+
+		/* old == new, no change is required */
+		if (phys && phys == hpa_val(hpa))
+			continue;
+
+		if (phys && !force) {
+			LIBLCD_ERR("%s: gpa %lx is already mapped to %llx",
+				   __func__, gpa_val(ga),
+				   (unsigned long long)phys);
+			return -EEXIST;
+		}
+
+		/* an entry is present, differs from the requested mapping
+		 * and force is set: delete the old mapping first.
+		 */
+		if (phys) {
+			/* XXX: This won't work as expected as unmapped entries are
+			 * not immediately flushed to the hardware. It is just queued
+			 * for flushing. There might be a race if we need to remove
+			 * an old mapping and program a new one immediately. If that
+			 * occurs, dmar will warn us.
+			 */
+			printk("%s, already mapped? old %llx | new %lx\n",
+			       __func__, (unsigned long long)phys,
+			       hpa_val(hpa));
+			/* unmap the old mapping of *this* page (ga, not gpa) */
+			unmapped = iommu_unmap(lcd->domain, gpa_val(ga),
+					       PAGE_SIZE);
+			if (WARN_ON(unmapped != PAGE_SIZE))
+				return -EIO;
+		}
+
+		printk("%s, mapping gpa:hpa %lx:%lx pair\n", __func__,
+		       gpa_val(ga), hpa_val(hpa));
+
+		/* map */
+		ret = iommu_map(lcd->domain, gpa_val(ga), hpa_val(hpa),
+				PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
+		if (ret) {
+			LIBLCD_ERR("%s: iommu_map failed for hpa:gpa %lx:%lx",
+				   __func__, hpa_val(hpa), gpa_val(ga));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Remove the iommu mapping of the single page at gpa, if there is one.
+ *
+ * @param lcd pointer to the lcd structure
+ * @param gpa guest physical address of the page to be unmapped
+ * @return 0 on success or if nothing was mapped; -EINVAL if no domain
+ *         is set up; -EIO if the iommu did not unmap exactly one page
+ */
+int lcd_iommu_unmap_page(struct lcd *lcd, gpa_t gpa)
+{
+	phys_addr_t phys;
+	size_t unmapped;
+	hpa_t hpa;
+
+	/* no ept mapping. Just return. hpa may not have been filled in
+	 * when the lookup fails, so it is not printed.
+	 */
+	if (lcd_arch_ept_gpa_to_hpa(lcd->lcd_arch, gpa, &hpa, true)) {
+		LIBLCD_WARN("%s, spurious unmap of gpa %lx",
+			    __func__, gpa_val(gpa));
+		return 0;
+	}
+
+	if (!lcd->domain)
+		return -EINVAL;
+
+	/* keep the full phys_addr_t: squeezing it into an int would make
+	 * a mapping whose low 32 bits are zero look absent.
+	 */
+	phys = iommu_iova_to_phys(lcd->domain, gpa_val(gpa));
+	if (!phys)
+		return 0;
+
+	/* A valid mapping is present */
+	unmapped = iommu_unmap(lcd->domain, gpa_val(gpa), PAGE_SIZE);
+	if (WARN_ON(unmapped != PAGE_SIZE))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * Program every ept-backed page of the heap and rammap regions into the
+ * iommu domain of the lcd. On failure, everything is unmapped again.
+ *
+ * @param lcd pointer to the lcd structure
+ * @return 0 on success and -errno on failure
+ */
+int lcd_iommu_map_memory(struct lcd *lcd)
+{
+	unsigned int i;
+	u64 gfn;
+	int ret;
+
+	if (!lcd->domain)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(regions); i++) {
+		/* end_gfn is the last frame of the region, so include it */
+		for (gfn = regions[i].start_gfn;
+		     gfn <= regions[i].end_gfn; ++gfn) {
+			gpa_t ga = gfn_to_gpa(gfn);
+			hpa_t hpa;
+
+			/* no entry. Just continue */
+			if (lcd_arch_ept_gpa_to_hpa(lcd->lcd_arch, ga, &hpa,
+						    false))
+				continue;
+
+			/* A valid entry is present.
+			 * Program its gpa:hpa into our IOMMU domain.
+			 * For now, let's have flags as IOMMU_RW.
+			 * TODO: Separate READ/WRITE
+			 */
+			ret = iommu_map(lcd->domain, gpa_val(ga), hpa_val(hpa),
+					PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
+			if (ret) {
+				LIBLCD_ERR("%s: iommu_map failed for hpa:gpa %lx:%lx",
+					   __func__, hpa_val(hpa), gpa_val(ga));
+				goto fail;
+			}
+		}
+	}
+
+	return 0;
+fail:
+	lcd_iommu_unmap_memory(lcd);
+	return ret;
+}
+
+/**
+ * Remove every iommu mapping found in the heap and rammap regions.
+ *
+ * @param lcd pointer to the lcd structure
+ * @return 0 on success; -EINVAL if no domain is set up; -EIO if some
+ *         page could not be unmapped (the remaining pages are still
+ *         processed)
+ */
+int lcd_iommu_unmap_memory(struct lcd *lcd)
+{
+	unsigned int i;
+	int ret = 0;
+	u64 gfn;
+
+	if (!lcd->domain)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(regions); i++) {
+		/* end_gfn is the last frame of the region, so include it */
+		for (gfn = regions[i].start_gfn;
+		     gfn <= regions[i].end_gfn; ++gfn) {
+			gpa_t ga = gfn_to_gpa(gfn);
+			size_t unmapped;
+
+			/* no entry. Just continue */
+			if (!iommu_iova_to_phys(lcd->domain, gpa_val(ga)))
+				continue;
+
+			/* A valid mapping is present */
+			unmapped = iommu_unmap(lcd->domain, gpa_val(ga),
+					       PAGE_SIZE);
+			if (WARN_ON_ONCE(unmapped != PAGE_SIZE))
+				ret = -EIO;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * Allocate an iommu domain for the lcd, map the lcd's memory into it
+ * and attach pdev to it.
+ *
+ * @param lcd pointer to the lcd structure
+ * @param pdev pointer to the pci_dev structure
+ * @return 0 on success; -ENODEV without an iommu; -EEXIST if the lcd
+ *         already has a domain; -ENOMEM if the domain allocation
+ *         fails; otherwise the error of the failing step
+ */
+int lcd_iommu_map_domain(struct lcd *lcd, struct pci_dev *pdev)
+{
+	int ret;
+
+	/* step 0. Check if iommu is present */
+	if (!iommu_present(&pci_bus_type))
+		return -ENODEV;
+
+	if (lcd->domain) {
+		LIBLCD_ERR("%s: Domain mapping %p already found!",
+			   __func__, lcd->domain);
+		return -EEXIST;
+	}
+
+	/* step 1. alloc domain */
+	lcd->domain = iommu_domain_alloc(&pci_bus_type);
+	if (!lcd->domain)
+		return -ENOMEM;
+
+	/* We might not use IRQ remapping in the near future, atleast until
+	 * someone makes it work inside LCDs. It's ok to continue.
+	 */
+	if (!iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
+		printk(KERN_WARNING "%s: No interrupt remapping support\n",
+		       __func__);
+	}
+
+	/* step 2. map memory */
+	ret = lcd_iommu_map_memory(lcd);
+	if (ret) {
+		LIBLCD_ERR("%s: map memory failed! ret %d", __func__, ret);
+		goto fail_map;
+	}
+
+	/* step 3. attach domain */
+	ret = iommu_attach_device(lcd->domain, &pdev->dev);
+	if (ret) {
+		LIBLCD_ERR("%s: attach device failed! ret %d", __func__, ret);
+		goto fail_attach;
+	}
+
+	return 0;
+
+fail_attach:
+	lcd_iommu_unmap_memory(lcd);
+fail_map:
+	iommu_domain_free(lcd->domain);
+	/* don't leave a dangling pointer behind; a later call must see
+	 * "no domain" rather than freed memory.
+	 */
+	lcd->domain = NULL;
+	return ret;
+}
+
+EXPORT_SYMBOL(lcd_iommu_map_domain);
+
+/**
+ * Detach pdev from the iommu domain of the lcd and free the domain.
+ *
+ * NOTE(review): nothing checks that pdev is the device that was attached
+ * by lcd_iommu_map_domain(); callers have to pass the same device.
+ *
+ * @param lcd pointer to the lcd structure
+ * @param pdev pointer to the pci_dev structure
+ * @return 0 on success; -EINVAL if the lcd has no domain
+ */
+int lcd_iommu_unmap_domain(struct lcd *lcd, struct pci_dev *pdev)
+{
+	if (!lcd->domain) {
+		LIBLCD_ERR("%s Domain mapping not found!", __func__);
+		return -EINVAL;
+	}
+
+	/* step 1. detach device */
+	iommu_detach_device(lcd->domain, &pdev->dev);
+
+	/* step 2. free domain */
+	iommu_domain_free(lcd->domain);
+	/* forget the freed domain so that it is neither used nor freed
+	 * again, and so that a device can be assigned again later.
+	 */
+	lcd->domain = NULL;
+
+	return 0;
+}
+
+EXPORT_SYMBOL(lcd_iommu_unmap_domain);
diff --git a/lcd-domains/microkernel/run.c b/lcd-domains/microkernel/run.c
index 325c8d54d98e4dffe2d58d09d1d916197e7b5a1d..3cf9223c252072c90c753658e28badd94d4e087f 100644
--- a/lcd-domains/microkernel/run.c
+++ b/lcd-domains/microkernel/run.c
@@ -7,7 +7,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 
@@ -302,6 +304,83 @@ static int handle_syscall_irq_enable(struct lcd *lcd)
 	return 0;
 }
 
+/* LCD_SYSCALL_IOMMU_MAP_PAGE: arg0 = gpa, arg1 = order, arg2 = force */
+static int handle_syscall_iommu_map_page(struct lcd *lcd)
+{
+	gpa_t gpa = __gpa(lcd_arch_get_syscall_arg0(lcd->lcd_arch));
+	unsigned int order = lcd_arch_get_syscall_arg1(lcd->lcd_arch);
+	bool force = lcd_arch_get_syscall_arg2(lcd->lcd_arch);
+
+	return lcd_iommu_map_page(lcd, gpa, order, force);
+}
+
+/* LCD_SYSCALL_IOMMU_UNMAP_PAGE: arg0 = gpa */
+static int handle_syscall_iommu_unmap_page(struct lcd *lcd)
+{
+	gpa_t gpa = __gpa(lcd_arch_get_syscall_arg0(lcd->lcd_arch));
+
+	return lcd_iommu_unmap_page(lcd, gpa);
+}
+
+/*
+ * LCD_SYSCALL_ASSIGN_DEVICE: arg0 = pci domain, arg1 = bus, arg2 = devfn
+ *
+ * NOTE(review): the device reference is dropped before returning, so
+ * nothing pins the pci_dev while it is attached to the iommu domain.
+ */
+static int handle_syscall_assign_device(struct lcd *lcd)
+{
+	int domain, bus, devfn;
+	struct pci_dev *dev;
+	int ret;
+
+	domain = lcd_arch_get_syscall_arg0(lcd->lcd_arch);
+	bus = lcd_arch_get_syscall_arg1(lcd->lcd_arch);
+	devfn = lcd_arch_get_syscall_arg2(lcd->lcd_arch);
+
+	dev = pci_get_domain_bus_and_slot(domain, bus, devfn);
+	if (!dev) {
+		LCD_ERR("couldn't get device %x:%x:%x", domain, bus, devfn);
+		return -ENODEV;
+	}
+
+	LCD_MSG("Device found %x:%x:%x. mapping into iommu domain",
+		domain, bus, devfn);
+	ret = lcd_iommu_map_domain(lcd, dev);
+	LCD_MSG("lcd iommu map returned %d", ret);
+
+	/* pci_get_domain_bus_and_slot() took a reference; give it back */
+	pci_dev_put(dev);
+	return ret;
+}
+
+/* LCD_SYSCALL_DEASSIGN_DEVICE: arg0 = pci domain, arg1 = bus, arg2 = devfn */
+static int handle_syscall_deassign_device(struct lcd *lcd)
+{
+	int domain, bus, devfn;
+	struct pci_dev *dev;
+	int ret;
+
+	domain = lcd_arch_get_syscall_arg0(lcd->lcd_arch);
+	bus = lcd_arch_get_syscall_arg1(lcd->lcd_arch);
+	devfn = lcd_arch_get_syscall_arg2(lcd->lcd_arch);
+
+	dev = pci_get_domain_bus_and_slot(domain, bus, devfn);
+	if (!dev) {
+		LCD_ERR("couldn't get device %x:%x:%x", domain, bus, devfn);
+		return -ENODEV;
+	}
+
+	LCD_MSG("Device found %x:%x:%x. unmapping from iommu domain",
+		domain, bus, devfn);
+	ret = lcd_iommu_unmap_domain(lcd, dev);
+	LCD_MSG("lcd iommu unmap returned %d", ret);
+
+	/* pci_get_domain_bus_and_slot() took a reference; give it back */
+	pci_dev_put(dev);
+	return ret;
+}
+
 static int handle_syscall(struct lcd *lcd, int *lcd_ret)
 {
 	int syscall_id;
@@ -379,6 +458,18 @@ static int handle_syscall(struct lcd *lcd, int *lcd_ret)
 	case LCD_SYSCALL_IRQ_ENABLE:
 		ret = handle_syscall_irq_enable(lcd);
 		break;
+	case LCD_SYSCALL_ASSIGN_DEVICE:
+		ret = handle_syscall_assign_device(lcd);
+		break;
+	case LCD_SYSCALL_DEASSIGN_DEVICE:
+		ret = handle_syscall_deassign_device(lcd);
+		break;
+	case LCD_SYSCALL_IOMMU_MAP_PAGE:
+		ret = handle_syscall_iommu_map_page(lcd);
+		break;
+	case LCD_SYSCALL_IOMMU_UNMAP_PAGE:
+		ret = handle_syscall_iommu_unmap_page(lcd);
+		break;
 	default:
 		LCD_ERR("unimplemented syscall %d", syscall_id);
 		ret = -ENOSYS;
diff --git a/lcd-domains/scripts/Kbuild.microkernel b/lcd-domains/scripts/Kbuild.microkernel
index 8590026cb2ced0fe322563841c62fe3155b73371..f326d1292ce4bf8940ac74470481eb1c52a977fa 100644
--- a/lcd-domains/scripts/Kbuild.microkernel
+++ b/lcd-domains/scripts/Kbuild.microkernel
@@ -21,6 +21,7 @@ lcd_domains-y += $(addprefix microkernel/, \
 	ipc.o \
 	main.o \
 	mem.o \
+	iommu.o \
 	mem_itree.o \
 	run.o \
 	ksym.o \