Commit 742eca0d authored by Charlie Jacobsen's avatar Charlie Jacobsen Committed by Vikram Narayanan
Browse files

Separates utcb (message buffer) and the stack.

The utcb is in a separate page of memory now. The stack can now
be made bigger than a page if necessary. This isn't a final
solution, but it will work for now while trying to get async
working. (Obviously, calling send/recv async won't work right
now since there is just one global message buffer.)
parent 105c8b36
......@@ -42,7 +42,7 @@ int klcd_call(cptr_t endpoint);
int klcd_reply(void);
int klcd_create(cptr_t *slot_out);
int klcd_config(cptr_t lcd, gva_t pc, gva_t sp, gpa_t gva_root,
gpa_t stack_page);
gpa_t utcb_page);
int klcd_run(cptr_t lcd);
int klcd_cap_grant(cptr_t lcd, cptr_t src, cptr_t dest);
int klcd_cap_page_grant_map(cptr_t lcd, cptr_t page, cptr_t dest, gpa_t gpa);
......@@ -295,13 +295,13 @@ static inline int lcd_create(cptr_t *slot_out)
*
* The lcd's cspace is configured using lcd_cap_grant.
*
* Stack should be the guest physical address where stack/utcb should be
* utcb_page should be the guest physical address where the utcb should be
* mapped.
*/
static inline int lcd_config(cptr_t lcd, gva_t pc, gva_t sp, gpa_t gva_root,
gpa_t stack_page)
gpa_t utcb_page)
{
return klcd_config(lcd, pc, sp, gva_root, stack_page);
return klcd_config(lcd, pc, sp, gva_root, utcb_page);
}
/**
* Runs / resumes an lcd.
......@@ -581,14 +581,14 @@ struct lcd_info {
* Boot page
*/
char *boot_page_base;
/*
* The creating lcd has a cptr to the boot page
*/
cptr_t boot_page_cptrs[1 << LCD_BOOT_PAGES_ORDER];
/*
* Boot mem page infos
*/
struct list_head boot_mem_list;
/*
* Stack page infos
*/
struct list_head stack_mem_list;
/*
* Paging mem page infos
*/
......@@ -636,85 +636,10 @@ static inline void lcd_unload_module(struct lcd_info *mi,
* you don't pass the capability to any other lcd, etc., this will stop and
* destroy the lcd.
*
* Guest Physical Memory Layout
* ============================
*
* No gdt/tss/idt for now (easier). See Documentation/lcd-domains/vmx.txt.
*
* From bottom to top,
*
* -- The bottom 1 MB is unmapped / reserved in case the module is expecting
* the standard physical memory layout of a PC. (Of course, it or its
* creator would need to map something there to emulate that memory.) No
* memory mapped here for the gcc stack protector, so make sure you have
* that turned off when building the code for the lcd.
*
* -- Guest virtual page tables come next, 4 MBs. This puts a (big) upper
* limit on the size of the module that can be mapped. The page tables
* in the hierarchy are allocated on demand as the module is mapped.
*
* -- The stack/UTCB used by the initial thread when the lcd boots. (The
* microkernel manages this page.)
*
* -- The module itself.
*
* -- A huge chunk of free/unmapped guest physical memory available to the
* module.
*
* -- The upper part is unusable (see Intel SDM V3 28.2.2). The last
* usable byte is at 0x0000 FFFF FFFF FFFF.
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* | Unusable |
* +---------------------------+ 0x0000 FFFF FFFF FFFF
* | |
* : Free :
* | |
* +---------------------------+ (variable)
* | |
* : Module :
* | |
* +---------------------------+ 0x0000 0000 0050 2000
* | Stack/UTCB |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 1000
* | Boot Info |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 0000
* | Guest Virtual Page Tables |
* | (4 MBs max) |
* +---------------------------+ 0x0000 0000 0010 0000
* | Free / Unmapped |
* | (1 MB) |
* +---------------------------+ 0x0000 0000 0000 0000
*
* Guest Virtual Memory Layout
* ===========================
*
* The lower part has the same layout as the guest physical.
*
* The module is mapped per the guest virtual addresses in the lcd_module_page
* list returned from the module loader, so that relinking is unnecessary.
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* The module | |
* gets mapped | Upper Part |
* somewhere in : (mostly free) :
* here --------> | |
* | |
* +---------------------------+ 0x0000 0000 0050 1000
* | Stack/UTCB |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 2000
* | Boot info |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 0000
* | Guest Virtual Page Tables |
* | (4 MBs max) |
* +---------------------------+ 0x0000 0000 0010 0000
* | Free / Unmapped |
* | (1 MB) |
* +---------------------------+ 0x0000 0000 0000 0000
* Address Spaces
* ==============
*
* See types.h.
*
* Initial CSPACE
* ==============
......
......@@ -24,17 +24,20 @@
static inline struct lcd_utcb * lcd_get_utcb(void)
{
struct lcd_utcb *out;
/*
* The utcb is in the bottom of the stack page. All we have to
* do is mask off the low 12 bits to get the address.
/*
* UTCB is no longer on the stack. It's at a fixed global
* address.
*
* Why? The async runtime messes with stacks, so we can't assume
* the utcb is sitting at the bottom of the current stack page
* (masking off bits).
*
* Obviously this won't work if we have more than one thread using
* the guest physical/virtual address spaces. So we may have to
* revisit this. (Our ultimate goal is to avoid a global message
* buffer altogether.)
*/
asm volatile(
"movq %%rsp, %0 \n\t"
"andq $0xFFFFFFFFFFFFF000, %0 \n\t"
: "=g" (out)
::);
return out;
return (struct lcd_utcb *)gva_val(LCD_UTCB_GVA);
}
#define LCD_MK_REG_ACCESS(idx) \
......
......@@ -6,7 +6,7 @@
#ifdef LCD_DOMAINS
#include <lcd-domains/liblcd-config.h>
#include <lcd-domains/types.h>
#include <linux/sched.h>
#define assert(XX) do { BUG_ON(!(XX)); } while(0)
#else
#include <stdint.h>
#include <stdlib.h>
......
......@@ -8,7 +8,6 @@
#ifdef LCD_DOMAINS
#include <lcd-domains/liblcd-config.h>
#include <lcd-domains/types.h>
#define assert(XX) do { BUG_ON(!XX); } while(0)
#else /* ! LCD_DOMAINS */
#include <assert.h>
#endif /* LCD_DOMAINS */
......
......@@ -273,18 +273,132 @@ static inline hpa_t hva2hpa(hva_t hva)
/* BOOT ADDRESS SPACE & INFO ------------------------------------------- */
/*
* Guest Physical Memory Layout
* ============================
*
* No gdt/tss/idt for now (easier). See Documentation/lcd-domains/vmx.txt.
*
* From bottom to top,
*
* -- The bottom 1 MB is unmapped / reserved in case the module is expecting
* the standard physical memory layout of a PC. (Of course, it or its
* creator would need to map something there to emulate that memory.) No
* memory mapped here for the gcc stack protector, so make sure you have
* that turned off when building the code for the lcd.
*
* -- Guest virtual page tables come next, 4 MBs. This puts a (big) upper
* limit on the size of the module that can be mapped. The page tables
* in the hierarchy are allocated on demand as the module is mapped.
*
* -- The UTCB used by the initial thread when the lcd boots. (The
* microkernel manages this page.)
*
* -- The stack
*
* -- Heap. The module is put at the beginning of the heap.
*
* -- A huge chunk of free/unmapped guest physical memory available to the
* module.
*
* -- The upper part is unusable (see Intel SDM V3 28.2.2). The last
* usable byte is at 0x0000 FFFF FFFF FFFF.
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* | Unusable |
* +---------------------------+ 0x0000 FFFF FFFF FFFF
* | |
* : Free :
* | |
* +---------------------------+ 0x0000 0000 0150 4000
* | |
* | Heap |
* | (module mapped |
* | at the bottom) |
* : (16 MBs) :
* | |
* +---------------------------+ 0x0000 0000 0050 4000
* | Stack |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 3000
* | Guard page |
* | (not mapped) |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 2000
* | UTCB |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 1000
* | Boot Info |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 0000
* | Guest Virtual Page Tables |
* | (4 MBs max) |
* +---------------------------+ 0x0000 0000 0010 0000
* | Free / Unmapped |
* | (1 MB) |
* +---------------------------+ 0x0000 0000 0000 0000
*
* Guest Virtual Memory Layout
* ===========================
*
* The lower part has the same layout as the guest physical.
*
* The module is mapped per the guest virtual addresses in the lcd_module_page
* list returned from the module loader, so that relinking is unnecessary.
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* The module | |
* gets mapped | Upper Part |
* somewhere in : (mostly free) :
* here --------> | |
* | |
* +---------------------------+ 0x0000 0000 0150 4000
* | |
* | Heap |
* : (16 MBs) :
* | |
* +---------------------------+ 0x0000 0000 0050 4000
* | Stack |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 3000
* | Guard page |
* | (not mapped) |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 2000
* | UTCB |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 1000
* | Boot info |
* | (4 KBs) |
* +---------------------------+ 0x0000 0000 0050 0000
* | Guest Virtual Page Tables |
* | (4 MBs max) |
* +---------------------------+ 0x0000 0000 0010 0000
* | Free / Unmapped |
* | (1 MB) |
* +---------------------------+ 0x0000 0000 0000 0000
*/
#define LCD_BOOT_PAGES_ORDER 2
#define LCD_STACK_PAGES_ORDER 0
#define LCD_GV_PAGING_MEM_GPA __gpa(1 << 20)
/* guest physical addresses */
#define LCD_GV_PAGING_MEM_GPA __gpa(1 << 20) /* low 1 MB is empty */
#define LCD_GV_PAGING_MEM_SIZE (4 << 20)
#define LCD_BOOT_PAGES_GPA gpa_add(LCD_GV_PAGING_MEM_GPA, \
LCD_GV_PAGING_MEM_SIZE)
#define LCD_BOOT_PAGES_SIZE ((1 << LCD_BOOT_PAGES_ORDER) * (4 << 10))
#define LCD_STACK_GPA gpa_add(LCD_BOOT_PAGES_GPA, LCD_BOOT_PAGES_SIZE)
#define LCD_STACK_SIZE (4 << 10)
#define LCD_UTCB_GPA gpa_add(LCD_BOOT_PAGES_GPA, LCD_BOOT_PAGES_SIZE)
#define LCD_UTCB_SIZE (4 << 10)
#define LCD_UTCB_STACK_GAP (4 << 10) /* guard page, not mapped */
#define LCD_STACK_GPA gpa_add(LCD_UTCB_GPA, LCD_UTCB_SIZE + LCD_UTCB_STACK_GAP)
#define LCD_STACK_SIZE ((1 << LCD_STACK_PAGES_ORDER) * (4 << 10))
#define LCD_MODULE_GPA gpa_add(LCD_STACK_GPA, LCD_STACK_SIZE)
/* guest virtual addresses */
#define LCD_GV_PAGING_MEM_GVA __gva(gpa_val(LCD_GV_PAGING_MEM_GPA))
#define LCD_BOOT_PAGES_GVA __gva(gpa_val(LCD_BOOT_PAGES_GPA))
#define LCD_UTCB_GVA __gva(gpa_val(LCD_UTCB_GPA))
#define LCD_STACK_GVA __gva(gpa_val(LCD_STACK_GPA))
#define LCD_NUM_BOOT_CPTRS 8
......@@ -326,9 +440,11 @@ struct lcd_boot_info {
* Bootstrap page info --------------------
*/
unsigned num_boot_mem_pi;
unsigned num_stack_mem_pi;
unsigned num_paging_mem_pi;
unsigned num_free_mem_pi;
struct lcd_boot_info_for_page *boot_mem_pi_start;
struct lcd_boot_info_for_page *stack_mem_pi_start;
struct lcd_boot_info_for_page *paging_mem_pi_start;
struct lcd_boot_info_for_page *free_mem_pi_start;
/*
......
......@@ -43,9 +43,9 @@ fail1:
}
int klcd_config(cptr_t lcd, gva_t pc, gva_t sp, gpa_t gva_root,
gpa_t stack_page)
gpa_t utcb_page)
{
return __lcd_config(current->lcd, lcd, pc, sp, gva_root, stack_page);
return __lcd_config(current->lcd, lcd, pc, sp, gva_root, utcb_page);
}
int klcd_run(cptr_t lcd)
......@@ -342,6 +342,7 @@ static int init_lcd_info(struct lcd_info **mi)
* Init list for boot page infos
*/
INIT_LIST_HEAD(&i->boot_mem_list);
INIT_LIST_HEAD(&i->stack_mem_list);
INIT_LIST_HEAD(&i->paging_mem_list);
INIT_LIST_HEAD(&i->free_mem_list);
......@@ -370,6 +371,10 @@ static void free_lcd_info(struct lcd_info *i)
e = list_entry(cursor, struct lcd_page_info_list_elem, list);
kfree(e);
}
list_for_each_safe(cursor, next, &i->stack_mem_list) {
e = list_entry(cursor, struct lcd_page_info_list_elem, list);
kfree(e);
}
list_for_each_safe(cursor, next, &i->paging_mem_list) {
e = list_entry(cursor, struct lcd_page_info_list_elem, list);
kfree(e);
......@@ -380,7 +385,9 @@ static void free_lcd_info(struct lcd_info *i)
}
/*
* Any remaining pages allocated will be freed when the lcd is torn
* down (e.g., boot pages)
* down (e.g., boot pages) because it is the sole owner (for example,
* when we allocated pages for the stack, we deleted the cap in our
* cspace so that the lcd is the sole owner)
*/
/*
......@@ -1253,14 +1260,38 @@ fail1:
static int map_gv_memory_and_stack(cptr_t lcd, struct create_module_cxt *cxt)
{
int ret;
gpa_t utcb_top;
/*
* Map paging mem and stack/utcb
* Map paging mem, boot mem, and utcb first
*/
return gv_map_range(lcd, cxt,
utcb_top = gpa_add(LCD_UTCB_GPA, LCD_UTCB_SIZE);
ret = gv_map_range(lcd, cxt,
LCD_GV_PAGING_MEM_GVA,
LCD_GV_PAGING_MEM_GPA,
(gpa_val(LCD_MODULE_GPA) -
(gpa_val(utcb_top) -
gpa_val(LCD_GV_PAGING_MEM_GPA)) >> PAGE_SHIFT);
if (ret) {
LCD_ERR("map 1");
goto fail1;
}
/*
* Map stack (skip over guard page)
*/
ret = gv_map_range(lcd, cxt,
LCD_STACK_GVA,
LCD_STACK_GPA,
(LCD_STACK_SIZE) >> PAGE_SHIFT);
if (ret) {
LCD_ERR("map 2");
goto fail2;
}
return 0;
fail2:
fail1:
return ret; /* everything freed when LCD is destroyed */
}
static int do_boot_pages(cptr_t lcd, struct lcd_info *mi,
......@@ -1274,14 +1305,15 @@ static int do_boot_pages(cptr_t lcd, struct lcd_info *mi,
unsigned long offset;
gpa_t gpa;
cptr_t dest_slot;
cptr_t boot_pages_cptrs[1 << LCD_BOOT_PAGES_ORDER];
/*
* Allocate the boot pages on the host
*/
ret = klcd_pages_alloc(mi->boot_page_cptrs, &hp_base_out, &hv_base_out,
ret = klcd_pages_alloc(boot_pages_cptrs, &hp_base_out, &hv_base_out,
LCD_BOOT_PAGES_ORDER);
if (ret) {
LCD_ERR("pages alloc");
goto fail1;
goto out1;
}
mi->boot_page_base = hva2va(hv_base_out);
/*
......@@ -1296,17 +1328,17 @@ static int do_boot_pages(cptr_t lcd, struct lcd_info *mi,
ret = __lcd_alloc_cptr(cxt->cache, &dest_slot);
if (ret) {
LCD_ERR("alloc failed");
goto fail2;
goto out2;
}
/*
* Grant and map in lcd's guest physical
*/
ret = lcd_cap_page_grant_map(lcd, mi->boot_page_cptrs[i],
ret = lcd_cap_page_grant_map(lcd, boot_pages_cptrs[i],
dest_slot,
gpa);
if (ret) {
LCD_ERR("couldn't map boot page in lcd's gp");
goto fail3;
goto out2;
}
/*
* Will be mapped in guest virtual along with the stack
......@@ -1318,7 +1350,7 @@ static int do_boot_pages(cptr_t lcd, struct lcd_info *mi,
e = kmalloc(sizeof(*e), GFP_KERNEL);
if (!e) {
LCD_ERR("alloc page info");
goto fail4;
goto out2;
}
INIT_LIST_HEAD(&e->list);
e->my_cptr = dest_slot;
......@@ -1329,11 +1361,104 @@ static int do_boot_pages(cptr_t lcd, struct lcd_info *mi,
*/
offset += PAGE_SIZE;
}
fail4:
fail3:
fail2:
fail1:
return ret; /* we failed; pages will be unmapped when lcd is
goto out2;
out2:
/*
* Make sure we delete our capabilities to the alloc'd
* boot pages. Those that were granted to the LCD (perhaps
* during failure) will be freed when the LCD is torn down. Those
* that weren't will be freed right now.
*/
for (i = 0; i < (1 << LCD_BOOT_PAGES_ORDER); i++)
lcd_cap_delete(boot_pages_cptrs[i]);
out1:
return ret; /* if we failed, pages will be unmapped when lcd is
* destroyed, page infos, cptrs, etc. freed when
* lcd_info destroyed */
}
/**
 * do_stack_pages - allocate the lcd's stack pages and map them in its
 *                  guest physical address space
 * @lcd: cptr to the lcd being set up
 * @mi:  lcd info; one page info element per stack page is added to
 *       mi->stack_mem_list
 * @cxt: module creation context; destination cptrs are allocated from
 *       cxt->cache
 *
 * Allocates (1 << LCD_STACK_PAGES_ORDER) pages on the host, then grants
 * and maps each one at consecutive page offsets starting at LCD_STACK_GPA.
 * The guest virtual mapping is not done here.
 *
 * Once the host allocation has succeeded, our own capabilities to the
 * pages are always deleted before returning (success or failure).
 *
 * Returns 0 on success, non-zero on failure. If we failed, pages will be
 * unmapped when the lcd is destroyed; page infos, cptrs, etc. are freed
 * when the lcd_info is destroyed.
 */
static int do_stack_pages(cptr_t lcd, struct lcd_info *mi,
			struct create_module_cxt *cxt)
{
	int i;
	int ret;
	hpa_t hp_base_out;	/* required out-param; not used here */
	hva_t hv_base_out;	/* required out-param; not used here */
	struct lcd_page_info_list_elem *e;
	unsigned long offset;
	gpa_t gpa;
	cptr_t dest_slot;
	cptr_t stack_pages_cptrs[1 << LCD_STACK_PAGES_ORDER];
	/*
	 * Allocate the stack pages on the host
	 */
	ret = klcd_pages_alloc(stack_pages_cptrs, &hp_base_out, &hv_base_out,
			LCD_STACK_PAGES_ORDER);
	if (ret) {
		LCD_ERR("pages alloc");
		goto out1;
	}
	/*
	 * Grant and map them in the lcd
	 */
	offset = 0;
	for (i = 0; i < (1 << LCD_STACK_PAGES_ORDER); i++) {
		gpa = gpa_add(LCD_STACK_GPA, offset);
		/*
		 * Alloc slot in dest
		 */
		ret = __lcd_alloc_cptr(cxt->cache, &dest_slot);
		if (ret) {
			LCD_ERR("alloc failed");
			goto out2;
		}
		/*
		 * Grant and map in lcd's guest physical
		 */
		ret = lcd_cap_page_grant_map(lcd, stack_pages_cptrs[i],
					dest_slot,
					gpa);
		if (ret) {
			LCD_ERR("couldn't map stack page in lcd's gp");
			goto out2;
		}
		/*
		 * Will be mapped in guest virtual along with the boot
		 * and paging mem pages ...
		 */
		/*
		 * Set up page info
		 */
		e = kmalloc(sizeof(*e), GFP_KERNEL);
		if (!e) {
			LCD_ERR("alloc page info");
			/*
			 * ret is still 0 from the successful grant/map
			 * above; set it so the caller sees the failure.
			 */
			ret = -ENOMEM;
			goto out2;
		}
		INIT_LIST_HEAD(&e->list);
		e->my_cptr = dest_slot;
		e->page_gpa = gpa;
		list_add(&e->list, &mi->stack_mem_list);
		/*
		 * Bump offset
		 */
		offset += PAGE_SIZE;
	}
	/*
	 * Success: ret is 0 from the last grant/map. Fall through to the
	 * shared cleanup, which runs on success and failure alike.
	 */
out2:
	/*
	 * Make sure we delete our capabilities to the alloc'd
	 * pages. Those that were granted to the LCD (perhaps
	 * during failure) will be freed when the LCD is torn down. Those
	 * that weren't will be freed right now.
	 */
	for (i = 0; i < (1 << LCD_STACK_PAGES_ORDER); i++)
		lcd_cap_delete(stack_pages_cptrs[i]);
out1:
	return ret;
}
......@@ -1342,7 +1467,6 @@ static int setup_addr_space(cptr_t lcd, struct lcd_info *mi)
{
struct create_module_cxt *cxt;
int ret;
int i;
/*
* Set up guest virtual cxt
*/
......@@ -1372,12 +1496,20 @@ static int setup_addr_space(cptr_t lcd, struct lcd_info *mi)
goto fail4;
}
/*
* Map guest virtual paging memory, boot page, and stack/utcb
* Set up stack pages
*/
ret = do_stack_pages(lcd, mi, cxt);
if (ret) {
LCD_ERR("setting up stack pages");
goto fail5;
}
/*
* Map guest virtual paging memory, boot pages, utcb, and stack
*/
ret = map_gv_memory_and_stack(lcd, cxt);
if (ret) {
LCD_ERR("mapping paging mem");
goto fail5;
goto fail6;
}
/*
* Copy over list of paging mem infos before we kill cxt
......@@ -1391,17 +1523,13 @@ static int setup_addr_space(cptr_t lcd, struct lcd_info *mi)
return 0;
fail6:
fail5:
/* Delete our caps to boot pages. When lcd is destroyed, they
* will then be freed.
*/
for (i = 0; i < (1 << LCD_BOOT_PAGES_ORDER); i++)
lcd_cap_delete(mi->boot_page_cptrs[i]);
fail4:
fail3:
fail2:
/* gv_destroy just removes our caps to the gv paging pages and boot
* pages; gv paging mem, boot pages, etc. will be freed when lcd is
/* gv_destroy just removes our caps to the gv paging pages;
* boot pages, etc. will be freed when lcd is
* destroyed.
*
* module pages will be freed when lcd_unload_module is called
......@@ -1448,7 +1576,7 @@ int klcd_create_module_lcd(cptr_t *slot_out, char *mdir, char *mname,
ret = lcd_config(*slot_out, (*mi)->init,
gva_add(LCD_STACK_GVA, LCD_STACK_SIZE - 1), /* stack */
LCD_GV_PAGING_MEM_GPA,
LCD_STACK_GPA);