Commit dbc4e40c authored by Charlie Jacobsen's avatar Charlie Jacobsen Committed by Vikram Narayanan

Separated lcd into container and thread objects.

Updated code. Removed gdt/tss/idt for now. Added doc directory
and some initial doc.
parent 6fc91823
==============================
OVERVIEW
==============================
==============================
LOCKING
==============================
Locks are used on an lcd_arch's ept and its list of lcd_arch_threads.
We use mutexes for now, so some functions are not safe to call from
interrupt context (we can sleep when we lock a mutex).
Using mutexes also makes debugging easier: a thread waiting on a mutex can be interrupted, so a locking mistake is less likely to hang the machine.
==============================
GDT/TSS/IDT
==============================
We probably don't need a gdt/tss/idt right now. I'm removing that.
In the future, we could provide a secure interface for an lcd thread to
change its gdt/tss/idt info in its vmcs. The lcd thread could allocate a gdt,
etc. inside, and use the interface to update the gdtr, tr, idtr as needed.
The lcd thread could also tell the microkernel not to vmexit for certain
external interrupts.
If we add a tss, each thread will probably need its own, since the tss contains
a stack pointer that differs from thread to thread.
I'm putting the gdt / tss init code here for reference since it was quite
complicated to do (some of the macros won't be relevant anymore):
/*
* Alloc zero'd page for gdt
*/
lcd->gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
if (!lcd->gdt) {
printk(KERN_ERR "vmx init gdt: failed to alloc gdt\n");
ret = -ENOMEM;
goto fail;
}
/*
*===--- Populate gdt; see layout in lcd-domains-arch.h. ---===
*/
/*
* Code Segment
*/
desc = lcd->gdt + (LCD_ARCH_CS_SELECTOR >> 3); /* div by 8 */
vmx_pack_desc(desc,
0, /* base */
0xFFFFF, /* limit (granularity = 1) */
0xB, /* code seg type, exec/read/accessed */
0x1, /* code/data segment desc type */
0x0, /* dpl = 0 */
0x1, /* present */
0x0, /* avl (not used) */
0x1, /* 64-bit code */
0x0, /* d must be cleared for 64-bit code */
0x1); /* 4KB granularity */
/*
* Data Segment (for %fs)
*/
desc = lcd->gdt + (LCD_ARCH_FS_SELECTOR >> 3); /* div by 8 */
vmx_pack_desc(desc,
0, /* base */
0xFFFFF, /* limit (granularity = 1) */
0x3, /* data seg type, read/write/accessed */
0x1, /* code/data segment desc type */
0x0, /* dpl = 0 */
0x1, /* present */
0x0, /* avl (not used) */
0x0, /* l (not 64-bit code) */
0x1, /* d (linux uses 1 for d ...) */
0x1); /* 4KB granularity */
/*
* Data Segment (for %gs)
*/
desc = lcd->gdt + (LCD_ARCH_GS_SELECTOR >> 3); /* div by 8 */
vmx_pack_desc(desc,
0, /* base */
0xFFFFF, /* limit (granularity = 1) */
0x3, /* data seg type, read/write/accessed */
0x1, /* code/data segment desc type */
0x0, /* dpl = 0 */
0x1, /* present */
0x0, /* avl (not used) */
0x0, /* l (not 64-bit code) */
0x1, /* d (linux uses 1 for d ...) */
0x1); /* 4KB granularity */
/*
* Task Segment (descriptor)
*/
tssd = (struct tss_desc *)(lcd->gdt + (LCD_ARCH_TR_SELECTOR >> 3));
set_tssldt_descriptor(tssd,
gpa_val(LCD_ARCH_TSS_BASE),/* base */
0xB, /* type = 64-bit busy tss */
LCD_ARCH_TSS_LIMIT); /* limit */
/*
*===--- Map GDT in guest physical address space ---===
*/
ret = lcd_arch_ept_map(lcd,
/* gpa */
LCD_ARCH_GDTR_BASE,
/* hpa */
va2hpa(lcd->gdt),
/* create paging structs as needed */
1,
/* no overwrite */
0);
======================================= END GDT CODE
/*
* Alloc zero'd page for tss.
*
* Only the first part of the page will be filled by the tss. This is
* done for now to make the address space layout simpler, but
* could perhaps be improved later.
*/
lcd->tss = (struct lcd_arch_tss *)get_zeroed_page(GFP_KERNEL);
if (!lcd->tss) {
printk(KERN_ERR "vmx_init_tss: failed to alloc tss\n");
ret = -ENOMEM;
goto fail;
}
base_tss = &(lcd->tss->base_tss);
/*
* Set up 64-bit TSS (See Intel SDM V3 7.7)
*
* XXX: No interrupt stack tables are used (since the lcd won't be
* handling interrupts for now).
*
* Privilege Level 0 Stack
*
* XXX: This should be moved to arch-dep interface code, so
* that arch-indep code can set it. Relies on arch-indep
* code to use gva = gpa!
*/
base_tss->sp0 = gpa_val(LCD_ARCH_STACK_TOP);
/*
* The TSS must have a minimal I/O bitmap with one byte of 1's
*
* Intel SDM V1 16.5.2
*/
base_tss->io_bitmap_base = offsetof(struct lcd_arch_tss, io_bitmap);
vcpu->tss->io_bitmap[0] = 0xff;
/*
*===--- Map TSS in guest physical address space ---===
*/
ret = lcd_arch_ept_map(vcpu,
/* gpa */
LCD_ARCH_TSS_BASE,
/* hpa */
va2hpa(vcpu->tss),
/* create paging structs as needed */
1,
/* no overwrite */
0);
......@@ -3,7 +3,7 @@
#include <asm/vmx.h>
#include <linux/spinlock.h>
#include <lcd-domains/utcb.h>
#include <linux/bitmap.h>
/* ADDRESS SPACE TYPES ---------------------------------------- */
......@@ -140,19 +140,15 @@ enum lcd_arch_reg {
#define LCD_ARCH_EPT_WALK_LENGTH 4
#define LCD_ARCH_EPTP_WALK_SHIFT 3
#define LCD_ARCH_PTRS_PER_EPTE (1 << 9)
#define LCD_ARCH_PTRS_PER_EPTE (1 << 9) /* 512 table entries */
typedef unsigned long lcd_arch_epte_t;
/*
 * Extended page table (ept) state.
 */
struct lcd_arch_ept {
/* Lock for the ept (a spinlock here). */
spinlock_t lock;
/* Pointer to an array of ept table entries; presumably the top-level
 * (root) table, going by the name -- TODO confirm. */
lcd_arch_epte_t *root;
/* NOTE(review): looks like the value that gets loaded into the vmcs
 * EPT pointer field -- confirm against the vmcs setup code. */
u64 vmcs_ptr;
/* NOTE(review): presumably records whether ept accessed/dirty bits
 * are enabled -- confirm where this is set. */
bool access_dirty_enabled;
};
typedef unsigned long lcd_arch_epte_t; /* table entry */
struct lcd_arch_tss {
/*
* ** Currently not used. Keeping it here in case we add tss's back
* in later. See Documentation/lcd-domains/vmx.txt. **
*
* Intel SDM V3 7.7
*
* Base TSS before I/O bitmap, etc.
......@@ -167,28 +163,37 @@ struct lcd_arch_tss {
u8 io_bitmap[1];
} __attribute__((packed));
struct lcd_arch {
struct lcd_arch_thread {
/*
* Public Data
* Containing lcd_arch
*/
struct {
gva_t gv_fault_addr;
gpa_t gp_fault_addr;
} run_info;
struct lcd_arch *lcd_arch;
/*
* Private Data
* List of lcd_arch_thread's inside the containing lcd_arch.
* Protected by lock inside lcd_arch.
*/
struct list_head lcd_arch_threads;
/*
* CPU we're running on / vmloaded on
*/
int cpu;
/*
* = 1 if we've launched (need to use vm resume instead of launch
* if we have already launched; intel makes us track this)
*/
int launched;
/*
* VPID of the vm, used by the tlb for vpid-specific invalidations
*/
int vpid;
/*
* vmcs data structure; *must* be accessed using vmread / vmwrite
*/
struct lcd_arch_vmcs *vmcs;
struct lcd_arch_ept ept;
struct desc_struct *gdt;
struct lcd_arch_tss *tss;
struct lcd_utcb *utcb;
/*
* Exit info
*/
u8 fail;
u64 exit_reason;
u64 exit_qualification;
......@@ -196,12 +201,21 @@ struct lcd_arch {
u32 exit_intr_info;
u32 error_code;
u32 vec_no;
/*
* Stuff we need to save explicitly
*/
u64 host_rsp;
u64 regs[LCD_ARCH_NUM_REGS];
u64 cr2;
int shutdown;
int ret_code;
/*
* MSR autoload area (used during vm enters and exits). Fields must
* be word aligned!
*/
struct msr_autoload {
#if LCD_ARCH_NUM_AUTOLOAD_MSRS > 0
struct vmx_msr_entry guest[LCD_ARCH_NUM_AUTOLOAD_MSRS];
......@@ -213,6 +227,62 @@ struct lcd_arch {
} msr_autoload;
};
/*
* Guest Physical Memory Layout & Segment Registers
* ================================================
*
* No gdt/tss/idt for now (easier), but perhaps in the future (see
* Documentation/lcd-domains/vmx.txt). We can get away with this since
* we set all of the hidden fields in the segment registers - like %fs, %gs,
* etc.
*
* See Intel SDM V3 26.3.1.2, 26.3.1.3 for register requirements
* See Intel SDM V3 3.4.2, 3.4.3 for segment register layout
* See Intel SDM V3 2.4.1 - 2.4.4 for gdtr, ldtr, idtr, tr
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* | |
* : :
* : Free / Unmapped :
* : :
* | |
* LCD_ARCH_TOP----------> +---------------------------+ 0x0000 0000 0000 1000
* | Reserved |
* | (not mapped, catch NULLs) | (4 KBs)
* +---------------------------+ 0x0000 0000 0000 0000
*/
#define LCD_ARCH_TOP (1 << 12)
#define LCD_ARCH_FS_BASE __gpa(0UL)
#define LCD_ARCH_FS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GS_BASE __gpa(0UL)
#define LCD_ARCH_GS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GDTR_BASE __gpa(0UL)
#define LCD_ARCH_GDTR_LIMIT 0x0 /* no gdt */
#define LCD_ARCH_TSS_BASE __gpa(0UL)
#define LCD_ARCH_TSS_LIMIT 0x0 /* no tss */
#define LCD_ARCH_IDTR_BASE __gpa(0UL)
#define LCD_ARCH_IDTR_LIMIT 0x0 /* no idt right now */
/*
 * Container object: holds the state shared by the lcd_arch_thread's
 * that belong to it -- the ept (guest physical address space) and the
 * list of those threads.
 */
struct lcd_arch {
/*
* The guest physical address space is shared by all lcd_arch_thread's.
*/
struct {
/* Lock for the ept fields below (a spinlock). */
spinlock_t lock;
/* Pointer to an array of ept table entries; presumably the top-level
 * table, going by the name -- TODO confirm. */
lcd_arch_epte_t *root;
u64 vmcs_ptr; /* to be loaded in vmcs EPT_POINTER field */
/* NOTE(review): presumably records whether ept accessed/dirty bits
 * are enabled -- confirm where this is set. */
bool access_dirty_enabled;
} ept;
/*
* List of lcd_arch_thread's inside this lcd_arch.
*/
struct {
/* Head of the list of threads. */
struct list_head list;
/* Presumably the lock that protects `list`; it is a mutex, so
 * taking it may sleep -- TODO confirm against the list users. */
struct mutex lock;
} lcd_arch_threads;
};
/**
* Initializes the arch-dependent code for LCD (detects required
* features, turns on VMX on *all* cpu's).
......@@ -339,68 +409,6 @@ int lcd_arch_set_pc(struct lcd_arch *vcpu, gva_t a);
*/
int lcd_arch_set_gva_root(struct lcd_arch *vcpu, gpa_t a);
/*
* GDT Layout
* ==========
* 0 = NULL
* 1 = Code segment
* 2 = Data segment (%fs, default not present)
* 3 = Data segment (%gs, default not present)
* 4 = Task segment
*
* See Intel SDM V3 26.3.1.2, 26.3.1.3 for register requirements.
* See Intel SDM V3 3.4.2, 3.4.3 for segment register layout
* See Intel SDM V3 2.4.1 - 2.4.4 for gdtr, ldtr, idtr, tr
*/
#define LCD_ARCH_FS_BASE __gpa(0UL)
#define LCD_ARCH_FS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GS_BASE __gpa(0UL)
#define LCD_ARCH_GS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GDTR_BASE __gpa(1UL << PAGE_SHIFT)
#define LCD_ARCH_GDTR_LIMIT 0xFFFUL
#define LCD_ARCH_TSS_BASE __gpa(2UL << PAGE_SHIFT)
/* tss base + limit = address of last byte in tss, hence -1 */
#define LCD_ARCH_TSS_LIMIT (sizeof(struct lcd_arch_tss) - 1)
#define LCD_ARCH_IDTR_BASE __gpa(0UL)
#define LCD_ARCH_IDTR_LIMIT 0x0 /* no idt right now */
#define LCD_ARCH_CS_SELECTOR (1 << 3)
#define LCD_ARCH_FS_SELECTOR (2 << 3)
#define LCD_ARCH_GS_SELECTOR (3 << 3)
#define LCD_ARCH_TR_SELECTOR (4 << 3) /* TI must be 0 */
#define LCD_ARCH_LDTR_SELECTOR (0 << 3) /* unusable */
/*
* Guest Physical Memory Layout
* ============================
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* | |
* : :
* : Free / Unmapped :
* : :
* | |
* LCD_ARCH_STACK_TOP,---> +---------------------------+ 0x0000 0000 0000 4000
* LCD_ARCH_FREE | |
* | Stack |
* : (grows down) : (4 KBs)
* : :
* | |
* | User Thread Ctrl Block |
* LCD_ARCH_UTCB---------> +---------------------------+ 0x0000 0000 0000 3000
* | TSS |
* | only sizeof(tss) is | (4 KBs)
* | used |
* LCD_ARCH_TSS_BASE-----> +---------------------------+ 0x0000 0000 0000 2000
* | GDT | (4 KBs)
* LCD_ARCH_GDTR_BASE----> +---------------------------+ 0x0000 0000 0000 1000
* | Reserved |
* | (not mapped) | (4 KBs)
* +---------------------------+ 0x0000 0000 0000 0000
*/
#define LCD_ARCH_UTCB __gpa(3UL << PAGE_SHIFT)
#define LCD_ARCH_STACK_TOP __gpa(4UL << PAGE_SHIFT)
#define LCD_ARCH_FREE LCD_ARCH_STACK_TOP
/*
* Accessor Macro for syscalls
......
......@@ -10,7 +10,7 @@ static int test01(void)
struct lcd_arch_vmcs *vmcs;
vmcs = vmx_alloc_vmcs(raw_smp_processor_id());
if (!vmcs) {
printk(KERN_ERR "lcd arch: test01 failed\n");
LCD_ARCH_ERR("failed");
return -1;
}
vmx_free_vmcs(vmcs);
......@@ -18,94 +18,47 @@ static int test01(void)
}
static int test02(void)
{
struct lcd_arch *lcd;
lcd = (struct lcd_arch *)kmalloc(sizeof(*lcd), GFP_KERNEL);
if (!lcd) {
printk(KERN_ERR "lcd arch : test02 failed to alloc lcd\n");
goto fail_alloc;
}
if (vmx_allocate_vpid(lcd)) {
printk(KERN_ERR "lcd arch : test02 vpid alloc failed\n");
goto fail;
}
if (lcd->vpid != 1) {
printk(KERN_ERR "lcd arch : test02 actual vpid = %d\n",
lcd->vpid);
goto fail_free;
}
vmx_free_vpid(lcd);
if (vmx_allocate_vpid(lcd)) {
printk(KERN_ERR "lcd arch : test02 2nd vpid alloc failed\n");
goto fail;
}
if (lcd->vpid != 1) {
printk(KERN_ERR "lcd arch : test02 2nd actual vpid = %d\n",
lcd->vpid);
goto fail_free;
}
vmx_free_vpid(lcd);
kfree(lcd);
return 0;
fail_free:
vmx_free_vpid(lcd);
fail:
kfree(lcd);
fail_alloc:
return -1;
}
static int test03(void)
{
struct lcd_arch *lcd;
int i;
char *buf;
lcd = (struct lcd_arch *)kmalloc(sizeof(*lcd), GFP_KERNEL);
int ret = -1;
lcd = lcd_arch_create();
if (!lcd) {
printk(KERN_ERR "lcd arch : test03 failed to alloc lcd\n");
LCD_ARCH_ERR("failed to alloc lcd");
goto fail_alloc;
}
if (vmx_init_ept(lcd)) {
printk(KERN_ERR "lcd arch : test03 ept init failed\n");
goto fail;
}
buf = (char *)lcd->ept.root;
for (i = 0; i < PAGE_SIZE; i++) {
if (buf[i]) {
printk(KERN_ERR "lcd arch : test03 nonzero in ept\n");
goto fail_mem;
LCD_ARCH_ERR("nonzero in ept");
goto out;
}
}
vmx_free_ept(lcd);
kfree(lcd);
return 0;
ret = 0;
fail_mem:
vmx_free_ept(lcd);
fail:
kfree(lcd);
out:
lcd_arch_destroy(lcd);
fail_alloc:
return -1;
return ret;
}
static int test04_help(struct lcd_arch *lcd, gpa_t base)
static int test03_help(struct lcd_arch *lcd, gpa_t base)
{
hpa_t actual;
unsigned long off;
for (off = 0; off < 0x40000; off += PAGE_SIZE) {
if (lcd_arch_ept_gpa_to_hpa(lcd, gpa_add(base, off), &actual)) {
printk(KERN_ERR "lcd arch : test04 failed lookup at %lx\n",
LCD_ARCH_ERR("failed lookup at %lx",
gpa_val(gpa_add(base, off)));
return -1;
}
if (hpa_val(actual) != gpa_val(gpa_add(base, off))) {
printk(KERN_ERR "lcd arch : test04 expected hpa %lx got %lx\n",
LCD_ARCH_ERR("expected hpa %lx got %lx\n",
gpa_val(gpa_add(base, off)),
hpa_val(actual));
return -1;
......@@ -114,29 +67,23 @@ static int test04_help(struct lcd_arch *lcd, gpa_t base)
return 0;
}
static int test04(void)
static int test03(void)
{
struct lcd_arch *lcd;
gpa_t base;
int ret;
ret = -1;
int ret = -1;
lcd = (struct lcd_arch *)kmalloc(sizeof(*lcd), GFP_KERNEL);
lcd = lcd_arch_create();
if (!lcd) {
printk(KERN_ERR "lcd arch : test04 failed to alloc lcd\n");
LCD_ARCH_ERR("failed to alloc lcd");
goto fail1;
}
if (vmx_init_ept(lcd)) {
printk(KERN_ERR "lcd arch : test04 ept init failed\n");
goto fail2;
}
/*
* Map 0x0 - 0x400000 (first 4 MBs, takes two page tables)
*/
if (lcd_arch_ept_map_range(lcd, __gpa(0), __hpa(0), 1024)) {
printk(KERN_ERR "lcd arch: test04 failed to map first 4 MBs\n");
LCD_ARCH_ERR("failed to map first 4 MBs");
goto fail3;
}
......@@ -144,7 +91,7 @@ static int test04(void)
* Map 0x40000000 - 0x40400000 (1GB -- 1GB + 4MBs)
*/
if (lcd_arch_ept_map_range(lcd, __gpa(1 << 30), __hpa(1 << 30), 1024)) {
printk(KERN_ERR "lcd arch: test04 failed to map 2nd 4 MBs\n");
LCD_ARCH_ERR("failed to map 2nd 4 MBs");
goto fail4;
}
......@@ -153,7 +100,7 @@ static int test04(void)
*/
if (lcd_arch_ept_map_range(lcd, __gpa(1UL << 39),
__hpa(1UL << 39), 1024)) {
printk(KERN_ERR "lcd arch: test04 failed to map 3rd 4 MBs\n");
LCD_ARCH_ERR("failed to map 3rd 4 MBs");
goto fail5;
}
......@@ -162,13 +109,13 @@ static int test04(void)
*/
base = __gpa(0);
if (test04_help(lcd, base))
if (test03_help(lcd, base))
goto fail6;
base = __gpa(1 << 30);
if (test04_help(lcd, base))
if (test03_help(lcd, base))
goto fail6;
base = __gpa(1UL << 39);
if (test04_help(lcd, base))
if (test03_help(lcd, base))
goto fail6;
ret = 0;
......@@ -182,183 +129,97 @@ fail5:
fail4:
lcd_arch_ept_unmap_range(lcd, __gpa(0), 1024);
fail3:
vmx_free_ept(lcd);
fail2:
kfree(lcd);
/* Tear down the lcd_arch obtained from lcd_arch_create(). */
lcd_arch_destroy(lcd);
fail1:
return ret;
}
/*
 * test05: check that initializing the gdt maps it at LCD_ARCH_GDTR_BASE.
 *
 * Sets up an ept and a gdt for a scratch lcd_arch, looks up
 * LCD_ARCH_GDTR_BASE in the ept, and compares the result with the
 * host physical address of lcd->gdt.
 *
 * Returns 0 on success, -1 on any failure. Everything that was set up
 * is torn down in reverse order on every path; in particular the gdt
 * is destroyed when the lookup or comparison fails, not only on
 * success.
 */
static int test05(void)
{
	struct lcd_arch *lcd;
	hpa_t hpa;
	int ret = -1;

	lcd = kmalloc(sizeof(*lcd), GFP_KERNEL);
	if (!lcd) {
		printk(KERN_ERR "lcd arch : test05 failed to alloc lcd\n");
		goto fail_alloc;
	}
	if (vmx_init_ept(lcd)) {
		printk(KERN_ERR "lcd arch : test05 ept init failed\n");
		goto fail_ept;
	}
	if (vmx_init_gdt(lcd)) {
		printk(KERN_ERR "lcd arch : test05 gdt init failed\n");
		goto fail_gdt;
	}
	if (lcd_arch_ept_gpa_to_hpa(lcd, LCD_ARCH_GDTR_BASE, &hpa)) {
		printk(KERN_ERR "lcd arch : test05 lookup failed\n");
		goto fail_lookup;
	}
	if (hpa_val(hpa) != hpa_val(va2hpa(lcd->gdt))) {
		printk(KERN_ERR "lcd arch : test05 unexpected gdt addr\n");
		goto fail_lookup;
	}

	ret = 0;

	/* Success falls through the same unwind ladder as the failures. */
fail_lookup:
	vmx_destroy_gdt(lcd);
fail_gdt:
	vmx_free_ept(lcd);
fail_ept:
	kfree(lcd);
fail_alloc:
	return ret;
}
static int test06(void)
{
struct lcd_arch *lcd;
hpa_t hpa;
lcd = (struct lcd_arch *)kmalloc(sizeof(*lcd), GFP_KERNEL);