All new accounts created on Gitlab now require administrator approval. If you invite any collaborators, please let Flux staff know so they can approve the accounts.

Commit dbc4e40c authored by Charlie Jacobsen's avatar Charlie Jacobsen Committed by Vikram Narayanan

Separated lcd into container and thread objects.

Updated code. Removed gdt/tss/idt for now. Added doc directory
and some initial doc.
parent 6fc91823
==============================
OVERVIEW
==============================
==============================
LOCKING
==============================
Locks are used on an lcd_arch's ept and its list of lcd_arch_threads.
We use mutexes for now, so some functions are not safe to call from
interrupt context (we can sleep when we lock a mutex).
This makes debugging easier since we can interrupt locks (no deadlocks).
==============================
GDT/TSS/IDT
==============================
We probably don't need a gdt/tss/idt right now. I'm removing that.
In the future, we could provide a secure interface for an lcd thread to
change its gdt/tss/idt info in its vmcs. The lcd thread could allocate a gdt,
etc. inside, and use the interface to update the gdtr, tr, idtr as needed.
The lcd thread could also tell the microkernel not to vmexit for certain
external interrupts.
If we do tss, each thread will probably need its own - since the tss contains
a stack pointer that varies with the threads.
I'm putting the gdt / tss init code here for reference since it was quite
complicated to do (some of the macros won't be relevant anymore):
/*
* Alloc zero'd page for gdt
*/
lcd->gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
if (!lcd->gdt) {
printk(KERN_ERR "vmx init gdt: failed to alloc gdt\n");
ret = -ENOMEM;
goto fail;
}
/*
*===--- Populate gdt; see layout in lcd-domains-arch.h. ---===
*/
/*
* Code Segment
*/
desc = lcd->gdt + (LCD_ARCH_CS_SELECTOR >> 3); /* div by 8 */
vmx_pack_desc(desc,
0, /* base */
0xFFFFF, /* limit (granularity = 1) */
0xB, /* code seg type, exec/read/accessed */
0x1, /* code/data segment desc type */
0x0, /* dpl = 0 */
0x1, /* present */
0x0, /* avl (not used) */
0x1, /* 64-bit code */
0x0, /* d must be cleared for 64-bit code */
0x1); /* 4KB granularity */
/*
* Data Segment (for %fs)
*/
desc = lcd->gdt + (LCD_ARCH_FS_SELECTOR >> 3); /* div by 8 */
vmx_pack_desc(desc,
0, /* base */
0xFFFFF, /* limit (granularity = 1) */
0x3, /* data seg type, read/write/accessed */
0x1, /* code/data segment desc type */
0x0, /* dpl = 0 */
0x1, /* present */
0x0, /* avl (not used) */
0x0, /* l (not 64-bit code) */
0x1, /* d (linux uses 1 for d ...) */
0x1); /* 4KB granularity */
/*
* Data Segment (for %gs)
*/
desc = lcd->gdt + (LCD_ARCH_GS_SELECTOR >> 3); /* div by 8 */
vmx_pack_desc(desc,
0, /* base */
0xFFFFF, /* limit (granularity = 1) */
0x3, /* data seg type, read/write/accessed */
0x1, /* code/data segment desc type */
0x0, /* dpl = 0 */
0x1, /* present */
0x0, /* avl (not used) */
0x0, /* l (not 64-bit code) */
0x1, /* d (linux uses 1 for d ...) */
0x1); /* 4KB granularity */
/*
* Task Segment (descriptor)
*/
tssd = (struct tss_desc *)(lcd->gdt + (LCD_ARCH_TR_SELECTOR >> 3));
set_tssldt_descriptor(tssd,
gpa_val(LCD_ARCH_TSS_BASE),/* base */
0xB, /* type = 64-bit busy tss */
LCD_ARCH_TSS_LIMIT); /* limit */
/*
*===--- Map GDT in guest physical address space ---===
*/
ret = lcd_arch_ept_map(lcd,
/* gpa */
LCD_ARCH_GDTR_BASE,
/* hpa */
va2hpa(lcd->gdt),
/* create paging structs as needed */
1,
/* no overwrite */
0);
======================================= END GDT CODE
/*
* Alloc zero'd page for tss.
*
* Only the first part of the page will be filled by the tss. This is
* done for now to make the address space layout simpler, but
* could perhaps be improved later.
*/
lcd->tss = (struct lcd_arch_tss *)get_zeroed_page(GFP_KERNEL);
if (!lcd->tss) {
printk(KERN_ERR "vmx_init_tss: failed to alloc tss\n");
ret = -ENOMEM;
goto fail;
}
base_tss = &(lcd->tss->base_tss);
/*
* Set up 64-bit TSS (See Intel SDM V3 7.7)
*
* XXX: No interrupt stack tables are used (since the lcd won't be
* handling interrupts for now).
*
* Privilege Level 0 Stack
*
* XXX: This should be moved to arch-dep interface code, so
* that arch-indep code can set it. Relies on arch-indep
* code to use gva = gpa!
*/
base_tss->sp0 = gpa_val(LCD_ARCH_STACK_TOP);
/*
* The TSS must have a minimal I/O bitmap with one byte of 1's
*
* Intel SDM V1 16.5.2
*/
base_tss->io_bitmap_base = offsetof(struct lcd_arch_tss, io_bitmap);
vcpu->tss->io_bitmap[0] = 0xff;
/*
*===--- Map TSS in guest physical address space ---===
*/
ret = lcd_arch_ept_map(vcpu,
/* gpa */
LCD_ARCH_TSS_BASE,
/* hpa */
va2hpa(vcpu->tss),
/* create paging structs as needed */
1,
/* no overwrite */
0);
......@@ -3,7 +3,7 @@
#include <asm/vmx.h>
#include <linux/spinlock.h>
#include <lcd-domains/utcb.h>
#include <linux/bitmap.h>
/* ADDRESS SPACE TYPES ---------------------------------------- */
......@@ -140,19 +140,15 @@ enum lcd_arch_reg {
#define LCD_ARCH_EPT_WALK_LENGTH 4
#define LCD_ARCH_EPTP_WALK_SHIFT 3
#define LCD_ARCH_PTRS_PER_EPTE (1 << 9)
#define LCD_ARCH_PTRS_PER_EPTE (1 << 9) /* 512 table entries */
typedef unsigned long lcd_arch_epte_t;
/*
* Extended page table (ept) state.
*/
struct lcd_arch_ept {
/*
* Lock for this ept.
* NOTE(review): presumably guards the paging structures reachable from
* root -- confirm against the ept map / unmap code.
*/
spinlock_t lock;
/*
* Pointer to ept table entries (lcd_arch_epte_t).
* NOTE(review): presumably the top-level table of the ept hierarchy --
* confirm.
*/
lcd_arch_epte_t *root;
/*
* NOTE(review): presumably the value loaded into the vmcs EPT_POINTER
* field -- confirm.
*/
u64 vmcs_ptr;
/*
* NOTE(review): presumably records whether ept accessed / dirty bits
* are in use -- confirm.
*/
bool access_dirty_enabled;
};
typedef unsigned long lcd_arch_epte_t; /* table entry */
struct lcd_arch_tss {
/*
* ** Currently not used. Keeping it here in case we add tss's back
* in later. See Documentation/lcd-domains/vmx.txt. **
*
* Intel SDM V3 7.7
*
* Base TSS before I/O bitmap, etc.
......@@ -167,28 +163,37 @@ struct lcd_arch_tss {
u8 io_bitmap[1];
} __attribute__((packed));
struct lcd_arch {
struct lcd_arch_thread {
/*
* Public Data
* Containing lcd_arch
*/
struct {
gva_t gv_fault_addr;
gpa_t gp_fault_addr;
} run_info;
struct lcd_arch *lcd_arch;
/*
* Private Data
* List of lcd_arch_thread's inside the containing lcd_arch.
* Protected by lock inside lcd_arch.
*/
struct list_head lcd_arch_threads;
/*
* CPU we're running on / vmloaded on
*/
int cpu;
/*
* = 1 if we've launched (need to use vm resume instead of launch
* if we have already launched; intel makes us track this)
*/
int launched;
/*
* VPID of the vm, used by the tlb for vpid-specific invalidations
*/
int vpid;
/*
* vmcs data structure; *must* be accessed using vmread / vmwrite
*/
struct lcd_arch_vmcs *vmcs;
struct lcd_arch_ept ept;
struct desc_struct *gdt;
struct lcd_arch_tss *tss;
struct lcd_utcb *utcb;
/*
* Exit info
*/
u8 fail;
u64 exit_reason;
u64 exit_qualification;
......@@ -196,12 +201,21 @@ struct lcd_arch {
u32 exit_intr_info;
u32 error_code;
u32 vec_no;
/*
* Stuff we need to save explicitly
*/
u64 host_rsp;
u64 regs[LCD_ARCH_NUM_REGS];
u64 cr2;
int shutdown;
int ret_code;
/*
* MSR autoload area (used during vm entries and exits). Fields must
* be word aligned!
*/
struct msr_autoload {
#if LCD_ARCH_NUM_AUTOLOAD_MSRS > 0
struct vmx_msr_entry guest[LCD_ARCH_NUM_AUTOLOAD_MSRS];
......@@ -213,6 +227,62 @@ struct lcd_arch {
} msr_autoload;
};
/*
* Guest Physical Memory Layout & Segment Registers
* ================================================
*
* No gdt/tss/idt for now (easier), but perhaps in the future (see
* Documentation/lcd-domains/vmx.txt). We can get away with this since
* we set all of the hidden fields in the segment registers - like %fs, %gs,
* etc.
*
* See Intel SDM V3 26.3.1.2, 26.3.1.3 for register requirements
* See Intel SDM V3 3.4.2, 3.4.3 for segment register layout
* See Intel SDM V3 2.4.1 - 2.4.4 for gdtr, ldtr, idtr, tr
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* | |
* : :
* : Free / Unmapped :
* : :
* | |
* LCD_ARCH_TOP----------> +---------------------------+ 0x0000 0000 0000 1000
* | Reserved |
* | (not mapped, catch NULLs) | (4 KBs)
* +---------------------------+ 0x0000 0000 0000 0000
*/
#define LCD_ARCH_TOP (1 << 12)
#define LCD_ARCH_FS_BASE __gpa(0UL)
#define LCD_ARCH_FS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GS_BASE __gpa(0UL)
#define LCD_ARCH_GS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GDTR_BASE __gpa(0UL)
#define LCD_ARCH_GDTR_LIMIT 0x0 /* no gdt */
#define LCD_ARCH_TSS_BASE __gpa(0UL)
#define LCD_ARCH_TSS_LIMIT 0x0 /* no tss */
#define LCD_ARCH_IDTR_BASE __gpa(0UL)
#define LCD_ARCH_IDTR_LIMIT 0x0 /* no idt right now */
/*
* Arch-dependent state of an lcd container: the guest physical address
* space (ept) and the list of lcd_arch_thread's that belong to it.
*/
struct lcd_arch {
/*
* The guest physical address space is shared by all lcd_arch_thread's.
*/
struct {
/*
* Lock for the ept.
* NOTE(review): this is a spinlock, but the locking notes in the
* vmx documentation say mutexes are used for the ept -- confirm
* which one is intended.
*/
spinlock_t lock;
/*
* Pointer to ept table entries (lcd_arch_epte_t).
* NOTE(review): presumably the top-level ept table -- confirm.
*/
lcd_arch_epte_t *root;
u64 vmcs_ptr; /* to be loaded in vmcs EPT_POINTER field */
/*
* NOTE(review): presumably records whether ept accessed / dirty
* bits are in use -- confirm.
*/
bool access_dirty_enabled;
} ept;
/*
* List of lcd_arch_thread's inside this lcd_arch.
*/
struct {
/*
* List of the lcd_arch_thread's.
* NOTE(review): presumably threads are linked in through their
* lcd_arch_threads member -- confirm.
*/
struct list_head list;
/*
* Protects list. This is a mutex, so locking it can sleep.
*/
struct mutex lock;
} lcd_arch_threads;
};
/**
* Initializes the arch-dependent code for LCD (detects required
* features, turns on VMX on *all* cpu's).
......@@ -339,68 +409,6 @@ int lcd_arch_set_pc(struct lcd_arch *vcpu, gva_t a);
*/
int lcd_arch_set_gva_root(struct lcd_arch *vcpu, gpa_t a);
/*
* GDT Layout
* ==========
* 0 = NULL
* 1 = Code segment
* 2 = Data segment (%fs, default not present)
* 3 = Data segment (%gs, default not present)
* 4 = Task segment
*
* See Intel SDM V3 26.3.1.2, 26.3.1.3 for register requirements.
* See Intel SDM V3 3.4.2, 3.4.3 for segment register layout
* See Intel SDM V3 2.4.1 - 2.4.4 for gdtr, ldtr, idtr, tr
*/
#define LCD_ARCH_FS_BASE __gpa(0UL)
#define LCD_ARCH_FS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GS_BASE __gpa(0UL)
#define LCD_ARCH_GS_LIMIT 0xFFFFFFFF
#define LCD_ARCH_GDTR_BASE __gpa(1UL << PAGE_SHIFT)
#define LCD_ARCH_GDTR_LIMIT 0xFFFUL
#define LCD_ARCH_TSS_BASE __gpa(2UL << PAGE_SHIFT)
/* tss base + limit = address of last byte in tss, hence -1 */
#define LCD_ARCH_TSS_LIMIT (sizeof(struct lcd_arch_tss) - 1)
#define LCD_ARCH_IDTR_BASE __gpa(0UL)
#define LCD_ARCH_IDTR_LIMIT 0x0 /* no idt right now */
#define LCD_ARCH_CS_SELECTOR (1 << 3)
#define LCD_ARCH_FS_SELECTOR (2 << 3)
#define LCD_ARCH_GS_SELECTOR (3 << 3)
#define LCD_ARCH_TR_SELECTOR (4 << 3) /* TI must be 0 */
#define LCD_ARCH_LDTR_SELECTOR (0 << 3) /* unusable */
/*
* Guest Physical Memory Layout
* ============================
*
* +---------------------------+ 0xFFFF FFFF FFFF FFFF
* | |
* : :
* : Free / Unmapped :
* : :
* | |
* LCD_ARCH_STACK_TOP,---> +---------------------------+ 0x0000 0000 0000 4000
* LCD_ARCH_FREE | |
* | Stack |
* : (grows down) : (4 KBs)
* : :
* | |
* | User Thread Ctrl Block |
* LCD_ARCH_UTCB---------> +---------------------------+ 0x0000 0000 0000 3000
* | TSS |
* | only sizeof(tss) is | (4 KBs)
* | used |
* LCD_ARCH_TSS_BASE-----> +---------------------------+ 0x0000 0000 0000 2000
* | GDT | (4 KBs)
* LCD_ARCH_GDTR_BASE----> +---------------------------+ 0x0000 0000 0000 1000
* | Reserved |
* | (not mapped) | (4 KBs)
* +---------------------------+ 0x0000 0000 0000 0000
*/
#define LCD_ARCH_UTCB __gpa(3UL << PAGE_SHIFT)
#define LCD_ARCH_STACK_TOP __gpa(4UL << PAGE_SHIFT)
#define LCD_ARCH_FREE LCD_ARCH_STACK_TOP
/*
* Accessor Macro for syscalls
......
This diff is collapsed.
#ifndef LCD_DOMAINS_IPC_H
#define LCD_DOMAINS_IPC_H
#include <linux/list.h>
#include <linux/mutex.h>
/*
* Endpoint for synchronous ipc.
*
* NOTE(review): the roles below are inferred from the field names only;
* confirm against the ipc send / receive code.
*/
struct sync_endpoint {
/*
* NOTE(review): presumably the senders waiting on this endpoint
*/
struct list_head senders;
/*
* NOTE(review): presumably the receivers waiting on this endpoint
*/
struct list_head receivers;
/*
* NOTE(review): presumably protects both lists above
*/
struct mutex lock;
};
#endif /* LCD_DOMAINS_IPC_H */
......@@ -3,77 +3,129 @@
#include <linux/module.h>
#include <asm/lcd-domains-arch.h>
#include <linux/sched.h>
/*
* lcd_status = status of kthread / lcd
* ====================================
* lcd_thread_status
* =================
*
* LCD_STATUS_UNFORMED = still setting up, or status not set yet
* LCD_STATUS_SUSPENDED = lcd is paused, and kthread is going to sleep / is
* asleep
* LCD_STATUS_RUNNABLE = lcd is paused, and kthread is awake, or should
* awaken
* (the status should be set to this if the lcd /
* kthread are suspended, and you want it to wake up)
* LCD_STATUS_RUNNING = lcd is running, or is about to run
* LCD_STATUS_KILL = lcd and kthread should die
* LCD_STATUS_DEAD = lcd is not running, most parts have been destroyed
* (lcd_struct is only hanging around to provide
* status info); the kthread is ready to die and be
* reaped
* Similar to status used for task_struct's.
*
* LCD_THREAD_UNFORMED = still setting up, or status not set yet
* LCD_THREAD_SUSPENDED = lcd_thread is going to sleep / is asleep
* LCD_THREAD_RUNNABLE = set status to this to wake up lcd_thread
* LCD_THREAD_RUNNING = lcd_thread is running, or is about to run
* LCD_THREAD_KILL = set status to this to kill lcd_thread (as soon as
* possible)
* LCD_THREAD_DEAD = lcd_thread is not running, most parts have been
* destroyed (only hanging around to provide
* status info)
*/
enum lcd_status {
LCD_STATUS_UNFORMED = 0,
LCD_STATUS_SUSPENDED = 1,
LCD_STATUS_RUNNABLE = 2,
LCD_STATUS_RUNNING = 3,
LCD_STATUS_KILL = 4,
LCD_STATUS_DEAD = 5,
/*
 * Status of an lcd_thread. The values are consecutive, starting at 0.
 */
enum lcd_thread_status {
	/* still setting up, or status not set yet */
	LCD_THREAD_UNFORMED = 0,
	/* lcd_thread is going to sleep / is asleep */
	LCD_THREAD_SUSPENDED,
	/* set status to this to wake up lcd_thread */
	LCD_THREAD_RUNNABLE,
	/* lcd_thread is running, or is about to run */
	LCD_THREAD_RUNNING,
	/* set status to this to kill lcd_thread (as soon as possible) */
	LCD_THREAD_KILL,
	/* not running, most parts destroyed; kept only for status info */
	LCD_THREAD_DEAD,
};
struct lcd {
struct lcd;
/*
* A thread inside an lcd. Holds a pointer back to the containing lcd, the
* thread's utcb, its status, and its arch-dependent state.
*/
struct lcd_thread {
/*
* Display name
*/
char name[MODULE_NAME_LEN];
/*
* The containing lcd
*/
struct lcd *lcd;
/*
* List of threads in containing lcd
*/
struct list_head lcd_threads;
/*
* Thread control block, accessible by lcd_thread while running inside
* lcd. Contains message registers, etc.
*/
struct lcd_utcb *utcb;
/*
* Status (enum lcd_thread_status)
*/
int status;
/*
* Arch-dependent state of lcd_thread
*/
struct lcd_arch_thread *lcd_arch_thread;
};
/*
* LCD Memory Layout
* =================
*
* The layout below reflects the guest physical *and* virtual memory
* layout. Guest virtual paging memory is filled in on demand
* during lcd initialization (so parts may not be backed by host physical
* memory), and stacks/utcb's are also filled in on demand as threads are
* created and added to the lcd. The lcd starts with one lcd_thread, so
* Stack 0 / utcb 0 will be mapped after the lcd is initialized.
*
* Guest physical addresses are mapped one-to-one to the same guest
* virtual addresses.
*
* The module is mapped to the same guest physical / guest virtual
* address space as the host, to avoid relocating symbols.
*
* +---------------------------+
* module mapped | |
* somewhere in : :
* here -------> : :
* at a higher | |
* address | |
* | |
* +---------------------------+
* | Stack 1023 |
* : : (4 KBs)
* | utcb 1023 |
* +---------------------------+
* | ... |
* : :
* | ... |
* +---------------------------+
* | Stack 1 |
* : : (4 KBs)
* | utcb 1 |
* +---------------------------+
* | Stack 0 |
* : : (4 KBs)
* | utcb 0 |
* +---------------------------+
* | Guest Virtual | (4 MBs)
* | Paging Memory |
* LCD_ARCH_FREE---> +---------------------------+
* | |
* : Reserved Arch Memory :
* | |
* +---------------------------+ 0x0000 0000 0000 0000
*/
#define LCD_PAGING_MEM_SIZE (4 << 20)
/*
 * An lcd container: its arch-dependent state, display name, and the
 * bookkeeping for its guest virtual paging.
 */
struct lcd {
	/*
	 * Arch-dependent state of lcd
	 */
	struct lcd_arch *lcd_arch;
	/*
	 * Display name
	 */
	char name[MODULE_NAME_LEN];
	/*
	 * Guest virtual paging:
	 *
	 * root is the host virtual address that points to the root of
	 * the lcd's guest virtual paging hierarchy.
	 *
	 * We use a simple bitmap for allocating memory used for page tables
	 * for the lcd's guest virtual address space. This is only needed when
	 * the lcd is being set up - mapping the arch bits, module code, the
	 * first lcd_thread's tcb, etc.
	 */
	struct {
		/*
		 * = 0 if root invalid
		 * = 1 if root valid
		 */
		int present;
		/*
		 * Host virtual address of the root of the lcd's
		 * (initial) guest virtual paging hierarchy.
		 */
		pgd_t *root;
		/*
		 * Pointer to start of guest physical address space
		 * used for paging.
		 */
		gpa_t paging_mem_bot;
		/*
		 * Pointer to next free page in guest physical
		 * address space that can be used for a page table.
		 */
		gpa_t paging_mem_brk;
		/*
		 * Top of region in guest physical address space
		 * for page tables.
		 */
		gpa_t paging_mem_top;
	} gv;
	pgd_t *root;
	/*
	 * One bit per page of guest virtual paging memory. The byte count
	 * is converted to a page count by shifting right by PAGE_SHIFT
	 * (log2 of the page size), not by PAGE_SIZE itself: shifting by the
	 * page size in bytes exceeds the width of the operand.
	 */
	DECLARE_BITMAP(gv_paging_bmap, (LCD_PAGING_MEM_SIZE >> PAGE_SHIFT));
};
/**
......@@ -83,7 +135,7 @@ struct lcd {
* -- Spawns a kernel thread that will host the lcd.
* -- The kernel thread will create the lcd and map the module into
* the lcd. The kernel thread will then wait with the lcd's status
* set to LCD_STATUS_SUSPENDED.
* set to LCD_THREAD_SUSPENDED.
* -- Call lcd_run_as_module to start running the lcd.
* -- Returns NULL if we fail to create the kernel thread, or if the
* kernel thread failed to initialize the lcd, etc.
......
......@@ -25,6 +25,7 @@
#include <asm/lcd-domains-arch.h>
#include <lcd-domains/lcd-domains.h>
#include <lcd-domains/syscall.h>
#include <lcd-domains/ipc.h>
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LCD driver");
......@@ -1452,10 +1453,52 @@ static int lcd_module_kthread_die(struct lcd *lcd,
return run_ret_val;
}
/*
 * Copies message registers from from's utcb into the same slots of to's
 * utcb, for every index below from's max_valid_reg_idx, and then resets
 * from's max_valid_reg_idx to 0.
 *
 * to's max_valid_reg_idx is not modified here.
 *
 * NOTE(review): the bound is exclusive, so the register at index
 * max_valid_reg_idx itself is not copied -- confirm that is intended
 * given the field's name.
 */
static void copy_msg_regs(struct lcd *from, struct lcd *to)
{
	int idx = 0;

	while (idx < from->utcb.max_valid_reg_idx) {
		to->utcb.regs[idx] = from->utcb.regs[idx];
		idx++;
	}
	/*
	 * reset
	 */
	from->utcb.max_valid_reg_idx = 0;
}
/*
 * Grants the capability at from_ptr in from's cspace to the slot to_ptr
 * in to's cspace, passing LCD_CAP_RIGHT_ALL to lcd_cnode_grant.
 *
 * A non-zero result from lcd_cnode_grant is only logged via LCD_ERR; this
 * function returns void, so the caller is not told about the failure.
 */
static void copy_msg_cap(struct lcd *from, struct lcd *to,
			 cptr_t from_ptr, cptr_t to_ptr)
{
	int err = lcd_cnode_grant(from->cspace, to->cspace, from_ptr, to_ptr,
				  LCD_CAP_RIGHT_ALL);

	if (!err) {
		return;
	}
	LCD_ERR("failed to transfer cap @ %d in lcd %p to slot @ %d in lcd %p",
		from_ptr, from, to_ptr, to);
}
static int lcd_handle_syscall(struct lcd *lcd)
{
int syscall_id;
syscall_id = LCD_ARCH_GET_SYSCALL_NUM(lcd->lcd_arch);