Commit c989d312 authored by Weibin Sun, committed by Vikram Narayanan

merge

parent b458fdbb
#ifndef LCD_LCD_H
#define LCD_LCD_H
#include <linux/bitmap.h>
/* Memory management */
#define NR_PT_PAGES (1 << 15) /* #pages for page table */
#define PT_PAGES_START (0x1ULL << 24) /* above 16MB */
#define PT_PAGES_END (PT_PAGES_START + (NR_PT_PAGES << PAGE_SHIFT))
#define NORMAL_MEM_START (0x1ULL << 30)
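/*
 * Resulting guest-physical layout (derived from the constants above):
 * the page-table pool occupies [16MB, 16MB + (1 << 15) * 4KB) = [16MB, 144MB),
 * and normal guest memory begins at 1GB (NORMAL_MEM_START).
 */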
#define EPT_LEVELS 4
#define VMX_EPT_FAULT_READ 0x01
#define VMX_EPT_FAULT_WRITE 0x02
#define VMX_EPT_FAULT_INS 0x04
typedef unsigned long epte_t;
#define __EPTE_READ 0x01
#define __EPTE_WRITE 0x02
#define __EPTE_EXEC 0x04
#define __EPTE_IPAT 0x40
#define __EPTE_SZ 0x80
#define __EPTE_A 0x100
#define __EPTE_D 0x200
#define __EPTE_TYPE(n) (((n) & 0x7) << 3)
enum {
EPTE_TYPE_UC = 0, /* uncachable */
EPTE_TYPE_WC = 1, /* write combining */
EPTE_TYPE_WT = 4, /* write through */
EPTE_TYPE_WP = 5, /* write protected */
EPTE_TYPE_WB = 6, /* write back */
};
#define __EPTE_NONE 0
#define __EPTE_FULL (__EPTE_READ | __EPTE_WRITE | __EPTE_EXEC)
#define EPTE_ADDR (~(PAGE_SIZE - 1))
#define EPTE_FLAGS (PAGE_SIZE - 1)
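/*
 * Illustrative composition (a sketch, not code from this commit): a leaf
 * EPTE for a normal RAM page combines the host-physical address with full
 * permissions and a write-back memory type, e.g.
 *
 *	epte_t e = (hpa & EPTE_ADDR) | __EPTE_FULL | __EPTE_TYPE(EPTE_TYPE_WB);
 *
 * __EPTE_IPAT would additionally ignore the guest PAT for this mapping.
 */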
#define ADDR_TO_IDX(la, n) \
((((unsigned long) (la)) >> (12 + 9 * (n))) & ((1 << 9) - 1))
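/*
 * Example (values follow directly from the macro): ADDR_TO_IDX yields the
 * 9-bit table index for walk level n. For an address with only bit 30 set
 * (NORMAL_MEM_START, 1GB):
 *	ADDR_TO_IDX(1ULL << 30, 3) == 0
 *	ADDR_TO_IDX(1ULL << 30, 2) == 1
 *	ADDR_TO_IDX(1ULL << 30, 1) == 0
 *	ADDR_TO_IDX(1ULL << 30, 0) == 0
 * i.e. bit 30 is bit 0 of the level-2 index field (bits 38:30).
 */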
/* VMCS related */
struct vmcs_config {
int size;
int order;
u32 revision_id;
u32 pin_based_exec_ctrl;
u32 cpu_based_exec_ctrl;
u32 cpu_based_2nd_exec_ctrl;
u32 vmexit_ctrl;
u32 vmentry_ctrl;
};
struct vmcs {
u32 revision_id;
u32 abort;
@@ -14,6 +72,7 @@ struct vmx_capability {
};
extern struct vmx_capability vmx_capability;
extern struct vmcs_config vmcs_config;
#define NR_AUTOLOAD_MSRS 8
@@ -43,18 +102,12 @@ struct vmx_vcpu {
int vpid;
int launched;
struct mmu_notifier mmu_notifier;
spinlock_t ept_lock;
unsigned long ept_root;
unsigned long eptp;
bool ept_ad_enabled;
struct page *pgd_table;
struct page *pud_table_0;
struct page *pmd_table_1;
struct page *pte_table_0;
unsigned long page_table_pool;
DECLARE_BITMAP(bmp_pt_pages, NR_PT_PAGES);
u8 fail;
u64 exit_reason;
......
#ifndef LCD_INTERNAL_H
#define LCD_INTERNAL_H
#endif
/*
* LCD MM Rules:
* - No huge/large pages
* - Dedicated page table structure pages
*/
#include <linux/bitmap.h>
#define NR_PT_PAGES (1 << 15) // Pages for page table
#define PT_PAGES_START (0x1ULL << 24) // Above 16MB
#define PT_PAGES_END (PT_PAGES_START + (NR_PT_PAGES << PAGE_SHIFT))
void init_mm(void) {
}
unsigned long alloc_pt_pfn(struct vmx_vcpu *vcpu) {
unsigned long which = bitmap_find_next_zero_area(vcpu->bmp_pt_pages,
NR_PT_PAGES,
0, 1, 0);
if (which >= NR_PT_PAGES) {
return 0;
} else {
bitmap_set(vcpu->bmp_pt_pages, which, 1);
return (PT_PAGES_START >> PAGE_SHIFT) + which;
}
}
int free_pt_pfn(struct vmx_vcpu *vcpu, unsigned long pfn) {
unsigned long which = pfn - (PT_PAGES_START >> PAGE_SHIFT);
if (which >= NR_PT_PAGES) {
return -EINVAL;
} else {
bitmap_clear(vcpu->bmp_pt_pages, which, 1);
return 0;
}
}
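/*
 * Minimal usage sketch (assumes bmp_pt_pages was zero-initialized):
 *
 *	unsigned long gpfn = alloc_pt_pfn(vcpu);
 *	if (gpfn) {
 *		... use guest pfn gpfn for a page-table structure page ...
 *		free_pt_pfn(vcpu, gpfn);
 *	}
 *
 * Returning 0 for "pool exhausted" is unambiguous because the pool starts
 * at PT_PAGES_START >> PAGE_SHIFT == 0x1000, so pfn 0 is never handed out.
 */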
int ept_alloc_pt_item(struct vmx_vcpu *vcpu, unsigned long *gpfn,
unsigned long *page) {
int ret = 0;
*gpfn = alloc_pt_pfn(vcpu);
if (*gpfn == 0) {
ret = -ENOMEM;
} else {
// FIXME: Which mem flag do we need?
*page = __get_free_page(GFP_ATOMIC);
if (!(*page)) {
ret = -ENOMEM;
} else {
memset((void*)(*page), 0, PAGE_SIZE);
ret = ept_set_epte(vcpu, ((*gpfn) << PAGE_SHIFT), *page, 0);
}
}
return ret;
}
int ept_gpa_to_hva(struct vmx_vcpu* vcpu, unsigned long gpa,
unsigned long* hva) {
epte_t *epte;
int ret = ept_lookup_gpa(vcpu, gpa, 0, &epte);
if (ret) {
printk(KERN_ERR "ept: failed to lookup GPA: %p\n",
(void*)gpa);
} else {
if (!epte_present(*epte)) {
printk(KERN_ERR "ept: epte not present when translating\n");
ret = -EINVAL;
} else {
*hva = (unsigned long)epte_page_vaddr(*epte);
}
}
return ret;
}
/*
 * Establish a gva => gpa mapping in the guest page table. No backing pages
 * are allocated for gva or gpa; only the pte value is set. Page table
 * structure pages along the walk can be created by passing create=1.
 */
int map_gva_to_gpa(struct vmx_vcpu *vcpu,
unsigned long gva,
unsigned long gpa,
int create, int overwrite) {
int ret = 0;
unsigned long gpfn, page;
epte_t *epte;
pud_t *pud_dir, *pud;
pmd_t *pmd_dir, *pmd;
pte_t *pte_dir, *pte;
pgd_t *pgd;
if (!vcpu->pt) {
if (!create)
return -ENOENT;
else {
ret = ept_alloc_pt_item(vcpu, &gpfn, &page);
if (ret)
return ret;
else {
vcpu->pt = (pgd_t*)page;
vcpu->pt_gpa = (gpfn << PAGE_SHIFT);
}
}
}
pgd = vcpu->pt + pgd_index(gva);
if (!pgd_present(*pgd)) {
if (!create) {
return -ENOENT;
} else {
ret = ept_alloc_pt_item(vcpu, &gpfn, &page);
if (ret) {
return ret;
} else {
set_pgd(pgd, mk_kernel_pgd((gpfn << PAGE_SHIFT)));
pud_dir = (pud_t*)page;
} // ept_alloc_pt_item
} // !create
} else {
ret = ept_gpa_to_hva(vcpu, pgd_val(*pgd)&PTE_PFN_MASK,
(unsigned long*)(&pud_dir));
if (ret)
return ret;
} // !pgd_present
pud = pud_dir + pud_index(gva);
if (!pud_present(*pud)) {
if (!create) {
return -ENOENT;
} else {
ret = ept_alloc_pt_item(vcpu, &gpfn, &page);
if (ret) {
return ret;
} else {
set_pud(pud, __pud((gpfn << PAGE_SHIFT)|_KERNPG_TABLE));
pmd_dir = (pmd_t*)page;
} // ept_alloc_pt_item
} // !create
} else {
ret = ept_gpa_to_hva(vcpu, pud_val(*pud)&PTE_PFN_MASK,
(unsigned long*)(&pmd_dir));
if (ret)
return ret;
} // !pud_present
pmd = pmd_dir + pmd_index(gva);
if (!pmd_present(*pmd)) {
if (!create) {
return -ENOENT;
} else {
ret = ept_alloc_pt_item(vcpu, &gpfn, &page);
if (ret) {
return ret;
} else {
set_pmd(pmd, __pmd((gpfn << PAGE_SHIFT)|_KERNPG_TABLE));
pte_dir = (pte_t*)page;
} // ept_alloc_pt_item
} // !create
} else {
ret = ept_gpa_to_hva(vcpu, pmd_val(*pmd)&PTE_PFN_MASK,
(unsigned long*)(&pte_dir));
if (ret)
return ret;
} // !pmd_present
pte = pte_dir + pte_index(gva);
if (!pte_present(*pte) || overwrite) {
set_pte(pte, __pte((gpa & PTE_PFN_MASK)|__PAGE_KERNEL_EXEC));
} else {
printk(KERN_ERR "mm: pte conflicts %p %p\n", (void*)gpa, (void*)(*pte));
ret = -EINVL;
}
return ret;
}
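/*
 * Usage sketch (illustrative; not from this commit): identity-map one guest
 * page at NORMAL_MEM_START, creating intermediate table pages on demand
 * with create=1, overwrite=0:
 *
 *	int ret = map_gva_to_gpa(vcpu, NORMAL_MEM_START, NORMAL_MEM_START, 1, 0);
 *	if (ret)
 *		printk(KERN_ERR "lcd: map failed: %d\n", ret);
 *
 * With create == 0 the walk returns -ENOENT at the first missing level;
 * with overwrite == 0 a conflicting pte returns -EINVAL.
 */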
#include "utils.h"
#include <linux/mm.h>
#ifndef LCD_UTILS_H
#define LCD_UTILS_H
#endif
/*
* vmx_ops.c - VMX primitive operations
*
* To be included by other higher level VMX setup source files.
*/
#ifndef LCD_VMX_OPS_C
#define LCD_VMX_OPS_C
static inline bool cpu_has_secondary_exec_ctrls(void)
{
return vmcs_config.cpu_based_exec_ctrl &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
}
static inline bool cpu_has_vmx_vpid(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_ENABLE_VPID;
}
static inline bool cpu_has_vmx_invpcid(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_ENABLE_INVPCID;
}
static inline bool cpu_has_vmx_invvpid_single(void)
{
return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
}
static inline bool cpu_has_vmx_invvpid_global(void)
{
return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}
static inline bool cpu_has_vmx_ept(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_ENABLE_EPT;
}
static inline bool cpu_has_vmx_invept_individual_addr(void)
{
return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
}
static inline bool cpu_has_vmx_invept_context(void)
{
return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
}
static inline bool cpu_has_vmx_invept_global(void)
{
return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
}
static inline bool cpu_has_vmx_ept_ad_bits(void)
{
return vmx_capability.ept & VMX_EPT_AD_BIT;
}
static inline void __invvpid(int ext, u16 vpid, gva_t gva)
{
struct {
u64 vpid : 16;
u64 rsvd : 48;
u64 gva;
} operand = { vpid, 0, gva };
asm volatile (ASM_VMX_INVVPID
/* CF==1 or ZF==1 --> rc = -1 */
"; ja 1f ; ud2 ; 1:"
: : "a"(&operand), "c"(ext) : "cc", "memory");
}
static inline void vpid_sync_vcpu_single(u16 vpid)
{
if (vpid == 0)
return;
if (cpu_has_vmx_invvpid_single())
__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
}
static inline void vpid_sync_vcpu_global(void)
{
if (cpu_has_vmx_invvpid_global())
__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
}
static inline void vpid_sync_context(u16 vpid)
{
if (cpu_has_vmx_invvpid_single())
vpid_sync_vcpu_single(vpid);
else
vpid_sync_vcpu_global();
}
static inline u16 vmx_read_ldt(void)
{
u16 ldt;
asm("sldt %0" : "=g"(ldt));
return ldt;
}
static unsigned long segment_base(u16 selector)
{
struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
struct desc_struct *d;
unsigned long table_base;
unsigned long v;
if (!(selector & ~3))
return 0;
table_base = gdt->address;
if (selector & 4) { /* from ldt */
u16 ldt_selector = vmx_read_ldt();
if (!(ldt_selector & ~3))
return 0;
table_base = segment_base(ldt_selector);
}
d = (struct desc_struct *)(table_base + (selector & ~7));
v = get_desc_base(d);
#ifdef CONFIG_X86_64
if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
#endif
return v;
}
static inline unsigned long vmx_read_tr_base(void)
{
u16 tr;
asm("str %0" : "=g"(tr));
return segment_base(tr);
}
static void __vmx_setup_cpu(void)
{
struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
unsigned long sysenter_esp;
unsigned long tmpl;
/*
* Linux uses per-cpu TSS and GDT, so set these when switching
* processors.
*/
vmcs_writel(HOST_TR_BASE, vmx_read_tr_base()); /* 22.2.4 */
vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
rdmsrl(MSR_FS_BASE, tmpl);
vmcs_writel(HOST_FS_BASE, tmpl); /* 22.2.4 */
rdmsrl(MSR_GS_BASE, tmpl);
vmcs_writel(HOST_GS_BASE, tmpl); /* 22.2.4 */
}
static void __vmx_get_cpu_helper(void *ptr)
{
struct vmx_vcpu *vcpu = ptr;
BUG_ON(raw_smp_processor_id() != vcpu->cpu);
vmcs_clear(vcpu->vmcs);
if (__get_cpu_var(local_vcpu) == vcpu)
__get_cpu_var(local_vcpu) = NULL;
}
/**
* vmx_get_cpu - called before using a cpu
* @vcpu: VCPU that will be loaded.
*
* Disables preemption. Call vmx_put_cpu() when finished.
*/
static void vmx_get_cpu(struct vmx_vcpu *vcpu)
{
int cur_cpu = get_cpu();
if (__get_cpu_var(local_vcpu) != vcpu) {
__get_cpu_var(local_vcpu) = vcpu;
if (vcpu->cpu != cur_cpu) {
if (vcpu->cpu >= 0)
smp_call_function_single(
vcpu->cpu,
__vmx_get_cpu_helper, (void *) vcpu, 1);
else
vmcs_clear(vcpu->vmcs);
vpid_sync_context(vcpu->vpid);
ept_sync_context(vcpu->eptp);
vcpu->launched = 0;
vmcs_load(vcpu->vmcs);
__vmx_setup_cpu();
vcpu->cpu = cur_cpu;
} else {
vmcs_load(vcpu->vmcs);
}
}
}
/**
* vmx_put_cpu - called after using a cpu
* @vcpu: VCPU that was loaded.
*/
static void vmx_put_cpu(struct vmx_vcpu *vcpu)
{
put_cpu();
}
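/*
 * Typical bracketing (a sketch; 'some_rip' is a placeholder): every VMCS
 * access for a vcpu must sit between get and put, since vmx_get_cpu()
 * loads the vcpu's VMCS on this cpu and leaves preemption disabled:
 *
 *	vmx_get_cpu(vcpu);
 *	vmcs_writel(GUEST_RIP, some_rip);
 *	vmx_put_cpu(vcpu);
 */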
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
u32 vmx_msr_low, vmx_msr_high;
u32 ctl = ctl_min | ctl_opt;
rdmsr(msr, vmx_msr_low, vmx_msr_high);
ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
/* Ensure minimum (required) set of control bits are supported. */
if (ctl_min & ~ctl)
return -EIO;
*result = ctl;
return 0;
}
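/*
 * Example use (a sketch in the style of KVM's setup_vmcs_config; the exact
 * min/opt sets here are assumptions, not this commit's values):
 *
 *	u32 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
 *	u32 opt = 0;
 *	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 *				&vmcs_config.pin_based_exec_ctrl) < 0)
 *		return -EIO;
 */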
static __init bool allow_1_setting(u32 msr, u32 ctl)
{
u32 vmx_msr_low, vmx_msr_high;
rdmsr(msr, vmx_msr_low, vmx_msr_high);
return vmx_msr_high & ctl;
}
static inline void __vmxon(u64 addr)
{
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
}
static inline void __vmxoff(void)
{
asm volatile (ASM_VMX_VMXOFF : : : "cc");
}
static void vmcs_clear(struct vmcs *vmcs)
{
u64 phys_addr = __pa(vmcs);
u8 error;
asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0"
: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
if (error)
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
vmcs, phys_addr);
}
static void vmcs_load(struct vmcs *vmcs)
{
u64 phys_addr = __pa(vmcs);
u8 error;
asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
if (error)
printk(KERN_ERR "vmx: vmptrld %p/%llx failed\n",
vmcs, phys_addr);
}
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
unsigned long value;
asm volatile (ASM_VMX_VMREAD_RDX_RAX
: "=a"(value) : "d"(field) : "cc");
return value;
}
static __always_inline u16 vmcs_read16(unsigned long field)
{
return vmcs_readl(field);
}
static __always_inline u32 vmcs_read32(unsigned long field)
{
return vmcs_readl(field);
}
static __always_inline u64 vmcs_read64(unsigned long field)
{
return vmcs_readl(field);
}
static noinline void vmwrite_error(unsigned long field, unsigned long value)
{
printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
dump_stack();
}
static void vmcs_writel(unsigned long field, unsigned long value)
{
u8 error;
asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
: "=q"(error) : "a"(value), "d"(field) : "cc");
if (unlikely(error))
vmwrite_error(field, value);
}
static void vmcs_write16(unsigned long field, u16 value)
{
vmcs_writel(field, value);
}
static void vmcs_write32(unsigned long field, u32 value)