Commit 18122896 authored by Charlie Jacobsen, committed by Vikram Narayanan

Finished lcd_vmx_init and its dependencies.

Added a few missing macros to arch/x86/include/vmx.h,
and RESERVED masks for easily determining which bits
in a vmx control are reserved (needed in adjust_vmx_controls).

Re-factored setup_vmcs_config and adjust_vmx_controls.
setup_vmcs_config does pretty much the same thing, but it
fails immediately if a control isn't available --
adjust_vmx_controls confirms that the exact desired
controls are available, and sets the reserved bits to
1 or 0 as needed. Cleaner comments and organization.
Re-factored the vmx basic settings checks into
vmcs_config_basic_settings.

Removed some of the vmx feature check code that was in
the original lcd_vmx_init, as setup_vmcs_config now does
that.

Essentially a straight copy over of:

-- __vmx_enable
-- vmx_enable
-- vmx_disable
-- vmx_free_vmxon_areas
-- __vmxon
-- __vmxoff

The only difference is that I shifted TLB and cache
invalidation to vmx_enable (originally in __vmx_enable)
and added some comments.

Straight copy over of:
-- vmx_alloc_vmcs
-- vmx_free_vmcs
-- invvpid, invept code, with slight renaming
parent 8a6ad472
#ifndef LCD_VMX_H
#define LCD_VMX_H
struct lcd_vmx_config {
int size;
int order;
u32 revision_id;
u32 pin_based_exec_ctrl;
u32 cpu_based_exec_ctrl;
u32 cpu_based_2nd_exec_ctrl;
u32 vmexit_ctrl;
u32 vmentry_ctrl;
};
struct lcd_vmx_vmcs {
u32 revision_id;
u32 abort;
char data[0];
};
struct lcd_vmx_capability {
u32 ept;
u32 vpid;
};
#define LCD_VMX_NUM_AUTOLOAD_MSRS 8
enum lcd_vmx_reg {
......
@@ -52,14 +52,14 @@
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
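/*
 * A set bit in a *_RESERVED_MASK marks a bit that is reserved in the
 * corresponding VMX control word; adjust_vmx_controls masks these
 * bits out when checking whether the desired controls are available.
 */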
#define CPU_BASED_RESERVED_MASK 0x04066173
/*
* Definitions of Secondary Processor-Based VM-Execution Controls.
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_DESCRIPTOR_TABLE_EXIT 0x00000004
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
@@ -68,17 +68,22 @@
#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_ENABLE_VMFUNCTIONS 0x00002000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define SECONDARY_EXEC_EPT_VIOLATION_VE 0x00040000
#define SECONDARY_EXEC_RESERVED_MASK 0xFFFB8000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008
#define PIN_BASED_VIRTUAL_NMIS 0x00000020
#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
#define PIN_BASED_POSTED_INTR 0x00000080
#define PIN_BASED_RESERVED_MASK 0xFFFFFF16
#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
@@ -92,6 +97,7 @@
#define VM_EXIT_LOAD_IA32_EFER 0x00200000
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_RESERVED_MASK 0xFF836DFD
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -103,6 +109,7 @@
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_RESERVED_MASK 0xFFFF11FD
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
......
@@ -10,11 +10,564 @@
*
*/
#include <asm/virtext.h>
#include <linux/bitmap.h>
#include <asm/lcd-vmx.h>
static struct lcd_vmx_config vmcs_config;
static struct lcd_vmx_capability vmx_capability;
static atomic_t vmx_enable_failed;
static DEFINE_PER_CPU(int, vmx_enabled);
static DEFINE_PER_CPU(struct vmcs *, vmxon_area);
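/*
 * Tracks which virtual processor ids (VPIDs) are in use. Bit 0 is
 * reserved for the host (set in lcd_vmx_init); the lock is meant to
 * serialize VPID allocation and freeing.
 */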
static struct {
DECLARE_BITMAP(bitmap, LCD_VMX_NUM_VPIDS);
spinlock_t lock;
} vpids;
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
static DEFINE_PER_CPU(struct lcd *, local_vcpu);
static unsigned long *msr_bitmap;
/* INVEPT / INVVPID --------------------------------------------------*/
static inline bool cpu_has_vmx_invvpid_single(void) {
return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
}
static inline bool cpu_has_vmx_invvpid_global(void) {
return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}
static inline bool cpu_has_vmx_invept_individual_addr(void) {
return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
}
static inline bool cpu_has_vmx_invept_context(void) {
return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
}
static inline bool cpu_has_vmx_invept_global(void) {
return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
}
static inline bool cpu_has_vmx_ept_ad_bits(void) {
return vmx_capability.ept & VMX_EPT_AD_BIT;
}
static inline void __invept(int ext, u64 eptp, u64 gpa) {
struct {
u64 eptp, gpa;
} operand = {eptp, gpa};
asm volatile (ASM_VMX_INVEPT
/* CF==1 or ZF==1 --> rc = -1 */
"; ja 1f ; ud2 ; 1:\n"
: : "a" (&operand), "c" (ext) : "cc", "memory");
}
static inline void invept_global(void) {
if (cpu_has_vmx_invept_global())
__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
}
static inline void invept_context(u64 eptp) {
if (cpu_has_vmx_invept_context())
__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
else
invept_global();
}
static inline void invept_individual_addr(u64 eptp, u64 gpa) {
if (cpu_has_vmx_invept_individual_addr())
__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
eptp, gpa);
else
invept_context(eptp);
}
static inline void __invvpid(int ext, u16 vpid, u64 gva) {
struct {
u64 vpid : 16;
u64 rsvd : 48;
u64 gva;
} operand = { vpid, 0, gva };
asm volatile (ASM_VMX_INVVPID
/* CF==1 or ZF==1 --> rc = -1 */
"; ja 1f ; ud2 ; 1:"
: : "a"(&operand), "c"(ext) : "cc", "memory");
}
static inline void invvpid_global(void) {
if (cpu_has_vmx_invvpid_global())
__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
}
static inline void invvpid_single_context(u16 vpid) {
/*
* Don't invalidate host mappings
*/
if (vpid == 0)
return;
if (cpu_has_vmx_invvpid_single())
__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
else
invvpid_global();
}
/* VMCS SETUP --------------------------------------------------*/
static void vmx_free_vmcs(struct vmcs *vmcs) {
free_pages((unsigned long)vmcs, vmcs_config.order);
}
/**
* Allocates memory for a vmcs on cpu, and sets the
* revision id.
*/
static struct vmcs *vmx_alloc_vmcs(int cpu) {
int node;
struct page *pages;
struct vmcs *vmcs;
node = cpu_to_node(cpu);
pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order);
if (!pages)
return NULL;
vmcs = page_address(pages);
memset(vmcs, 0, vmcs_config.size);
vmcs->revision_id = vmcs_config.revision_id;
return vmcs;
}
/* VMX ON/OFF --------------------------------------------------*/
static inline void __vmxon(u64 addr) {
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&addr), "m"(addr)
: "memory", "cc");
}
static inline void __vmxoff(void) {
asm volatile (ASM_VMX_VMXOFF : : : "cc");
}
static int __vmx_enable(struct vmcs *vmxon_buf) {
u64 phys_addr;
u64 old;
u64 test_bits;
phys_addr = __pa(vmxon_buf);
/*
* Intel SDM V3 23.7
*/
/*
* We can't use vmx if someone else is
*/
if (read_cr4() & X86_CR4_VMXE)
return -EBUSY;
write_cr4(read_cr4() | X86_CR4_VMXE);
/*
* Set MSR_IA32_FEATURE_CONTROL
*/
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
test_bits = FEATURE_CONTROL_LOCKED;
test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
if (tboot_enabled())
test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
if ((old & test_bits) != test_bits) {
/* enable and lock */
wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
}
/*
* Turn on vmx
*/
__vmxon(phys_addr);
return 0;
}
/**
* Turn on vmx on calling cpu, using per cpu vmxon_area.
*
* unused is ignored (NULL is passed in lcd_vmx_init).
*
* Important: Assumes preemption is disabled (it will be
* if called via on_each_cpu).
*/
static void vmx_enable(void *unused) {
int ret;
struct vmcs *vmxon_buf;
vmxon_buf = __get_cpu_var(vmxon_area);
/*
* Turn on vmx
*/
ret = __vmx_enable(vmxon_buf);
if (ret)
goto failed;
/*
* Flush TLB and caches of any old VPID and EPT
* mappings.
*/
invvpid_global();
invept_global();
__get_cpu_var(vmx_enabled) = 1;
printk(KERN_INFO "lcd vmx: VMX enabled on CPU %d\n",
raw_smp_processor_id());
return;
failed:
atomic_inc(&vmx_enable_failed);
printk(KERN_ERR "lcd vmx: failed to enable VMX, err = %d\n", ret);
return;
}
/**
* Turns off vmx on calling cpu.
*
* unused is ignored (NULL is passed in lcd_vmx_init).
*
* Important: Assumes preemption is disabled. (It will
* be if called from on_each_cpu.)
*/
static void vmx_disable(void *unused) {
if (__get_cpu_var(vmx_enabled)) {
__vmxoff();
write_cr4(read_cr4() & ~X86_CR4_VMXE);
__get_cpu_var(vmx_enabled) = 0;
}
}
/**
* Frees any vmxon areas allocated for cpu's.
*/
static void vmx_free_vmxon_areas(void) {
int cpu;
for_each_possible_cpu(cpu) {
if (per_cpu(vmxon_area, cpu)) {
vmx_free_vmcs(per_cpu(vmxon_area, cpu));
per_cpu(vmxon_area, cpu) = NULL;
}
}
}
/* VMX SETTINGS --------------------------------------------------*/
/**
* Checks and sets basic vmcs settings (vmxon region size, etc.)
*/
static int vmcs_config_basic_settings(struct lcd_vmx_config *vmcs_conf) {
u32 msr_low;
u32 msr_high;
/*
* Read and store basic vmcs settings.
*
* Intel SDM V3 Appendix A
*/
rdmsr(MSR_IA32_VMX_BASIC, msr_low, msr_high);
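/*
 * msr_low (bits 31:0) is the VMCS revision identifier; msr_high
 * (bits 63:32) encodes, among other things, the VMCS region size
 * (bits 44:32 of the MSR), the 32-bit physical address limitation
 * (bit 48), and the VMCS memory type (bits 53:50), checked below.
 */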
/*
* The VMCS region size is never greater than 4 KB
*/
if ((msr_high & 0x1fff) > PAGE_SIZE)
return -EIO;
/*
* Bit 48 of the VMX basic MSR controls whether VMCS addresses
* are limited to 32 bits; it must be 0 on 64-bit CPUs.
*/
if (msr_high & (1u<<16))
return -EIO;
/*
* Require Write-Back (WB) memory type for VMCS accesses.
*/
if (((msr_high >> 18) & 15) != 6)
return -EIO;
vmcs_conf->size = msr_high & 0x1fff;
vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->revision_id = msr_low;
return 0;
}
/**
* Returns 0 if the desired controls are allowed, non-zero otherwise. If
* successful, controls is updated with reserved bits properly
* set. The negation of the reserved mask is used to ignore
* reserved bits during the `checking' process.
*/
static int adjust_vmx_controls(u32 *controls, u32 reserved_mask, u32 msr) {
u32 msr_low, msr_high;
/*
* Make sure the desired controls are possible. In the pin-based
* exec, primary and secondary exec, vmentry, and vmexit MSRs:
*
* -- The low word contains the *minimum required* bits that must
* be set to 1 (i.e., if the bit in the low msr is 1, the vmx
* control bit must be 1).
*
* -- The high word contains the *maximum allowed* bits that can
* be set to 1 (i.e., if the bit in the high msr is 0, the vmx
* control must be 0).
*
* If these conditions aren't met, vmentry fails. Some of these
* bits are reserved, so a mask is used to ensure we're only
* checking those bits we care about.
*
* See Intel SDM V3 Appendix A.
*/
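/*
 * Worked example with made-up values: if *controls = 0x16,
 * reserved_mask = 0, and the MSR reads msr_low = 0x02 and
 * msr_high = 0x1f, then 0x16 is within the allowed set
 * (0x1f & 0x16 == 0x16) and already contains the required bits
 * (0x02 | 0x16 == 0x16), so the check passes and *controls
 * becomes (0x16 & 0x1f) | 0x02 = 0x16.
 */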
rdmsr(msr, msr_low, msr_high);
if ((((msr_high & ~reserved_mask) & *controls) == *controls) &&
(((msr_low & ~reserved_mask) | *controls) == *controls)) {
*controls &= msr_high;
*controls |= msr_low;
return 0;
} else {
return -1;
}
}
/**
* Populates default settings in vmcs_conf for
* vm entries, vm exits, vm execution (e.g., interrupt handling),
* etc.
*/
static int setup_vmcs_config(struct lcd_vmx_config *vmcs_conf) {
u32 basic_vmx_msr_low;
u32 basic_vmx_msr_high;
u32 pin_based_exec_controls;
u32 primary_proc_based_exec_controls;
u32 secondary_proc_based_exec_controls;
u32 vmexit_controls;
u32 vmentry_controls;
/*
* Basic VMX Configuration
*/
if (vmcs_config_basic_settings(vmcs_conf) < 0)
return -EIO;
/*
* VMX Execution Controls (Intel SDM V3 24.6)
*/
/*
* Pin Based Execution Controls (exceptions, nmi's, ...)
*
* -- external interrupts and nmi's cause vm exit.
*/
pin_based_exec_controls = PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING;
if (adjust_vmx_controls(&pin_based_exec_controls,
PIN_BASED_RESERVED_MASK,
MSR_IA32_VMX_PINBASED_CTLS) < 0)
return -EIO;
/*
* Primary Processor Execution Controls
*
* -- HLT Exit
* -- Invalidate PG Exit
* -- MWAIT Exit
* -- RDPMC Exit
* -- L/S CR8 Exit
* -- MOV DR Exit
* -- Unconditional I/O Exit (no I/O bitmap)
* -- Use MSR Bitmaps
* -- MONITOR Exit
* -- Activate Secondary Proc Exec Controls
*
* Note: TSC offsetting and TPR Shadowing are not set. We are
* currently not virtualizing access to the TPR.
*/
primary_proc_based_exec_controls = CPU_BASED_HLT_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_MWAIT_EXITING |
CPU_BASED_RDPMC_EXITING |
CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
if (adjust_vmx_controls(&primary_proc_based_exec_controls,
CPU_BASED_RESERVED_MASK,
MSR_IA32_VMX_PROCBASED_CTLS) < 0)
return -EIO;
/*
* Secondary Processor Execution Controls
*
* -- Enable EPT
* -- Enable RDTSCP
* -- Enable VPID
* -- WBINVD Exit
* -- Enable Unrestricted Guest
* -- Enable INVPCID
*/
secondary_proc_based_exec_controls = SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_ENABLE_INVPCID;
if (adjust_vmx_controls(&secondary_proc_based_exec_controls,
SECONDARY_EXEC_RESERVED_MASK,
MSR_IA32_VMX_PROCBASED_CTLS2) < 0)
return -EIO;
/*
* Remember the EPT and VPID capabilities
*/
rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
vmx_capability.ept, vmx_capability.vpid);
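/*
 * The low 32 bits of MSR_IA32_VMX_EPT_VPID_CAP land in
 * vmx_capability.ept and the high 32 bits in vmx_capability.vpid;
 * the cpu_has_vmx_invept_* / cpu_has_vmx_invvpid_* helpers above
 * consult these cached values.
 */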
/*
* VM Exit Controls (Intel SDM V3 24.7)
*
* -- Host Address Space (host in 64-bit mode on vm exit)
* -- Acknowledge interrupts on vm exit
* -- Save / Load IA32_EFER MSR on vm exit
*/
vmexit_controls = VM_EXIT_HOST_ADDR_SPACE_SIZE |
VM_EXIT_ACK_INTR_ON_EXIT |
VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_LOAD_IA32_EFER;
if (adjust_vmx_controls(&vmexit_controls,
VM_EXIT_RESERVED_MASK,
MSR_IA32_VMX_EXIT_CTLS) < 0)
return -EIO;
/*
* VM Entry Controls (Intel SDM V3 24.8)
*
* -- IA-32E Mode inside guest
* -- Load IA-32 EFER MSR on entry
*/
vmentry_controls = VM_ENTRY_IA32E_MODE |
VM_ENTRY_LOAD_IA32_EFER;
if (adjust_vmx_controls(&vmentry_controls,
VM_ENTRY_RESERVED_MASK,
MSR_IA32_VMX_ENTRY_CTLS) < 0)
return -EIO;
vmcs_conf->pin_based_exec_ctrl = pin_based_exec_controls;
vmcs_conf->cpu_based_exec_ctrl = primary_proc_based_exec_controls;
vmcs_conf->cpu_based_2nd_exec_ctrl = secondary_proc_based_exec_controls;
vmcs_conf->vmexit_ctrl = vmexit_controls;
vmcs_conf->vmentry_ctrl = vmentry_controls;
return 0;
}
int lcd_vmx_init(void) {
int ret;
int cpu;
/*
* Check For VMX Features
*/
if (!cpu_has_vmx()) {
printk(KERN_ERR "lcd vmx: CPU does not support VMX\n");
return -EIO;
}
if (setup_vmcs_config(&vmcs_config) < 0) {
printk(KERN_ERR "lcd vmx: CPU does not support a required VMX setting\n");
return -EIO;
}
/*
* Set up default MSR bitmap
*/
msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!msr_bitmap)
return -ENOMEM;
memset(msr_bitmap, 0xff, PAGE_SIZE);
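/*
 * In the MSR bitmap, a set bit means the corresponding MSR access
 * causes a vm exit (when "use MSR bitmaps" is enabled). Start by
 * intercepting everything, then allow direct access to FS/GS base.
 */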
vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE);
/*
* VPID 0 is reserved for host. See INVVPID instruction.
*/
set_bit(0, vpids.bitmap);
/*
* Allocate vmxon buffers for each cpu. A vmxon buffer is
* (currently) the same size as a vmcs, so we can re-use
* the vmx_alloc_vmcs routine.
*/
for_each_possible_cpu(cpu) {
struct vmcs *vmxon_buf;
vmxon_buf = vmx_alloc_vmcs(cpu);
if (!vmxon_buf) {
vmx_free_vmxon_areas();
return -ENOMEM;
}
per_cpu(vmxon_area, cpu) = vmxon_buf;
}
/*
* Turn on vmx on each cpu
*
* Note: on_each_cpu disables preemption
*/
atomic_set(&vmx_enable_failed, 0);
if (on_each_cpu(vmx_enable, NULL, 1)) {
printk(KERN_ERR "lcd vmx: timeout waiting for VMX mode enable.\n");
ret = -EIO;
goto failed1; /* sadly we can't totally recover */
}
if (atomic_read(&vmx_enable_failed)) {
ret = -EBUSY;
goto failed2;