Commit b2b47c21 authored by Rusty Russell's avatar Rusty Russell Committed by Linus Torvalds
Browse files

lguest: documentation II: Guest



Documentation: The Guest
Signed-off-by: default avatarRusty Russell <rusty@rustcorp.com.au>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent f938d2c8
This diff is collapsed.
......@@ -4,15 +4,15 @@
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
/*
* This is where we begin: we have a magic signature which the launcher looks
* for. The plan is that the Linux boot protocol will be extended with a
/*G:020 This is where we begin: we have a magic signature which the launcher
* looks for. The plan is that the Linux boot protocol will be extended with a
* "platform type" field which will guide us here from the normal entry point,
* but for the moment this suffices. We pass the virtual address of the boot
* info to lguest_init().
* but for the moment this suffices. The normal boot code uses %esi for the
* boot header, so we do too. We convert it to a virtual address by adding
* PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
*
* We put it in .init.text will be discarded after boot.
*/
* The .section line puts this code in .init.text so it will be discarded after
* boot. */
.section .init.text, "ax", @progbits
.ascii "GenuineLguest"
/* Set up initial stack. */
......@@ -21,7 +21,9 @@
addl $__PAGE_OFFSET, %eax
jmp lguest_init
/* The templates for inline patching. */
/*G:055 We create a macro which puts the assembler code between lgstart_ and
* lgend_ markers. These templates end up in the .init.text section, so they
* are discarded after boot. */
#define LGUEST_PATCH(name, insns...) \
lgstart_##name: insns; lgend_##name:; \
.globl lgstart_##name; .globl lgend_##name
......@@ -30,24 +32,47 @@ LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
/*:*/
.text
/* These demark the EIP range where host should never deliver interrupts. */
.global lguest_noirq_start
.global lguest_noirq_end
/*
* We move eflags word to lguest_data.irq_enabled to restore interrupt state.
* For page faults, gpfs and virtual interrupts, the hypervisor has saved
* eflags manually, otherwise it was delivered directly and so eflags reflects
* the real machine IF state, ie. interrupts on. Since the kernel always dies
* if it takes such a trap with interrupts disabled anyway, turning interrupts
* back on unconditionally here is OK.
*/
/*G:045 There is one final paravirt_op that the Guest implements, and glancing
* at it you can see why I left it to last. It's *cool*! It's in *assembler*!
*
* The "iret" instruction is used to return from an interrupt or trap. The
* stack looks like this:
* old address
* old code segment & privilege level
* old processor flags ("eflags")
*
* The "iret" instruction pops those values off the stack and restores them all
* at once. The only problem is that eflags includes the Interrupt Flag which
* the Guest can't change: the CPU will simply ignore it when we do an "iret".
* So we have to copy eflags from the stack to lguest_data.irq_enabled before
* we do the "iret".
*
* There are two problems with this: firstly, we need to use a register to do
* the copy and secondly, the whole thing needs to be atomic. The first
* problem is easy to solve: push %eax on the stack so we can use it, and then
* restore it at the end just before the real "iret".
*
* The second is harder: copying eflags to lguest_data.irq_enabled will turn
* interrupts on before we're finished, so we could be interrupted before we
* return to userspace or wherever. Our solution to this is to surround the
* code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
* Host that it is *never* to interrupt us there, even if interrupts seem to be
* enabled. */
ENTRY(lguest_iret)
pushl %eax
movl 12(%esp), %eax
lguest_noirq_start:
/* Note the %ss: segment prefix here. Normal data accesses use the
* "ds" segment, but that will have already been restored for whatever
* we're returning to (such as userspace): we can't trust it. The %ss:
* prefix makes sure we use the stack segment, which is still valid. */
movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
popl %eax
iret
......
......@@ -27,18 +27,38 @@
#define LG_CLOCK_MIN_DELTA 100UL
#define LG_CLOCK_MAX_DELTA ULONG_MAX
/*G:031 First, how does our Guest contact the Host to ask for privileged
* operations? There are two ways: the direct way is to make a "hypercall",
* to make requests of the Host Itself.
*
* Our hypercall mechanism uses the highest unused trap code (traps 32 and
* above are used by real hardware interrupts). Seventeen hypercalls are
* available: the hypercall number is put in the %eax register, and the
* arguments (when required) are placed in %edx, %ebx and %ecx. If a return
* value makes sense, it's returned in %eax.
*
* Grossly invalid calls result in Sudden Death at the hands of the vengeful
* Host, rather than returning failure. This reflects Winston Churchill's
* definition of a gentleman: "someone who is only rude intentionally". */
#define LGUEST_TRAP_ENTRY 0x1F
static inline unsigned long
hcall(unsigned long call,
unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
/* "int" is the Intel instruction to trigger a trap. */
asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
/* The call is in %eax (aka "a"), and can be replaced */
: "=a"(call)
/* The other arguments are in %eax, %edx, %ebx & %ecx */
: "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
/* "memory" means this might write somewhere in memory.
* This isn't true for all calls, but it's safe to tell
* gcc that it might happen so it doesn't get clever. */
: "memory");
return call;
}
/*:*/
void async_hcall(unsigned long call,
unsigned long arg1, unsigned long arg2, unsigned long arg3);
......@@ -52,31 +72,40 @@ struct hcall_ring
u32 eax, edx, ebx, ecx;
};
/* All the good stuff happens here: guest registers it with LGUEST_INIT */
/*G:032 The second method of communicating with the Host is to via "struct
* lguest_data". The Guest's very first hypercall is to tell the Host where
* this is, and then the Guest and Host both publish information in it. :*/
struct lguest_data
{
/* Fields which change during running: */
/* 512 == enabled (same as eflags) */
/* 512 == enabled (same as eflags in normal hardware). The Guest
* changes interrupts so often that a hypercall is too slow. */
unsigned int irq_enabled;
/* Interrupts blocked by guest. */
/* Fine-grained interrupt disabling by the Guest */
DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS);
/* Virtual address of page fault. */
/* The Host writes the virtual address of the last page fault here,
* which saves the Guest a hypercall. CR2 is the native register where
* this address would normally be found. */
unsigned long cr2;
/* Async hypercall ring. 0xFF == done, 0 == pending. */
/* Async hypercall ring. Instead of directly making hypercalls, we can
* place them in here for processing the next time the Host wants.
* This batching can be quite efficient. */
/* 0xFF == done (set by Host), 0 == pending (set by Guest). */
u8 hcall_status[LHCALL_RING_SIZE];
/* The actual registers for the hypercalls. */
struct hcall_ring hcalls[LHCALL_RING_SIZE];
/* Fields initialized by the hypervisor at boot: */
/* Fields initialized by the Host at boot: */
/* Memory not to try to access */
unsigned long reserve_mem;
/* ID of this guest (used by network driver to set ethernet address) */
/* ID of this Guest (used by network driver to set ethernet address) */
u16 guestid;
/* KHz for the TSC clock. */
u32 tsc_khz;
/* Fields initialized by the guest at boot: */
/* Fields initialized by the Guest at boot: */
/* Instruction range to suppress interrupts even if enabled */
unsigned long noirq_start, noirq_end;
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment