Commit d08c3ad1 authored by Charlie Jacobsen's avatar Charlie Jacobsen Committed by Vikram Narayanan
Browse files

test-v2: Use 2MB pages instead of 1GB pages for guest virtual.

This only costs us 64 KBs versus 8 KBs, which isn't much on a modern
arch.
parent ac26533c
......@@ -16,6 +16,66 @@
#include <lcd_config/post_hook.h>
/* MEMORY REGIONS -------------------------------------------------- */
/*
* Page-table entry flag sets for the memory regions listed in
* lcd_mem_regions: one for uncacheable mappings, one for write-back.
*
* This assumes the PAT has Writeback in PAT0, Uncacheable in PAT1.
*
* See Intel SDM V3 11.12.3 for how PAT indexing is done.
*
* NOTE(review): neither flag set includes _PAGE_PSE, whereas the 1GB-page
* code built its flags with _PAGE_PSE OR'd in. If entries written with
* these flags are meant to map large pages directly (rather than point at
* a lower-level table), the page-size bit presumably still has to be
* set -- TODO confirm against the callers.
*/
#define LCD_UNCACHEABLE_FLAGS (_PAGE_PRESENT | _PAGE_RW | _PAGE_PWT)
#define LCD_WRITEBACK_FLAGS (_PAGE_PRESENT | _PAGE_RW)
/*
* Describes one memory region of the LCD's guest virtual address space
* for the page-table setup code.
*/
struct lcd_mem_region {
/* Human-readable label for the region. */
const char *name;
/* Region start as a byte offset; the setup code shifts it right by 30
* to get the index of the region's first 1GB slot. */
unsigned long offset;
/* Region length in bytes; shifted right by 30 to get the number of
* 1GB slots the region spans. */
unsigned long size;
/* Flag bits OR'd into every page-table entry written for the region. */
pteval_t flags;
};
/* Number of entries in lcd_mem_regions. */
#define LCD_NR_MEM_REGIONS 6

/*
 * Table of the memory regions mapped into the LCD's guest virtual address
 * space. The page-table setup code walks this table and maps each region
 * using the region's own flags; only the ioremap region is uncacheable.
 */
static struct lcd_mem_region lcd_mem_regions[LCD_NR_MEM_REGIONS] = {
	{
		.name   = "miscellaneous",
		.offset = LCD_MISC_REGION_OFFSET,
		.size   = LCD_MISC_REGION_SIZE,
		.flags  = LCD_WRITEBACK_FLAGS,
	},
	{
		.name   = "stack",
		.offset = LCD_STACK_REGION_OFFSET,
		.size   = LCD_STACK_REGION_SIZE,
		.flags  = LCD_WRITEBACK_FLAGS,
	},
	{
		.name   = "heap",
		.offset = LCD_HEAP_REGION_OFFSET,
		.size   = LCD_HEAP_REGION_SIZE,
		.flags  = LCD_WRITEBACK_FLAGS,
	},
	{
		.name   = "ram map",
		.offset = LCD_RAM_MAP_REGION_OFFSET,
		.size   = LCD_RAM_MAP_REGION_SIZE,
		.flags  = LCD_WRITEBACK_FLAGS,
	},
	{
		.name   = "ioremap",
		.offset = LCD_IOREMAP_REGION_OFFSET,
		.size   = LCD_IOREMAP_REGION_SIZE,
		.flags  = LCD_UNCACHEABLE_FLAGS,
	},
	{
		.name   = "kernel module",
		.offset = LCD_KERNEL_MODULE_REGION_OFFSET,
		.size   = LCD_KERNEL_MODULE_REGION_SIZE,
		.flags  = LCD_WRITEBACK_FLAGS,
	},
};
/* ------------------------------------------------------------ */
static int do_grant_and_map_for_mem(cptr_t lcd, struct lcd_create_ctx *ctx,
void *mem, gpa_t map_base,
cptr_t *dest)
......@@ -109,7 +169,7 @@ static int setup_phys_addr_space(cptr_t lcd, struct lcd_create_ctx *ctx,
if (ret)
goto fail1;
c = &(lcd_to_boot_info(ctx)->lcd_boot_cptrs.gv);
ret = do_grant_and_map_for_mem(lcd, ctx, ctx->gv_pgd,
ret = do_grant_and_map_for_mem(lcd, ctx, ctx->gv_pg_tables,
LCD_BOOTSTRAP_PAGE_TABLES_GP_ADDR, c);
if (ret)
goto fail2;
......@@ -135,53 +195,82 @@ fail1:
return ret;
}
static void setup_lcd_pud(struct lcd_create_ctx *ctx)
static void setup_lcd_pmd(struct lcd_mem_region *reg, pmd_t *pmd,
unsigned int gigabyte_idx)
{
/*
* This assumes the PAT has Writeback in PAT0, Uncacheable in PAT1.
*
* See Intel SDM V3 11.12.3 for how PAT indexing is done.
*/
unsigned int i;
unsigned int ioremap_offset = LCD_IOREMAP_REGION_OFFSET >> 30;
unsigned int after_ioremap_offset = (LCD_IOREMAP_REGION_OFFSET +
LCD_IOREMAP_REGION_SIZE) >> 30;
pteval_t wb_flags = _PAGE_PRESENT | _PAGE_RW | _PAGE_PSE;
pteval_t uc_flags = _PAGE_PRESENT | _PAGE_RW | _PAGE_PSE | _PAGE_PWT;
pud_t *pud_entry;
/*
* Map first GBs as write back
*/
for (i = 0; i < ioremap_offset; i++) {
pud_entry = &ctx->gv_pud[i];
set_pud(pud_entry,
__pud((LCD_PHYS_BASE + i * (1UL << 30)) | wb_flags));
unsigned int k;
unsigned long gp;
for (k = 0; k < 512; k++) {
/*
* Guest physical address we put in the table entry is:
*
* base +
* offset to the 1GB region for this pmd +
* offset for this table entry (some multiple of 2MBs)
*/
gp = LCD_PHYS_BASE + gigabyte_idx * (1UL << 30) +
k * (1UL << 21);
set_pmd(&pmd[k], __pmd(gp | reg->flags));
}
}
static void setup_lcd_pmds(pmd_t *pmds)
{
unsigned int i, j, lo, hi;
pmd_t *pmd_entry;
struct lcd_mem_region *reg;
/*
* Map ioremap GBs as uncacheable
* Map each memory region, filling out entire pmd's so that we're
* mapping multiples of 1GB
*/
for (i = ioremap_offset; i < after_ioremap_offset; i++) {
pud_entry = &ctx->gv_pud[i];
set_pud(pud_entry,
__pud((LCD_PHYS_BASE + i * (1UL << 30)) | uc_flags));
pmd_entry = pmds;
for (i = 0; i < LCD_NR_MEM_REGIONS; i++) {
reg = &lcd_mem_regions[i];
/*
* Set up enough pmd's to fill in memory region
*/
lo = reg->offset >> 30;
hi = lo + (reg->size >> 30);
for (j = lo; j < hi; j++) {
setup_lcd_pmd(reg, pmd_entry, j);
pmd_entry += 512;
}
}
}
static void setup_lcd_pud(pud_t *pud)
{
unsigned int i, j, lo, hi;
pud_t *pud_entry;
struct lcd_mem_region *reg;
/*
* Map remaining GBs as write back
* Map each memory region
*/
for (i = after_ioremap_offset; i < 512; i++) {
pud_entry = &ctx->gv_pud[i];
set_pud(pud_entry,
__pud((LCD_PHYS_BASE + i * (1UL << 30)) | wb_flags));
for (i = 0; i < LCD_NR_MEM_REGIONS; i++) {
reg = &lcd_mem_regions[i];
/*
* Map entire memory region (some are bigger than 1GB)
*/
lo = reg->offset >> 30;
hi = lo + (reg->size >> 30);
for (j = lo; j < hi; j++) {
pud_entry = &pud[j];
set_pud(pud_entry,
__pud((LCD_PHYS_BASE + j * (1UL << 30)) |
reg->flags));
}
}
}
static void setup_lcd_pgd(struct lcd_create_ctx *ctx)
static void setup_lcd_pgd(pgd_t *pgd)
{
pgd_t *pgd_entry;
gpa_t pud_gpa;
pteval_t flags = 0;
pgd_entry = &ctx->gv_pgd[511]; /* only map last pud for high 512 GBs */
pgd_entry = &pgd[511]; /* only map last pud for high 512 GBs */
/* pud comes after pgd */
pud_gpa = gpa_add(LCD_BOOTSTRAP_PAGE_TABLES_GP_ADDR, PAGE_SIZE);
......@@ -193,53 +282,22 @@ static void setup_lcd_pgd(struct lcd_create_ctx *ctx)
__pgd(gpa_val(pud_gpa) | flags));
}
/*
 * Debug helper: print all 512 entries of the given pud, one per line, as
 * "<index> <raw entry value in hex>".
 */
static void dump_virt_pud(pud_t *pud)
{
	unsigned int slot = 0;

	printk("\n\n pud 511 entries:\n------------------\n\n");
	while (slot < 512) {
		printk(" %03u %lx\n", slot, pud_val(pud[slot]));
		slot++;
	}
}
/*
 * Debug helper: print all 512 entries of the given pgd, one per line, as
 * "<index> <raw entry value in hex>".
 */
static void dump_virt_pgd(pgd_t *pgd)
{
	unsigned int slot;
	pgd_t *cursor = pgd;

	printk(" pgd entries:\n------------------\n\n");
	for (slot = 0; slot < 512; slot++, cursor++)
		printk(" %03u %lx\n", slot, pgd_val(*cursor));
}
/*
* Debug helper: print the guest virtual page tables held in @ctx.
*
* Prints a banner, then every entry of ctx->gv_pgd, then every entry of
* ctx->gv_pud.
*/
void dump_virt_addr_space(struct lcd_create_ctx *ctx)
{
printk("DUMP LCD VIRT PAGE TABLES:\n");
/* Root table first, then the pud. */
dump_virt_pgd(ctx->gv_pgd);
dump_virt_pud(ctx->gv_pud);
}
static void setup_virt_addr_space(struct lcd_create_ctx *ctx)
{
/*
* pgd and pud (PML4 and PDPT) should already be zero'd out
* page tables should already be zero'd out
*
* Set up root pgd
*/
setup_lcd_pgd(ctx);
setup_lcd_pgd(ctx->gv_pg_tables);
/*
* Set up pud (only one for high 512 GBs)
*/
setup_lcd_pud(ctx);
dump_virt_addr_space(ctx);
setup_lcd_pud(ctx->gv_pg_tables + 512); /* skip over pgd */
/*
* Set up pmd's (one for each 1GB region)
*/
setup_lcd_pmds(ctx->gv_pg_tables + 1024); /* skip over pgd and pud */
}
static int setup_addr_spaces(cptr_t lcd, struct lcd_create_ctx *ctx,
......@@ -378,8 +436,7 @@ static int get_pages_for_lcd(struct lcd_create_ctx *ctx)
goto fail3;
}
memset(lcd_page_address(p2), 0, LCD_BOOTSTRAP_PAGE_TABLES_SIZE);
ctx->gv_pgd = lcd_page_address(p2);
ctx->gv_pud = lcd_page_address(p2 + 1);
ctx->gv_pg_tables = lcd_page_address(p2);
/*
* Alloc stack
*/
......
......@@ -75,28 +75,31 @@ int __liblcd_heap_init(void);
/*
* RAM MAPPING --------------------------------------------------
*
* RAM mapping area is 8 GBs (2^26 pages).
* RAM mapping area is 1 GB (2^18 pages).
*
* The minimum address space block you can allocate from the RAM map
* area is 64 MBs (2^14 = 16,384 pages). This leads to a lot of internal
* area is 2 MBs (2^9 = 512 pages). This leads to a lot of internal
* fragmentation, but this is tolerable, so long as we don't need to
* map RAM more than 100 or so times. (The actual memory object will
* likely be considerably smaller than 64 MBs, so only the first
* map RAM more than 200 or so times. (The actual memory object will
* likely be considerably smaller than 2 MBs, so only the first
* few KBs may actually be backed/mapped in guest physical. But the
* entire 64 MB region will be considered occupied by the internal
* entire 2 MB region will be considered occupied by the internal
* RAM map allocator.)
*
* The maximum address space block you can allocate from the RAM map
* area is 1 GB (2^18 = 262,144 pages).
* area is 256 MB (2^16 = 65,536 pages).
*
* In hindsight, maybe a simple bitmap would have been just as good
* here since there are only 8GB/64MB = 128 allocation blocks. Oh well.
* here since there are only 1GB/2MB = 512 allocation blocks. Oh well. (These
* mapping regions were originally gonna be a lot bigger, but I had
* to switch over to 2MB guest virtual pages instead of 1GB pages. Etc.
* etc.)
*/
#define LCD_RAM_MAP_NR_PAGES_ORDER \
(ilog2(LCD_RAM_MAP_REGION_SIZE >> PAGE_SHIFT))
#define LCD_RAM_MAP_SIZE LCD_RAM_MAP_REGION_SIZE
#define LCD_RAM_MAP_MIN_ORDER 14
#define LCD_RAM_MAP_MAX_ORDER 18
#define LCD_RAM_MAP_MIN_ORDER 9
#define LCD_RAM_MAP_MAX_ORDER 16
/**
* __liblcd_ram_map_init -- Call during boot after heap initialized
......
......@@ -45,15 +45,15 @@
*
* -- 1 GB for heap region (unmapped at boot; only bottom part is used)
*
* -- 2 GB HOLE (unmapped)
* -- 1 GB HOLE (unmapped)
*
* -- 8 GB for RAM memory mapping region (think: kmap)
* -- 1 GB for RAM memory mapping region (think: kmap)
*
* -- 48 GB HOLE (unmapped)
* -- 1 GB HOLE (unmapped)
*
* -- 256 GB for ioremap region (unmapped at boot)
* -- 1 GB for ioremap region (unmapped at boot)
*
* -- 190 GB HOLE (unmapped)
* -- 500 GB HOLE (unmapped)
*
* -- 2 GB for kernel module mapping area. The kernel module itself
* is mapped at the correct offset into this area so that
......@@ -75,21 +75,19 @@
* | (2 GB) |
* +---------------------------+ 0x0000 007f 8000 0000 (510 GB)
* | HOLE / Unmapped |
* | (190 GB) |
* +---------------------------+ 0x0000 0050 0000 0000 (320 GB)
* | (500 GB) |
* +---------------------------+ 0x0000 0002 8000 0000 (10 GB)
* | ioremap Region |
* | (64 GB aligned) |
* | (256 GB) |
* +---------------------------+ 0x0000 0010 0000 0000 (64 GB)
* | (1 GB) |
* +---------------------------+ 0x0000 0002 4000 0000 (9 GB)
* | HOLE / Unmapped |
* | (48 GB) |
* +---------------------------+ 0x0000 0004 0000 0000 (16 GB)
* | (1 GB) |
* +---------------------------+ 0x0000 0002 0000 0000 (8 GB)
* | RAM Map Region |
* | (8 GB aligned) |
* | (8 GB) |
* +---------------------------+ 0x0000 0002 0000 0000 (8 GB)
* | (1 GB) |
* +---------------------------+ 0x0000 0001 c000 0000 (7 GB)
* | HOLE / Unmapped |
* | (2 GB) |
* | (1 GB) |
* +---------------------------+ 0x0000 0001 8000 0000 (6 GB)
* | Heap Region |
* | (1 GB) |
......@@ -143,8 +141,8 @@
* | |
* +---------------------------+ 0x0000 0000 0000 0000
*
* Everything else is unmapped. We use huge 1 GB pages. This means there
* are only two 4 KB pages needed to set up the entire guest virtual
* Everything else is unmapped. We use 2MB pages. This means there
* are only 10-20 4 KB pages needed to set up the entire guest virtual
* address space.
*
* (Why don't we map everything in the high physical range so we can
......@@ -154,19 +152,19 @@
/* Region sizes */
#define LCD_MISC_REGION_SIZE (1UL << 30) /* .................... 1 GB */
#define LCD_STACK_REGION_SIZE (1UL << 30) /* ................... 1 GB */
#define LCD_HEAP_REGION_SIZE (1UL << 30) /* .................... 1 GB */
#define LCD_RAM_MAP_REGION_SIZE (8UL << 30) /* ................. 8 GBs */
#define LCD_IOREMAP_REGION_SIZE (256UL << 30) /* ............... 256 GBs */
#define LCD_KERNEL_MODULE_REGION_SIZE (2UL << 30) /* ........... 2 GBs */
#define LCD_MISC_REGION_SIZE (1UL << 30) /* .................... 1 GB */
#define LCD_STACK_REGION_SIZE (1UL << 30) /* ................... 1 GB */
#define LCD_HEAP_REGION_SIZE (1UL << 30) /* .................... 1 GB */
#define LCD_RAM_MAP_REGION_SIZE (1UL << 30) /* ................. 1 GB */
#define LCD_IOREMAP_REGION_SIZE (1UL << 30) /* ................. 1 GB (unsigned, like the other region sizes) */
#define LCD_KERNEL_MODULE_REGION_SIZE (2UL << 30) /* ........... 2 GBs */
/* Component Sizes. */
#define LCD_UTCB_SIZE PAGE_SIZE /* ........................... 4 KBs */
#define LCD_BOOTSTRAP_PAGES_SIZE (1 * PAGE_SIZE) /* .......... 4 KBs */
#define LCD_BOOTSTRAP_PAGE_TABLES_SIZE (2 * PAGE_SIZE) /* .... 8 KBs */
#define LCD_STACK_SIZE (2 * PAGE_SIZE) /* .................... 8 KBs */
#define LCD_UTCB_SIZE PAGE_SIZE /* ........................... 4 KBs */
#define LCD_BOOTSTRAP_PAGES_SIZE (1 * PAGE_SIZE) /* .......... 4 KBs */
#define LCD_BOOTSTRAP_PAGE_TABLES_SIZE (16 * PAGE_SIZE) /* ... 64 KBs */
#define LCD_STACK_SIZE (2 * PAGE_SIZE) /* .................... 8 KBs */
/* Orders (for convenience) */
......@@ -201,19 +199,19 @@
/* HOLE */
#define LCD_RAM_MAP_REGION_OFFSET \
(LCD_HEAP_REGION_OFFSET + LCD_HEAP_REGION_SIZE + (2UL << 30))
(LCD_HEAP_REGION_OFFSET + LCD_HEAP_REGION_SIZE + (1UL << 30))
#define LCD_RAM_MAP_OFFSET LCD_RAM_MAP_REGION_OFFSET
/* HOLE */
#define LCD_IOREMAP_REGION_OFFSET \
(LCD_RAM_MAP_REGION_OFFSET + LCD_RAM_MAP_REGION_SIZE + (48UL << 30))
(LCD_RAM_MAP_REGION_OFFSET + LCD_RAM_MAP_REGION_SIZE + (1UL << 30))
#define LCD_IOREMAP_OFFSET LCD_IOREMAP_REGION_OFFSET
/* HOLE */
#define LCD_KERNEL_MODULE_REGION_OFFSET \
(LCD_IOREMAP_REGION_OFFSET + LCD_IOREMAP_REGION_SIZE + (190UL << 30))
(LCD_IOREMAP_REGION_OFFSET + LCD_IOREMAP_REGION_SIZE + (500UL << 30))
/* Addresses */
......@@ -269,7 +267,7 @@ __lcd_build_checks__(void)
BUILD_BUG_ON(sizeof(struct lcd_boot_info) > LCD_BOOTSTRAP_PAGES_SIZE);
/* The bootstrap page-table area for the gv address space must be exactly 16 pages. */
BUILD_BUG_ON(LCD_BOOTSTRAP_PAGE_TABLES_SIZE >> PAGE_SHIFT != 2);
BUILD_BUG_ON(LCD_BOOTSTRAP_PAGE_TABLES_SIZE >> PAGE_SHIFT != 16);
/* Number of LCD stack pages needs to be a power-of-two multiple. */
BUILD_BUG_ON_NOT_POWER_OF_2(LCD_STACK_SIZE >> PAGE_SHIFT);
......@@ -280,6 +278,11 @@ __lcd_build_checks__(void)
* starting gv address for the kernel map in the host. */
BUILD_BUG_ON(gva_val(LCD_KERNEL_MODULE_REGION_GV_ADDR) !=
__START_KERNEL_map);
/* All memory should fit into 512 GBs. This is because we only
* use one pud (internally in common/module_create.c). */
BUILD_BUG_ON((LCD_KERNEL_MODULE_REGION_OFFSET +
LCD_KERNEL_MODULE_REGION_SIZE) >= (512UL << 30));
}
/* HELPERS -------------------------------------------------- */
......
......@@ -126,8 +126,7 @@ struct lcd_create_ctx {
* All of these are *creator-relative* addresses
*/
struct lcd_boot_info *lcd_boot_info;
pgd_t *gv_pgd;
pud_t *gv_pud;
void *gv_pg_tables;
void *stack;
void *m_init_bits;
void *m_core_bits;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment