Commit 2ef14f46 authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm changes from Peter Anvin:
 "This is a huge set of several partly interrelated (and concurrently
  developed) changes, which is why the branch history is messier than
  one would like.

  The *really* big items are two humongous patchsets, mostly developed
  by Yinghai Lu at my request, which completely revamp the way we
  create initial page tables.  In particular, rather than estimating
  how much memory we will need for page tables and then building them
  into that memory -- a calculation that has proven to be incredibly
  fragile -- we now build them (on 64 bits) with the aid of a
  "pseudo-linear mode" -- a #PF handler which creates temporary page
  tables on demand.

  This has several advantages:

  1. It makes it much easier to support things that need access to data
     very early (a follow-on patchset uses this to load microcode way
     early in the kernel startup).

  2. It allows the kernel and all the kernel data objects to be loaded
     above the 4 GB limit.  This allows kdump to work on very large
     systems.

  3. It greatly reduces the difference between Xen and native (Xen's
     equivalent of the #PF handler is the temporary page tables created
     by the domain builder), eliminating a bunch of fragile hooks.

  The patch series also gets us a bit closer to W^X.

  Additional work in this pull is the 64-bit get_user() support, which
  you were also involved with, and a bunch of cleanups/speedups to
  __phys_addr()/__pa()."
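
To make the "pseudo-linear mode" in the quoted message concrete, here is a minimal sketch of the idea: a #PF handler that identity-maps the faulting 2 MB block on demand. It follows the spirit of the series rather than its actual code; pgt_alloc(), early_pgd and the raw flag values are assumptions of this sketch.

#include <stdint.h>

#define PMD_SHIFT	21	/* map with 2 MB pages */
#define ENTRIES		512	/* entries per table level */

extern uint64_t *pgt_alloc(void);	/* assumed: returns one zeroed page */
extern uint64_t *early_pgd;		/* assumed: top-level table, in CR3 */

/* Called from the early #PF stub with the faulting address (CR2). */
void early_ident_fault(uint64_t cr2)
{
	uint64_t *pgd = early_pgd, *pud, *pmd;
	unsigned int i;

	i = (cr2 >> 39) & (ENTRIES - 1);		/* PGD index */
	if (!(pgd[i] & 1))				/* not present yet */
		pgd[i] = (uint64_t)(uintptr_t)pgt_alloc() | 0x3;
	pud = (uint64_t *)(uintptr_t)(pgd[i] & ~0xfffULL);

	i = (cr2 >> 30) & (ENTRIES - 1);		/* PUD index */
	if (!(pud[i] & 1))
		pud[i] = (uint64_t)(uintptr_t)pgt_alloc() | 0x3;
	pmd = (uint64_t *)(uintptr_t)(pud[i] & ~0xfffULL);

	/* Identity-map the faulting 2 MB block: present + write + PSE. */
	i = (cr2 >> PMD_SHIFT) & (ENTRIES - 1);
	pmd[i] = (cr2 & ~(((uint64_t)1 << PMD_SHIFT) - 1)) | 0x83;
}

The temporary tables only need to survive until the real ones are built, which is what makes this so much less fragile than pre-sizing the page-table memory.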

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (105 commits)
  x86, mm: Move reserving low memory later in initialization
  x86, doc: Clarify the use of asm("%edx") in uaccess.h
  x86, mm: Redesign get_user with a __builtin_choose_expr hack
  x86: Be consistent with data size in getuser.S
  x86, mm: Use a bitfield to mask nuisance get_user() warnings
  x86/kvm: Fix compile warning in kvm_register_steal_time()
  x86-32: Add support for 64bit get_user()
  x86-32, mm: Remove reference to alloc_remap()
  x86-32, mm: Remove reference to resume_map_numa_kva()
  x86-32, mm: Rip out x86_32 NUMA remapping code
  x86/numa: Use __pa_nodebug() instead
  x86: Don't panic if can not alloc buffer for swiotlb
  mm: Add alloc_bootmem_low_pages_nopanic()
  x86, 64bit, mm: hibernate use generic mapping_init
  x86, 64bit, mm: Mark data/bss/brk to nx
  x86: Merge early kernel reserve for 32bit and 64bit
  x86: Add Crash kernel low reservation
  x86, kdump: Remove crashkernel range find limit for 64bit
  memblock: Add memblock_mem_size()
  x86, boot: Not need to check setup_header version for setup_data
  ...
parents cb715a83 0da3e7f5
@@ -594,6 +594,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			is selected automatically. Check
 			Documentation/kdump/kdump.txt for further details.
 
+	crashkernel_low=size[KMG]
+			[KNL, x86] Reserve size bytes below 4G for the
+			crash kernel (e.g. for DMA/swiotlb buffers).
+
 	crashkernel=range1:size1[,range2:size2,...][@offset]
 			[KNL] Same as above, but depends on the memory
 			in the running system. The syntax of range is
......
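
A hedged example of how the new option above combines with a regular crashkernel reservation on a large machine (the values are illustrative, not from this commit):

crashkernel=512M crashkernel_low=72M

With the 64-bit reservation limit removed later in this series, the main 512M region may land above 4G; the extra 72M below 4G keeps DMA/swiotlb usable in the crash kernel.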
@@ -1055,6 +1055,44 @@ must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
 must be __BOOT_DS; interrupt must be disabled; %esi must hold the base
 address of the struct boot_params; %ebp, %edi and %ebx must be zero.
 
+**** 64-bit BOOT PROTOCOL
+
+For machines with 64-bit CPUs and a 64-bit kernel, a 64-bit bootloader
+can be used, and for that we need a 64-bit boot protocol.
+
+In the 64-bit boot protocol, the first step in loading a Linux kernel
+should be to set up the boot parameters (struct boot_params,
+traditionally known as the "zero page"). The memory for struct
+boot_params may be allocated anywhere (even above 4G) and must be
+initialized to all zero. Then the setup header at offset 0x01f1 of the
+kernel image should be loaded into struct boot_params and examined. The
+end of the setup header can be calculated as follows:
+
+	0x0202 + byte value at offset 0x0201
+
+In addition to reading/modifying/writing the setup header of the
+struct boot_params as in the 16-bit boot protocol, the boot loader
+should also fill the additional fields of the struct boot_params as
+described in zero-page.txt.
+
+After setting up the struct boot_params, the boot loader can load the
+64-bit kernel in the same way as in the 16-bit boot protocol, but the
+kernel may be loaded above 4G.
+
+In the 64-bit boot protocol, the kernel is started by jumping to the
+64-bit kernel entry point, which is the start address of the loaded
+64-bit kernel plus 0x200.
+
+At entry, the CPU must be in 64-bit mode with paging enabled. The
+range [start address, start address + setup_header.init_size) of the
+loaded kernel, the zero page, and the command line buffer must be
+identity mapped. A GDT must be loaded with the descriptors for
+selectors __BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must
+be 4G flat segments; __BOOT_CS must have execute/read permission, and
+__BOOT_DS must have read/write permission; CS must be __BOOT_CS and
+DS, ES, SS must be __BOOT_DS; interrupts must be disabled; %rsi must
+hold the base address of the struct boot_params.
+
 **** EFI HANDOVER PROTOCOL
 
 This protocol allows boot loaders to defer initialisation to the EFI
......
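
A loader-side sketch of the 64-bit boot protocol documented above. alloc_zero_page() and enter_long_mode_and_jump() are assumed helpers; the offsets (0x01f1, 0x0201, 0x0202, 0x200) are the ones quoted in the text.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

extern uint8_t *alloc_zero_page(void);	/* assumed: zeroed page, any address */
extern void enter_long_mode_and_jump(uint64_t rip, uint64_t rsi);

void boot_linux_64(const uint8_t *image, uint64_t load_addr)
{
	uint8_t *boot_params = alloc_zero_page();	/* the "zero page" */

	/* setup header starts at 0x01f1; it ends at 0x0202 + byte at 0x0201 */
	size_t setup_end = 0x0202 + image[0x0201];

	memcpy(boot_params + 0x01f1, image + 0x01f1, setup_end - 0x01f1);

	/* ... modify the header and fill the extra fields per zero-page.txt ... */

	/* Jump to load address + 0x200 in 64-bit mode, paging enabled,
	 * identity mappings in place, %rsi pointing at boot_params. */
	enter_long_mode_and_jump(load_addr + 0x200,
				 (uint64_t)(uintptr_t)boot_params);
}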
@@ -317,7 +317,8 @@ void __init plat_swiotlb_setup(void)
 	octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
 
-	swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
+	if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
+		panic("Cannot allocate SWIOTLB buffer");
 
 	mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
 }
......
@@ -2027,6 +2027,16 @@ static void __init patch_tlb_miss_handler_bitmap(void)
 	flushi(&valid_addr_bitmap_insn[0]);
 }
 
+static void __init register_page_bootmem_info(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int i;
+
+	for_each_online_node(i)
+		if (NODE_DATA(i)->node_spanned_pages)
+			register_page_bootmem_info_node(NODE_DATA(i));
+#endif
+}
+
 void __init mem_init(void)
 {
 	unsigned long codepages, datapages, initpages;
@@ -2044,20 +2054,8 @@ void __init mem_init(void)
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-	{
-		int i;
-		for_each_online_node(i) {
-			if (NODE_DATA(i)->node_spanned_pages != 0) {
-				totalram_pages +=
-					free_all_bootmem_node(NODE_DATA(i));
-			}
-		}
-		totalram_pages += free_low_memory_core_early(MAX_NUMNODES);
-	}
-#else
+	register_page_bootmem_info();
 	totalram_pages = free_all_bootmem();
-#endif
 
 	/* We subtract one to account for the mem_map_zero page
 	 * allocated below.
......
@@ -1277,10 +1277,6 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
 
-config HAVE_ARCH_ALLOC_REMAP
-	def_bool y
-	depends on X86_32 && NUMA
-
 config ARCH_HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on X86_32 && DISCONTIGMEM
......
@@ -285,16 +285,26 @@ struct biosregs {
 void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
 
 /* cmdline.c */
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize);
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option);
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
 static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
 {
-	return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize);
+	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+	if (cmd_line_ptr >= 0x100000)
+		return -1;	/* inaccessible */
+
+	return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
 }
 
 static inline int cmdline_find_option_bool(const char *option)
 {
-	return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option);
+	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+	if (cmd_line_ptr >= 0x100000)
+		return -1;	/* inaccessible */
+
+	return __cmdline_find_option_bool(cmd_line_ptr, option);
 }
......
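
The >= 0x100000 checks in the wrappers above exist because the real-mode parser reaches memory through a 16-bit segment:offset pair (set_fs(ptr >> 4) with offset ptr & 0xf, as in cmdline.c below), so anything at or above 1 MB is simply unreachable. A tiny self-check of that limit, assuming nothing beyond the arithmetic:

#include <assert.h>
#include <stdint.h>

/* Highest linear address a 16-bit segment:offset pair can name. */
static uint32_t linear(uint16_t seg, uint16_t off)
{
	return ((uint32_t)seg << 4) + off;
}

int main(void)
{
	/* fs = ptr >> 4 tops out at 0xffff, so the reachable range ends
	 * at 0xffff:0xffff = 0x10ffef, just above 1 MB; command lines at
	 * or above 0x100000 must therefore be rejected up front. */
	assert(linear(0xffff, 0xffff) == 0x10ffef);
	return 0;
}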
@@ -27,7 +27,7 @@ static inline int myisspace(u8 c)
  * Returns the length of the argument (regardless of if it was
  * truncated to fit in the buffer), or -1 on not found.
  */
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize)
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
 {
 	addr_t cptr;
 	char c;
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
 		st_bufcpy	/* Copying this to buffer */
 	} state = st_wordstart;
 
-	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
-		return -1;	/* No command line, or inaccessible */
+	if (!cmdline_ptr)
+		return -1;	/* No command line */
 
 	cptr = cmdline_ptr & 0xf;
 	set_fs(cmdline_ptr >> 4);
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
  * Returns the position of that option (starts counting with 1)
  * or 0 on not found
  */
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
 {
 	addr_t cptr;
 	char c;
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
 		st_wordskip,	/* Miscompare, skip */
 	} state = st_wordstart;
 
-	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
-		return -1;	/* No command line, or inaccessible */
+	if (!cmdline_ptr)
+		return -1;	/* No command line */
 
 	cptr = cmdline_ptr & 0xf;
 	set_fs(cmdline_ptr >> 4);
......
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr)
 	return *((char *)(fs + addr));
 }
 #include "../cmdline.c"
 
+static unsigned long get_cmd_line_ptr(void)
+{
+	unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
+
+	cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
+
+	return cmd_line_ptr;
+}
+
 int cmdline_find_option(const char *option, char *buffer, int bufsize)
 {
-	return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
+	return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
 }
 
 int cmdline_find_option_bool(const char *option)
 {
-	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
+	return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
 }
 #endif
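
get_cmd_line_ptr() above is the consumer side of the above-4G support: a 64-bit command line pointer split across the existing 32-bit cmd_line_ptr field and the new ext_cmd_line_ptr field. A sketch of the producer side, as a hypothetical loader might do it (the struct here is an illustrative subset, not the real boot_params layout):

#include <stdint.h>

/* Illustrative subset of the boot parameter fields involved. */
struct zero_page_slice {
	uint32_t cmd_line_ptr;		/* setup header: low 32 bits */
	uint32_t ext_cmd_line_ptr;	/* boot params:  high 32 bits */
};

static void publish_cmdline(struct zero_page_slice *zp, uint64_t cmdline)
{
	zp->cmd_line_ptr     = (uint32_t)cmdline;		/* low half  */
	zp->ext_cmd_line_ptr = (uint32_t)(cmdline >> 32);	/* high half */
}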
@@ -37,6 +37,12 @@
 	__HEAD
 	.code32
 ENTRY(startup_32)
+	/*
+	 * 32bit entry is 0 and it is ABI so immutable!
+	 * If we come here directly from a bootloader,
+	 * kernel(text+data+bss+brk), ramdisk, zero_page, command line
+	 * all need to be under the 4G limit.
+	 */
 	cld
 	/*
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -154,6 +160,12 @@ ENTRY(startup_32)
 	btsl	$_EFER_LME, %eax
 	wrmsr
 
+	/* After gdt is loaded */
+	xorl	%eax, %eax
+	lldt	%ax
+	movl	$0x20, %eax
+	ltr	%ax
+
 	/*
 	 * Setup for the jump to 64bit mode
 	 *
@@ -176,28 +188,18 @@ ENTRY(startup_32)
 	lret
 ENDPROC(startup_32)
 
-no_longmode:
-	/* This isn't an x86-64 CPU so hang */
-1:
-	hlt
-	jmp     1b
-
-#include "../../kernel/verify_cpu.S"
-
-	/*
-	 * Be careful here startup_64 needs to be at a predictable
-	 * address so I can export it in an ELF header. Bootloaders
-	 * should look at the ELF header to find this address, as
-	 * it may change in the future.
-	 */
 	.code64
 	.org 0x200
 ENTRY(startup_64)
 	/*
+	 * 64bit entry is 0x200 and it is ABI so immutable!
 	 * We come here either from startup_32 or directly from a
-	 * 64bit bootloader. If we come here from a bootloader we depend on
-	 * an identity mapped page table being provied that maps our
-	 * entire text+data+bss and hopefully all of memory.
+	 * 64bit bootloader.
+	 * If we come here from a bootloader, kernel(text+data+bss+brk),
+	 * ramdisk, zero_page, command line could be above 4G.
+	 * We depend on an identity mapped page table being provided
+	 * that maps our entire kernel(text+data+bss+brk), zero page
+	 * and command line.
 	 */
 #ifdef CONFIG_EFI_STUB
 	/*
@@ -247,9 +249,6 @@ preferred_addr:
 	movl	%eax, %ss
 	movl	%eax, %fs
 	movl	%eax, %gs
-	lldt	%ax
-	movl	$0x20, %eax
-	ltr	%ax
 
 	/*
 	 * Compute the decompressed kernel start address.  It is where
@@ -349,6 +348,15 @@ relocated:
 	 */
 	jmp	*%rbp
 
+	.code32
+no_longmode:
+	/* This isn't an x86-64 CPU so hang */
+1:
+	hlt
+	jmp     1b
+
+#include "../../kernel/verify_cpu.S"
+
 	.data
 gdt:
 	.word	gdt_end - gdt
......
@@ -374,6 +374,14 @@ xloadflags:
 #else
 # define XLF0 0
 #endif
+
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+   /* kernel/boot_param/ramdisk could be loaded above 4g */
+# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
+#else
+# define XLF1 0
+#endif
+
 #ifdef CONFIG_EFI_STUB
 # ifdef CONFIG_X86_64
 #  define XLF23 XLF_EFI_HANDOVER_64	/* 64-bit EFI handover ok */
@@ -383,7 +391,7 @@ xloadflags:
 #else
 # define XLF23 0
 #endif
-			.word XLF0 | XLF23
+			.word XLF0 | XLF1 | XLF23
 
 cmdline_size:	.long	COMMAND_LINE_SIZE-1	#length of the command line,
 						#added with boot protocol
......
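
The new XLF1 bit above is what a loader is meant to test before placing anything above 4G. A sketch of that check; the 0x236 offset of xloadflags is quoted here from boot.txt for protocol 2.12 and should be treated as an assumption to verify:

#include <stdint.h>

#define XLF_CAN_BE_LOADED_ABOVE_4G	(1 << 1)	/* matches XLF1 above */

/* 'image' points at the bzImage file; xloadflags is a 16-bit LE field. */
static int can_load_above_4g(const uint8_t *image)
{
	uint16_t xlf = (uint16_t)image[0x236] | ((uint16_t)image[0x237] << 8);

	return (xlf & XLF_CAN_BE_LOADED_ABOVE_4G) != 0;
}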
-#ifndef _ASM_X86_INIT_32_H
-#define _ASM_X86_INIT_32_H
+#ifndef _ASM_X86_INIT_H
+#define _ASM_X86_INIT_H
 
-#ifdef CONFIG_X86_32
-extern void __init early_ioremap_page_table_range_init(void);
-#endif
+struct x86_mapping_info {
+	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+	void *context;			 /* context for alloc_pgt_page */
+	unsigned long pmd_flag;		 /* page flag for PMD entry */
+	bool kernel_mapping;		 /* kernel mapping or ident mapping */
+};
 
-extern void __init zone_sizes_init(void);
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+				unsigned long addr, unsigned long end);
 
-extern unsigned long __init
-kernel_physical_mapping_init(unsigned long start,
-			     unsigned long end,
-			     unsigned long page_size_mask);
-
-extern unsigned long __initdata pgt_buf_start;
-extern unsigned long __meminitdata pgt_buf_end;
-extern unsigned long __meminitdata pgt_buf_top;
-
-#endif /* _ASM_X86_INIT_32_H */
+#endif /* _ASM_X86_INIT_H */
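
For a sense of how the new kernel_ident_mapping_init() interface is driven, here is a sketch modeled on the hibernate conversion in this series ("x86, 64bit, mm: hibernate use generic mapping_init"); the allocator callback and flag choice are illustrative, not the exact hibernate code:

#include <linux/gfp.h>
#include <asm/init.h>
#include <asm/pgtable.h>

/* Illustrative allocator: hand kernel_ident_mapping_init() zeroed pages. */
static void *alloc_pgt_page(void *context)
{
	return (void *)get_zeroed_page(GFP_ATOMIC);
}

static int build_ident_map(pgd_t *pgd, unsigned long start, unsigned long end)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page	= alloc_pgt_page,
		.context	= NULL,
		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
		.kernel_mapping	= false,	/* ident, not kernel, mapping */
	};

	return kernel_ident_mapping_init(&info, pgd, start, end);
}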
@@ -48,11 +48,11 @@
 # define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
 #else
 /* Maximum physical address we can use pages from */
-# define KEXEC_SOURCE_MEMORY_LIMIT      (0xFFFFFFFFFFUL)
+# define KEXEC_SOURCE_MEMORY_LIMIT      (MAXMEM-1)
 /* Maximum address we can reach in physical address mode */
-# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
 /* Maximum address we can use for the control pages */
-# define KEXEC_CONTROL_MEMORY_LIMIT     (0xFFFFFFFFFFUL)
+# define KEXEC_CONTROL_MEMORY_LIMIT     (MAXMEM-1)
 
 /* Allocate one page for the pdp and the second for the code */
 # define KEXEC_CONTROL_PAGE_SIZE  (4096UL + 4096UL)
......
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
 #include <asm/numaq.h>
 
-extern void resume_map_numa_kva(pgd_t *pgd);
-
 #else /* !CONFIG_NUMA */
 
-static inline void resume_map_numa_kva(pgd_t *pgd) {}
-
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
......
@@ -54,8 +54,6 @@ static inline int numa_cpu_node(int cpu)
 #ifdef CONFIG_X86_32
 # include <asm/numa_32.h>
-#else
-# include <asm/numa_64.h>
 #endif
 
 #ifdef CONFIG_NUMA
......
-#ifndef _ASM_X86_NUMA_64_H
-#define _ASM_X86_NUMA_64_H
-
-extern unsigned long numa_free_all_bootmem(void);
-
-#endif /* _ASM_X86_NUMA_64_H */
@@ -17,6 +17,10 @@
 struct page;
 
+#include <linux/range.h>
+extern struct range pfn_mapped[];
+extern int nr_pfn_mapped;
+
 static inline void clear_user_page(void *page, unsigned long vaddr,
 				   struct page *pg)
 {
@@ -44,7 +48,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
  * case properly. Once all supported versions of gcc understand it, we can
  * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated)
  */
-#define __pa_symbol(x)	__pa(__phys_reloc_hide((unsigned long)(x)))
+#define __pa_symbol(x) \
+	__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
 
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
......
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long);
 #else
 #define __phys_addr(x)		__phys_addr_nodebug(x)
 #endif
+#define __phys_addr_symbol(x)	__phys_addr(x)
 #define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
 
 #ifdef CONFIG_FLATMEM
......
@@ -3,4 +3,40 @@
 
 #include <asm/page_64_types.h>
 
+#ifndef __ASSEMBLY__
+
+/* duplicated to the one in bootmem.h */
+extern unsigned long max_pfn;
+extern unsigned long phys_base;
+
+static inline unsigned long __phys_addr_nodebug(unsigned long x)
+{
+	unsigned long y = x - __START_KERNEL_map;
+
+	/* use the carry flag to determine if x was < __START_KERNEL_map */
+	x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
+
+	return x;
+}
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern unsigned long __phys_addr(unsigned long);
+extern unsigned long __phys_addr_symbol(unsigned long);
+#else
+#define __phys_addr(x)		__phys_addr_nodebug(x)
+#define __phys_addr_symbol(x) \
+	((unsigned long)(x) - __START_KERNEL_map + phys_base)
+#endif
+
+#define __phys_reloc_hide(x)	(x)
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn)          ((pfn) < max_pfn)
+#endif
+
+void clear_page(void *page);
+void copy_page(void *to, void *from);
+
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* _ASM_X86_PAGE_64_H */
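
The carry-flag trick in __phys_addr_nodebug() above deserves a worked example: for a kernel-text address, y = x - __START_KERNEL_map does not wrap, so x > y and the result is y + phys_base; for a direct-map address the subtraction wraps, and adding __START_KERNEL_map - PAGE_OFFSET cancels out to x - PAGE_OFFSET. A self-checking userspace sketch with the layout constants of this era (phys_base = 0 assumed):

#include <assert.h>
#include <stdint.h>

#define START_KERNEL_MAP	0xffffffff80000000ULL	/* illustrative */
#define PAGE_OFFSET_		0xffff880000000000ULL	/* illustrative */
static uint64_t phys_base = 0;				/* no relocation */

/* Mirrors __phys_addr_nodebug(): the wrap of (x - START_KERNEL_MAP)
 * tells us which mapping x came from, with a single comparison. */
static uint64_t phys_addr_nodebug(uint64_t x)
{
	uint64_t y = x - START_KERNEL_MAP;

	return y + ((x > y) ? phys_base : (START_KERNEL_MAP - PAGE_OFFSET_));
}

int main(void)
{
	assert(phys_addr_nodebug(0xffffffff80001000ULL) == 0x1000); /* kernel text */
	assert(phys_addr_nodebug(0xffff880000001000ULL) == 0x1000); /* direct map  */
	return 0;
}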
@@ -50,26 +50,4 @@
 #define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
 #define KERNEL_IMAGE_START	_AC(0xffffffff80000000, UL)
 
-#ifndef __ASSEMBLY__
-void clear_page(void *page);
-void copy_page(void *to, void *from);
-
-/* duplicated to the one in bootmem.h */
-extern unsigned long max_pfn;
-extern unsigned long phys_base;
-
-extern unsigned long __phys_addr(unsigned long);
-#define __phys_reloc_hide(x)	(x)
-
-#define vmemmap ((struct page *)VMEMMAP_START)
-
-extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
-extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
-
-#endif	/* !__ASSEMBLY__ */
-
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn)          ((pfn) < max_pfn)
-#endif
-
 #endif /* _ASM_X86_PAGE_64_DEFS_H */
@@ -51,6 +51,8 @@ static inline phys_addr_t get_max_mapped(void)
 	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
 }
 
+bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
+
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
......