/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical memory so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	pg_data_t *pgdat;
	struct page *page;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			/* this loop can take a while with 256 GB and 4k pages
			   so update the NMI watchdog */
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
				touch_nmi_watchdog();
			}
			if (!pfn_valid(pgdat->node_start_pfn + i))
				continue;
			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%ld pages of RAM\n", total);
	printk(KERN_INFO "%ld reserved pages\n", reserved);
	printk(KERN_INFO "%ld pages shared\n", shared);
	printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

static __init void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);
	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n",
		      after_bootmem ? "after bootmem" : "");

	Dprintk("spp_getpage %p\n", ptr);
	return ptr;
}

static __init void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, new_pte;

	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage(); 
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pud, 0)) {
			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}
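
/*
 * Hedged usage sketch (not from this file): callers install a fixmap
 * slot once at boot and then reach it through its fixed virtual
 * address. FIX_APIC_BASE, set_fixmap_nocache() and fix_to_virt() are
 * assumed from this kernel's fixmap/apic headers:
 *
 *	set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
 *	void __iomem *apic = (void __iomem *)fix_to_virt(FIX_APIC_BASE);
 */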

unsigned long __meminitdata table_start, table_end;

static __meminit void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);
		return adr;
	}

	if (pfn >= end_pfn)
		panic("alloc_low_page: ran out of memory");

	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
	memset(adr, 0, PAGE_SIZE);
	*phys = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(void *adr)
{
	if (after_bootmem)
		return;

	early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd, *last_pmd;
	int i, pmds;

	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	vaddr = __START_KERNEL_map;
	pmd = level2_kernel_pgt;
	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
		for (i = 0; i < pmds; i++) {
			if (pmd_present(pmd[i]))
				goto next;
		}
		vaddr += addr & ~PMD_MASK;
		addr &= PMD_MASK;
		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
			set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
		__flush_tlb();
		return (void *)vaddr;
	next:
		;
	}
	printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size);
	return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd;
	int i, pmds;

	vaddr = (unsigned long)addr;
	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	pmd = level2_kernel_pgt + pmd_index(vaddr);
	for (i = 0; i < pmds; i++)
		pmd_clear(pmd + i);
	__flush_tlb();
}
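
/*
 * Hedged usage sketch (illustrative; alloc_low_page() above is the real
 * in-file user): before the normal ioremap() machinery is up, a caller
 * maps a physical range temporarily and must unmap it again to avoid
 * virtual aliases. phys and len are assumed values:
 *
 *	void *p = early_ioremap(phys, len);
 *	if (p) {
 *		memset(p, 0, len);
 *		early_iounmap(p, len);
 *	}
 */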

static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
	int i = pmd_index(address);

	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
		unsigned long entry;
		pmd_t *pmd = pmd_page + pmd_index(address);

		if (address >= end) {
			if (!after_bootmem)
				for (; i < PTRS_PER_PMD; i++, pmd++)
					set_pmd(pmd, __pmd(0));
			break;
		}

		if (pmd_val(*pmd))
			continue;

		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
		entry &= __supported_pte_mask;
		set_pmd(pmd, __pmd(entry));
	}
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	spin_lock(&init_mm.page_table_lock);
	phys_pmd_init(pmd, address, end);
	spin_unlock(&init_mm.page_table_lock);
	__flush_tlb_all();
}

static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
	int i = pud_index(addr);

	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		if (pud_val(*pud)) {
			phys_pmd_update(pud, addr, end);
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, addr, end);
		spin_unlock(&init_mm.page_table_lock);
		unmap_low_page(pmd);
	}
	__flush_tlb();
}
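
/*
 * Minimal sketch of the address decomposition the two helpers above rely
 * on (constants as defined by this kernel's 4-level paging headers):
 *
 *	pud_index(addr) == (addr >> PUD_SHIFT) & (PTRS_PER_PUD - 1); 1GB slots
 *	pmd_index(addr) == (addr >> PMD_SHIFT) & (PTRS_PER_PMD - 1); 2MB slots
 *
 * Each pmd entry written here has _PAGE_PSE set, so it maps a full
 * PMD_SIZE (2MB) page and no pte level is ever allocated.
 */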

static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	/* RED-PEN putting page tables only on node 0 could
	   cause a hotspot and fill up ZONE_DMA. The page tables
	   need roughly 0.5KB per GB. */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT,
		(table_start << PAGE_SHIFT) + tables);
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   the physical memory. To access them they are temporarily mapped. */
void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
	unsigned long next;

	Dprintk("init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the
	 * memory mapped. Unfortunately this is done currently before the
	 * nodes are discovered.
	 */
	if (!after_bootmem)
		find_early_table_space(end);

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		unsigned long pud_phys;
		pgd_t *pgd = pgd_offset_k(start);
		pud_t *pud;

		if (after_bootmem)
			pud = pud_offset(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&pud_phys);

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;
		phys_pud_init(pud, __pa(start), __pa(next));
		if (!after_bootmem)
			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
		unmap_low_page(pud);
	}

	if (!after_bootmem)
		mmu_cr4_features = read_cr4();
	__flush_tlb_all();
}
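
/*
 * Hedged usage sketch (assumption: matching the boot path of this era,
 * see setup.c): all e820-discovered memory is mapped in one call before
 * bootmem exists, while the hotplug path below calls it again later:
 *
 *	init_memory_mapping(0, end_pfn_map << PAGE_SHIFT);
 */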

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = end_pfn;

	memory_present(0, 0, end_pfn);
	sparse_init();
	free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries. 
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size) 
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK); 
	
	for (; address < end; address += LARGE_PAGE_SIZE) { 
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, address);
		if (pud_none(*pud))
			continue; 
		pmd = pmd_offset(pud, address);
		if (!pmd || pmd_none(*pmd))
			continue; 
		if (!(pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
	       "clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
} 
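
/*
 * Hedged usage sketch (assumption: modeled on the GART/IOMMU setup of
 * this era, which punches its aperture out of the direct mapping so the
 * CPU never prefetches through it; iommu_bus_base and iommu_size are
 * assumed values, both 2MB aligned):
 *
 *	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
 */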

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalram_pages++;
	num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	init_memory_mapping(start, (start + size - 1));

	ret = __add_pages(zone, start_pfn, nr_pages);
	if (ret)
		goto error;

	return ret;
error:
	printk(KERN_ERR "%s: Problem encountered in __add_pages!\n", __func__);
	return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);
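
/*
 * Hedged usage sketch (illustrative; the real caller is the generic
 * add_memory() path in mm/memory_hotplug.c, not this file). start and
 * size are assumed values for a 128MB bank on node 0:
 *
 *	u64 start = 0x100000000ULL, size = 128ULL << 20;
 *	if (arch_add_memory(0, start, size) == 0)
 *		;	(pages now live in ZONE_NORMAL of node 0)
 */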

int remove_memory(u64 start, u64 size)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hotadd without sparsemem. The mem_maps have been allocated in
 * advance; just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
	int err = -EIO;
	unsigned long pfn;
	unsigned long total = 0, mem = 0;
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		if (pfn_valid(pfn)) {
			online_page(pfn_to_page(pfn));
			err = 0;
			mem++;
		}
		total++;
	}
	if (!err) {
		z->spanned_pages += total;
		z->present_pages += mem;
		z->zone_pgdat->node_spanned_pages += total;
		z->zone_pgdat->node_present_pages += mem;
	}
	return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
			 kcore_vsyscall;

void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

	pci_iommu_alloc();

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = end_pfn - totalram_pages -
					absent_pages_in_range(0, end_pfn);

	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 
				 VSYSCALL_END - VSYSCALL_START);

	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);
}

void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr;

	if (begin >= end)
		return;

	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)),
			POISON_FREE_INITMEM, PAGE_SIZE);
		if (addr >= __START_KERNEL_map)
			change_page_attr_addr(addr, 1, __pgprot(0));
		free_page(addr);
		totalram_pages++;
	}
	if (addr > __START_KERNEL_map)
		global_flush_tlb();
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
	unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
	/* It must still be possible to apply SMP alternatives. */
	if (num_possible_cpus() > 1)
		start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
	start = (unsigned long)__start_rodata;
#endif
	
	end = (unsigned long)__end_rodata;
	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
	end &= PAGE_MASK;
	if (end <= start)
		return;

	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);

	/*
	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
	 * We do this after the printk so that if something went wrong in the
	 * change, the printk gets out at least to give a better debug hint
	 * of who is the culprit.
	 */
	global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif

void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 
{ 
#ifdef CONFIG_NUMA
	int nid = phys_to_nid(phys);
#endif
	unsigned long pfn = phys >> PAGE_SHIFT;
	if (pfn >= end_pfn) {
		/* This can happen with kdump kernels when accessing firmware
		   tables. */
		if (pfn < end_pfn_map)
			return;
		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
				phys, len);
		return;
	}

	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
		dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(dma_reserve);
	}
}

int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0; 
	
	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0; 

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;
	return pfn_valid(pte_pfn(*pte));
}
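
/*
 * Hedged usage sketch (assumption: mirrors how the /proc/kcore read path
 * probes addresses before dereferencing them; addr, buf and len are
 * assumed values):
 *
 *	if (kern_addr_valid(addr))
 *		memcpy(buf, (void *)addr, len);
 */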

/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
   covers the 64-bit vsyscall page now. 32-bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
	.vm_page_prot = PAGE_READONLY_EXEC,
	.vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);
	if (!vma)
		return 0;
	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
{
	return __alloc_bootmem_core(pgdat->bdata, size,
			SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	if (vma == &gate_vma)
		return "[vsyscall]";
	return NULL;
}