/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The logical flat p2m table is mapped to a linear kernel memory area.
 * For accesses by Xen a three-level tree linked via mfns only is set up to
 * allow the address space to be sparse.
 *
 *               Xen
 *                |
 *          p2m_top_mfn
 *              /   \
 * p2m_mid_mfn p2m_mid_mfn
 *         /           /
 *  p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top_mfn level is limited to 1 page, so the maximum representable
 * pseudo-physical address space is:
 *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as an mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
 *
 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
 *
 * However, not all entries are filled with MFNs. Any leaf, middle or top
 * level entry that is void is treated as "missing", so (for example)
 *  pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
 * We have a dedicated page p2m_missing with all entries being
 * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
 * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
 *
 * We also have the possibility of setting 1-1 mappings on certain regions, so
 * that:
 *  pfn_to_mfn(0xc0000)=0xc0000
 *
 * The benefit of this is that for non-RAM regions (think PCI BARs, or
 * ACPI spaces) we can create such mappings easily, because the PFN value
 * matches the MFN.
 *
 * For this to work efficiently we have one new page p2m_identity. All entries
 * in p2m_identity are set to INVALID_P2M_ENTRY (the Xen toolstack only
 * recognizes that value and real MFNs, no other special values).
 *
 * On lookup we spot that the entry points to p2m_identity and return the
 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
 * If the entry points to an allocated page, we just proceed as before and
 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
 * appropriate functions (pfn_to_mfn).
 *
 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
 * non-identity pfn. To protect ourselves against that, we set (and get) the
 * IDENTITY_FRAME_BIT on all identity mapped PFNs.
 */
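
/*
 * Illustrative example (added for clarity, not from the original comment):
 * for an identity mapped region such as a PCI hole starting at pfn 0xc0000,
 * the stored entry is IDENTITY_FRAME(0xc0000), i.e. 0xc0000 with
 * IDENTITY_FRAME_BIT set; pfn_to_mfn() masks the bit off again and returns
 * plain 0xc0000.  A pfn whose entry resolves through p2m_missing yields
 * INVALID_P2M_ENTRY instead.
 */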

#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/cache.h>
#include <asm/setup.h>
#include <asm/uaccess.h>

#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <xen/balloon.h>
#include <xen/grant_table.h>

#include "multicalls.h"
#include "xen-ops.h"

#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))

#define MAX_P2M_PFN	(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

#define PMDS_PER_MID_PAGE	(P2M_MID_PER_PAGE / PTRS_PER_PTE)
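
/*
 * Worked example (illustrative, assuming 4 KiB pages): on 64-bit,
 * sizeof(unsigned long) == 8, so P2M_PER_PAGE, P2M_MID_PER_PAGE and
 * P2M_TOP_PER_PAGE are all 4096 / 8 = 512 and
 * MAX_P2M_PFN = 512 * 512 * 512 = 134217728 pfns, i.e. a 512 GiB
 * pseudo-physical address space.  With PTRS_PER_PTE == 512 this gives
 * PMDS_PER_MID_PAGE == 1; on 32-bit PAE (1024 mid entries per page)
 * it is 2.
 */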

unsigned long *xen_p2m_addr __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_addr);
unsigned long xen_p2m_size __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_size);
unsigned long xen_max_p2m_pfn __read_mostly;
EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);

#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
#else
#define P2M_LIMIT 0
#endif

static DEFINE_SPINLOCK(p2m_update_lock);

static unsigned long *p2m_mid_missing_mfn;
static unsigned long *p2m_top_mfn;
static unsigned long **p2m_top_mfn_p;
static unsigned long *p2m_missing;
static unsigned long *p2m_identity;
static pte_t *p2m_missing_pte;
static pte_t *p2m_identity_pte;

/*
 * Hint at last populated PFN.
 *
 * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack
 * can avoid scanning the whole P2M (which may be sized to account for
 * hotplugged memory).
 */
static unsigned long xen_p2m_last_pfn;

static inline unsigned p2m_top_index(unsigned long pfn)
{
	BUG_ON(pfn >= MAX_P2M_PFN);
	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
	return pfn % P2M_PER_PAGE;
}
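
/*
 * Illustrative sketch (not part of the original file): the inverse of the
 * three index helpers above, given only to document the layout.  E.g. on
 * 64-bit (512 entries per level) pfn 0x12345 splits into topidx 0,
 * mididx 145 and idx 325.
 */
static inline unsigned long p2m_pfn_from_indices(unsigned topidx,
						 unsigned mididx,
						 unsigned idx)
{
	return ((unsigned long)topidx * P2M_MID_PER_PAGE + mididx) *
	       P2M_PER_PAGE + idx;
}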

static void p2m_top_mfn_init(unsigned long *top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing_mfn;
}

static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = virt_to_mfn(leaf);
}

static void p2m_init(unsigned long *p2m)
{
	unsigned i;

	for (i = 0; i < P2M_PER_PAGE; i++)
		p2m[i] = INVALID_P2M_ENTRY;
}

static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
{
	unsigned i;

	for (i = 0; i < P2M_PER_PAGE; i++)
		p2m[i] = IDENTITY_FRAME(pfn + i);
}

static void * __ref alloc_p2m_page(void)
{
	if (unlikely(!slab_is_available()))
		return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);

	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
}

static void __ref free_p2m_page(void *p)
{
	if (unlikely(!slab_is_available())) {
		free_bootmem((unsigned long)p, PAGE_SIZE);
		return;
	}

	free_page((unsigned long)p);
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called rather early, and must use alloc_bootmem*()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void __ref xen_build_mfn_list_list(void)
{
	unsigned long pfn, mfn;
	pte_t *ptep;
	unsigned int level, topidx, mididx;
	unsigned long *mid_mfn_p;

	if (xen_feature(XENFEAT_auto_translated_physmap) ||
	    xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
		return;

	/* Pre-initialize p2m_top_mfn to be completely missing */
	if (p2m_top_mfn == NULL) {
		p2m_mid_missing_mfn = alloc_p2m_page();
		p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);

		p2m_top_mfn_p = alloc_p2m_page();
		p2m_top_mfn_p_init(p2m_top_mfn_p);

		p2m_top_mfn = alloc_p2m_page();
		p2m_top_mfn_init(p2m_top_mfn);
	} else {
		/* Reinitialise, mfns all change after migration */
		p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
	}

	for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
	     pfn += P2M_PER_PAGE) {
		topidx = p2m_top_index(pfn);
		mididx = p2m_mid_index(pfn);

		mid_mfn_p = p2m_top_mfn_p[topidx];
		ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
				      &level);
		BUG_ON(!ptep || level != PG_LEVEL_4K);
		mfn = pte_mfn(*ptep);
		ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

		/* Don't bother allocating any mfn mid levels if
		 * they're just missing, just update the stored mfn,
		 * since all could have changed over a migrate.
		 */
		if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
			BUG_ON(mididx);
			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
			continue;
		}

		if (mid_mfn_p == p2m_mid_missing_mfn) {
			mid_mfn_p = alloc_p2m_page();
			p2m_mid_mfn_init(mid_mfn_p, p2m_missing);

			p2m_top_mfn_p[topidx] = mid_mfn_p;
		}

		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
		mid_mfn_p[mididx] = mfn;
	}
}

void xen_setup_mfn_list_list(void)
{
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

	if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = ~0UL;
	else
		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
			virt_to_mfn(p2m_top_mfn);
	HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
	HYPERVISOR_shared_info->arch.p2m_generation = 0;
	HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr;
	HYPERVISOR_shared_info->arch.p2m_cr3 =
		xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
	unsigned long pfn;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
	xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);

	for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
		xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;

	xen_max_p2m_pfn = xen_p2m_size;
}

#define P2M_TYPE_IDENTITY	0
#define P2M_TYPE_MISSING	1
#define P2M_TYPE_PFN		2
#define P2M_TYPE_UNKNOWN	3

static int xen_p2m_elem_type(unsigned long pfn)
{
	unsigned long mfn;

	if (pfn >= xen_p2m_size)
		return P2M_TYPE_IDENTITY;

	mfn = xen_p2m_addr[pfn];

	if (mfn == INVALID_P2M_ENTRY)
		return P2M_TYPE_MISSING;

	if (mfn & IDENTITY_FRAME_BIT)
		return P2M_TYPE_IDENTITY;

	return P2M_TYPE_PFN;
}

static void __init xen_rebuild_p2m_list(unsigned long *p2m)
{
	unsigned int i, chunk;
	unsigned long pfn;
	unsigned long *mfns;
	pte_t *ptep;
	pmd_t *pmdp;
	int type;

	p2m_missing = alloc_p2m_page();
	p2m_init(p2m_missing);
	p2m_identity = alloc_p2m_page();
	p2m_init(p2m_identity);

	p2m_missing_pte = alloc_p2m_page();
	paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
	p2m_identity_pte = alloc_p2m_page();
	paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte(p2m_missing_pte + i,
			pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
		set_pte(p2m_identity_pte + i,
			pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
	}

	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
		/*
		 * Try to map missing/identity PMDs or p2m-pages if possible.
		 * We have to respect the structure of the mfn_list_list
		 * which will be built just afterwards.
		 * Chunk size to test is one p2m page if we are in the middle
		 * of a mfn_list_list mid page and the complete mid page area
		 * if we are at index 0 of the mid page. Please note that a
		 * mid page might cover more than one PMD, e.g. on 32 bit PAE
		 * kernels.
		 */
		chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
			P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;

		type = xen_p2m_elem_type(pfn);
		i = 0;
		if (type != P2M_TYPE_PFN)
			for (i = 1; i < chunk; i++)
				if (xen_p2m_elem_type(pfn + i) != type)
					break;
		if (i < chunk)
			/* Reset to minimal chunk size. */
			chunk = P2M_PER_PAGE;

		if (type == P2M_TYPE_PFN || i < chunk) {
			/* Use initial p2m page contents. */
#ifdef CONFIG_X86_64
			mfns = alloc_p2m_page();
			copy_page(mfns, xen_p2m_addr + pfn);
#else
			mfns = xen_p2m_addr + pfn;
#endif
			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
			set_pte(ptep,
				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
			continue;
		}

		if (chunk == P2M_PER_PAGE) {
			/* Map complete missing or identity p2m-page. */
			mfns = (type == P2M_TYPE_MISSING) ?
				p2m_missing : p2m_identity;
			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
			set_pte(ptep,
				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
			continue;
		}

		/* Complete missing or identity PMD(s) can be mapped. */
		ptep = (type == P2M_TYPE_MISSING) ?
			p2m_missing_pte : p2m_identity_pte;
		for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
			pmdp = populate_extra_pmd(
				(unsigned long)(p2m + pfn) + i * PMD_SIZE);
			set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
		}
	}
}

void __init xen_vmalloc_p2m_tree(void)
{
	static struct vm_struct vm;
	unsigned long p2m_limit;

	xen_p2m_last_pfn = xen_max_p2m_pfn;

	p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
	vm.flags = VM_ALLOC;
	vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
			PMD_SIZE * PMDS_PER_MID_PAGE);
	vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
	pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);

	xen_max_p2m_pfn = vm.size / sizeof(unsigned long);

	xen_rebuild_p2m_list(vm.addr);

	xen_p2m_addr = vm.addr;
	xen_p2m_size = xen_max_p2m_pfn;

	xen_inv_extra_mem();
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
	pte_t *ptep;
	unsigned int level;

	if (unlikely(pfn >= xen_p2m_size)) {
		if (pfn < xen_max_p2m_pfn)
			return xen_chk_extra_mem(pfn);

		return IDENTITY_FRAME(pfn);
	}

	ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
	BUG_ON(!ptep || level != PG_LEVEL_4K);

	/*
	 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
	 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
	 * would be wrong.
	 */
	if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
		return IDENTITY_FRAME(pfn);

	return xen_p2m_addr[pfn];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);

/*
 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
 * If not, nothing is changed. This is okay as the only reason for allocating
 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an individual
 * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
 */
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{
	pte_t *ptechk;
	pte_t *pte_newpg[PMDS_PER_MID_PAGE];
	pmd_t *pmdp;
	unsigned int level;
	unsigned long flags;
	unsigned long vaddr;
	int i;

	/* Do all allocations first to bail out in error case. */
	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
		pte_newpg[i] = alloc_p2m_page();
		if (!pte_newpg[i]) {
			for (i--; i >= 0; i--)
				free_p2m_page(pte_newpg[i]);

			return NULL;
		}
	}

	vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);

	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
		copy_page(pte_newpg[i], pte_pg);
		paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);

		pmdp = lookup_pmd_address(vaddr);
		BUG_ON(!pmdp);

		spin_lock_irqsave(&p2m_update_lock, flags);

		ptechk = lookup_address(vaddr, &level);
		if (ptechk == pte_pg) {
			HYPERVISOR_shared_info->arch.p2m_generation++;
			wmb(); /* Tools are synchronizing via p2m_generation. */
			set_pmd(pmdp,
				__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
			wmb(); /* Tools are synchronizing via p2m_generation. */
			HYPERVISOR_shared_info->arch.p2m_generation++;
			pte_newpg[i] = NULL;
		}

		spin_unlock_irqrestore(&p2m_update_lock, flags);

		if (pte_newpg[i]) {
			paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
			free_p2m_page(pte_newpg[i]);
		}

		vaddr += PMD_SIZE;
	}

	return lookup_address(addr, &level);
}

/*
 * Fully allocate the p2m structure for a given pfn.  We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync.  We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
	unsigned topidx;
	unsigned long *top_mfn_p, *mid_mfn;
	pte_t *ptep, *pte_pg;
	unsigned int level;
	unsigned long flags;
	unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
	unsigned long p2m_pfn;

	ptep = lookup_address(addr, &level);
	BUG_ON(!ptep || level != PG_LEVEL_4K);
	pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

	if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
		/* PMD level is missing, allocate a new one */
		ptep = alloc_p2m_pmd(addr, pte_pg);
		if (!ptep)
			return false;
	}

	if (p2m_top_mfn && pfn < MAX_P2M_PFN) {
		topidx = p2m_top_index(pfn);
		top_mfn_p = &p2m_top_mfn[topidx];
		mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);

		BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

		if (mid_mfn == p2m_mid_missing_mfn) {
			/* Separately check the mid mfn level */
			unsigned long missing_mfn;
			unsigned long mid_mfn_mfn;
			unsigned long old_mfn;

			mid_mfn = alloc_p2m_page();
			if (!mid_mfn)
				return false;

			p2m_mid_mfn_init(mid_mfn, p2m_missing);

			missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
			mid_mfn_mfn = virt_to_mfn(mid_mfn);
			old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
			if (old_mfn != missing_mfn) {
				free_p2m_page(mid_mfn);
				mid_mfn = mfn_to_virt(old_mfn);
			} else {
				p2m_top_mfn_p[topidx] = mid_mfn;
			}
		}
	} else {
		mid_mfn = NULL;
	}

	p2m_pfn = pte_pfn(READ_ONCE(*ptep));
	if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
	    p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
		/* p2m leaf page is missing */
		unsigned long *p2m;

		p2m = alloc_p2m_page();
		if (!p2m)
			return false;

		if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
			p2m_init(p2m);
		else
			p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));

		spin_lock_irqsave(&p2m_update_lock, flags);

		if (pte_pfn(*ptep) == p2m_pfn) {
			HYPERVISOR_shared_info->arch.p2m_generation++;
			wmb(); /* Tools are synchronizing via p2m_generation. */
			set_pte(ptep,
				pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
			wmb(); /* Tools are synchronizing via p2m_generation. */
			HYPERVISOR_shared_info->arch.p2m_generation++;
			if (mid_mfn)
				mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m);
			p2m = NULL;
		}

		spin_unlock_irqrestore(&p2m_update_lock, flags);

		if (p2m)
			free_p2m_page(p2m);
	}

	/* Expanded the p2m? */
	if (pfn > xen_p2m_last_pfn) {
		xen_p2m_last_pfn = pfn;
		HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
	}

	return true;
}

unsigned long __init set_phys_range_identity(unsigned long pfn_s,
				      unsigned long pfn_e)
{
	unsigned long pfn;

	if (unlikely(pfn_s >= xen_p2m_size))
		return 0;

	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
		return pfn_e - pfn_s;

	if (pfn_s > pfn_e)
		return 0;

	if (pfn_e > xen_p2m_size)
		pfn_e = xen_p2m_size;

	for (pfn = pfn_s; pfn < pfn_e; pfn++)
		xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn);

	return pfn - pfn_s;
}
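
/*
 * Usage sketch (illustrative, not from the original file; start_pfn and
 * end_pfn are hypothetical variables): early setup code can mark a
 * non-RAM hole, e.g. a PCI or ACPI region, as 1:1 via
 *
 *	set_phys_range_identity(start_pfn, end_pfn);
 *
 * after which pfn_to_mfn() returns the pfn itself for that range (the
 * IDENTITY_FRAME_BIT is masked off again by pfn_to_mfn()).
 */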

bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	pte_t *ptep;
	unsigned int level;

	/* don't track P2M changes in autotranslate guests */
	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
		return true;

	if (unlikely(pfn >= xen_p2m_size)) {
		BUG_ON(mfn != INVALID_P2M_ENTRY);
		return true;
	}

	/*
	 * The interface requires atomic updates on p2m elements.
	 * xen_safe_write_ulong() is using __put_user which does an atomic
	 * store via asm().
	 */
	if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
		return true;

	ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
	BUG_ON(!ptep || level != PG_LEVEL_4K);

	if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
		return mfn == INVALID_P2M_ENTRY;

	if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
		return mfn == IDENTITY_FRAME(pfn);

	return false;
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
		if (!alloc_p2m(pfn))
			return false;

		return __set_phys_to_machine(pfn, mfn);
	}

	return true;
}

int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
			    struct gnttab_map_grant_ref *kmap_ops,
			    struct page **pages, unsigned int count)
{
	int i, ret = 0;
	pte_t *pte;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (kmap_ops) {
		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
						kmap_ops, count);
		if (ret)
			goto out;
	}

	for (i = 0; i < count; i++) {
		unsigned long mfn, pfn;

		/* Do not add to override if the map failed. */
		if (map_ops[i].status)
			continue;

		if (map_ops[i].flags & GNTMAP_contains_pte) {
			pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
				(map_ops[i].host_addr & ~PAGE_MASK));
			mfn = pte_mfn(*pte);
		} else {
			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
		}
		pfn = page_to_pfn(pages[i]);

		WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");

		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
			ret = -ENOMEM;
			goto out;
		}
	}

out:
	return ret;
}
EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);

int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
			      struct gnttab_unmap_grant_ref *kunmap_ops,
			      struct page **pages, unsigned int count)
{
	int i, ret = 0;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	for (i = 0; i < count; i++) {
		unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
		unsigned long pfn = page_to_pfn(pages[i]);

		if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
			ret = -EINVAL;
			goto out;
		}

		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}
	if (kunmap_ops)
		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
						kunmap_ops, count);
out:
	return ret;
}
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);

#ifdef CONFIG_XEN_DEBUG_FS
#include <linux/debugfs.h>
#include "debugfs.h"
static int p2m_dump_show(struct seq_file *m, void *v)
{
	static const char * const type_name[] = {
				[P2M_TYPE_IDENTITY] = "identity",
				[P2M_TYPE_MISSING] = "missing",
				[P2M_TYPE_PFN] = "pfn",
				[P2M_TYPE_UNKNOWN] = "abnormal"};
	unsigned long pfn, first_pfn;
	int type, prev_type;

	prev_type = xen_p2m_elem_type(0);
	first_pfn = 0;

	for (pfn = 0; pfn < xen_p2m_size; pfn++) {
		type = xen_p2m_elem_type(pfn);
		if (type != prev_type) {
			seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
				   type_name[prev_type]);
			prev_type = type;
			first_pfn = pfn;
		}
	}
	seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
		   type_name[prev_type]);
	return 0;
}

static int p2m_dump_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, p2m_dump_show, NULL);
}

static const struct file_operations p2m_dump_fops = {
	.open		= p2m_dump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static struct dentry *d_mmu_debug;

static int __init xen_p2m_debugfs(void)
{
	struct dentry *d_xen = xen_init_debugfs();

	if (d_xen == NULL)
		return -ENOMEM;

	d_mmu_debug = debugfs_create_dir("mmu", d_xen);

	debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
	return 0;
}
fs_initcall(xen_p2m_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */