/*
 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 *         Leo Duran <leo.duran@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/iommu-helper.h>
#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/dma-contiguous.h>

#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/hw_irq.h>
#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/dma.h>

#include "amd_iommu_proto.h"
#include "amd_iommu_types.h"
#include "irq_remapping.h"

#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))

#define LOOP_TIMEOUT	100000

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * 512GB Pages are not supported due to a hardware bug
 */
#define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))

static DEFINE_RWLOCK(amd_iommu_devtable_lock);

/* A list of preallocated protection domains */
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);

/* List of all available dev_data structures */
static LIST_HEAD(dev_data_list);
static DEFINE_SPINLOCK(dev_data_list_lock);

LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);

/*
 * Domain for untranslated devices - only allocated
 * if iommu=pt passed on kernel cmd line.
 */
static struct protection_domain *pt_domain;

static const struct iommu_ops amd_iommu_ops;

static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;

static struct dma_map_ops amd_iommu_dma_ops;

/*
 * This struct contains device specific data for the IOMMU
 */
struct iommu_dev_data {
	struct list_head list;		  /* For domain->dev_list */
	struct list_head dev_data_list;	  /* For global dev_data_list */
	struct list_head alias_list;      /* Link alias-groups together */
	struct iommu_dev_data *alias_data;/* The alias dev_data */
	struct protection_domain *domain; /* Domain the device is bound to */
	u16 devid;			  /* PCI Device ID */
	bool iommu_v2;			  /* Device can make use of IOMMUv2 */
	bool passthrough;		  /* Default for device is pt_domain */
	struct {
		bool enabled;
		int qdep;
	} ats;				  /* ATS state */
	bool pri_tlp;			  /* PASID TLB required for
					     PPR completions */
	u32 errata;			  /* Bitmap for errata to apply */
};

/*
 * General struct to manage commands sent to an IOMMU
 */
struct iommu_cmd {
	u32 data[4];
};

struct kmem_cache *amd_iommu_irq_cache;

static void update_domain(struct protection_domain *domain);
static int __init alloc_passthrough_domain(void);

/****************************************************************************
 *
 * Helper functions
 *
 ****************************************************************************/

static struct protection_domain *to_pdomain(struct iommu_domain *dom)
{
	return container_of(dom, struct protection_domain, domain);
}

static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
	struct iommu_dev_data *dev_data;
	unsigned long flags;

	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
	if (!dev_data)
		return NULL;

	INIT_LIST_HEAD(&dev_data->alias_list);

	dev_data->devid = devid;

	spin_lock_irqsave(&dev_data_list_lock, flags);
	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
	spin_unlock_irqrestore(&dev_data_list_lock, flags);

	return dev_data;
}

static void free_dev_data(struct iommu_dev_data *dev_data)
{
	unsigned long flags;

	spin_lock_irqsave(&dev_data_list_lock, flags);
	list_del(&dev_data->dev_data_list);
	spin_unlock_irqrestore(&dev_data_list_lock, flags);

	kfree(dev_data);
}

static struct iommu_dev_data *search_dev_data(u16 devid)
{
	struct iommu_dev_data *dev_data;
	unsigned long flags;

	spin_lock_irqsave(&dev_data_list_lock, flags);
	list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
		if (dev_data->devid == devid)
			goto out_unlock;
	}

	dev_data = NULL;

out_unlock:
	spin_unlock_irqrestore(&dev_data_list_lock, flags);

	return dev_data;
}

static struct iommu_dev_data *find_dev_data(u16 devid)
{
	struct iommu_dev_data *dev_data;

	dev_data = search_dev_data(devid);

	if (dev_data == NULL)
		dev_data = alloc_dev_data(devid);

	return dev_data;
}

static inline u16 get_device_id(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return PCI_DEVID(pdev->bus->number, pdev->devfn);
}

static struct iommu_dev_data *get_dev_data(struct device *dev)
{
	return dev->archdata.iommu;
}

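/*
 * Check whether the device implements the ATS, PRI and PASID PCIe extended
 * capabilities required for IOMMUv2 use.
 */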
static bool pci_iommuv2_capable(struct pci_dev *pdev)
{
	static const int caps[] = {
		PCI_EXT_CAP_ID_ATS,
		PCI_EXT_CAP_ID_PRI,
		PCI_EXT_CAP_ID_PASID,
	};
	int i, pos;

	for (i = 0; i < 3; ++i) {
		pos = pci_find_ext_capability(pdev, caps[i]);
		if (pos == 0)
			return false;
	}

	return true;
}

static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
{
	struct iommu_dev_data *dev_data;

	dev_data = get_dev_data(&pdev->dev);

	return dev_data->errata & (1 << erratum) ? true : false;
}

/*
 * In this function the list of preallocated protection domains is traversed to
 * find the domain for a specific device
 */
static struct dma_ops_domain *find_protection_domain(u16 devid)
{
	struct dma_ops_domain *entry, *ret = NULL;
	unsigned long flags;
	u16 alias = amd_iommu_alias_table[devid];

	if (list_empty(&iommu_pd_list))
		return NULL;

	spin_lock_irqsave(&iommu_pd_list_lock, flags);

	list_for_each_entry(entry, &iommu_pd_list, list) {
		if (entry->target_dev == devid ||
		    entry->target_dev == alias) {
			ret = entry;
			break;
		}
	}

	spin_unlock_irqrestore(&iommu_pd_list_lock, flags);

	return ret;
}

/*
 * This function checks if the driver got a valid device from the caller to
 * avoid dereferencing invalid pointers.
 */
static bool check_device(struct device *dev)
{
	u16 devid;

	if (!dev || !dev->dma_mask)
		return false;

	/* No PCI device */
	if (!dev_is_pci(dev))
		return false;

	devid = get_device_id(dev);

	/* Out of our scope? */
	if (devid > amd_iommu_last_bdf)
		return false;

	if (amd_iommu_rlookup_table[devid] == NULL)
		return false;

	return true;
}

static void init_iommu_group(struct device *dev)
{
	struct iommu_group *group;

	group = iommu_group_get_for_dev(dev);
	if (!IS_ERR(group))
		iommu_group_put(group);
}

static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
{
	*(u16 *)data = alias;
	return 0;
}

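/*
 * Return the DMA alias to use for a device, reconciling the alias reported
 * by the IVRS table with the one derived from PCI topology, and patching up
 * the rlookup/device tables or PCI quirks where the two disagree.
 */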
static u16 get_alias(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	u16 devid, ivrs_alias, pci_alias;

	devid = get_device_id(dev);
	ivrs_alias = amd_iommu_alias_table[devid];
	pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);

	if (ivrs_alias == pci_alias)
		return ivrs_alias;

	/*
	 * DMA alias showdown
	 *
	 * The IVRS is fairly reliable in telling us about aliases, but it
	 * can't know about every screwy device.  If we don't have an IVRS
	 * reported alias, use the PCI reported alias.  In that case we may
	 * still need to initialize the rlookup and dev_table entries if the
	 * alias is to a non-existent device.
	 */
	if (ivrs_alias == devid) {
		if (!amd_iommu_rlookup_table[pci_alias]) {
			amd_iommu_rlookup_table[pci_alias] =
				amd_iommu_rlookup_table[devid];
			memcpy(amd_iommu_dev_table[pci_alias].data,
			       amd_iommu_dev_table[devid].data,
			       sizeof(amd_iommu_dev_table[pci_alias].data));
		}

		return pci_alias;
	}

	pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
		"for device %s[%04x:%04x], kernel reported alias "
		"%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
		PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
		PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
		PCI_FUNC(pci_alias));

	/*
	 * If we don't have a PCI DMA alias and the IVRS alias is on the same
	 * bus, then the IVRS table may know about a quirk that we don't.
	 */
	if (pci_alias == devid &&
	    PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
		pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
		pdev->dma_alias_devfn = ivrs_alias & 0xff;
		pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
			PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
			dev_name(dev));
	}

	return ivrs_alias;
}

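/*
 * Allocate and initialize the iommu_dev_data for a device: resolve its DMA
 * alias, link it into the alias group, detect IOMMUv2 capability and create
 * the sysfs link to its IOMMU.
 */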
static int iommu_init_device(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct iommu_dev_data *dev_data;
	u16 alias;

	if (dev->archdata.iommu)
		return 0;

	dev_data = find_dev_data(get_device_id(dev));
	if (!dev_data)
		return -ENOMEM;

	alias = get_alias(dev);

	if (alias != dev_data->devid) {
		struct iommu_dev_data *alias_data;

		alias_data = find_dev_data(alias);
		if (alias_data == NULL) {
			pr_err("AMD-Vi: Warning: Unhandled device %s\n",
					dev_name(dev));
			free_dev_data(dev_data);
			return -ENOTSUPP;
		}
		dev_data->alias_data = alias_data;

		/* Add device to the alias_list */
		list_add(&dev_data->alias_list, &alias_data->alias_list);
	}

	if (pci_iommuv2_capable(pdev)) {
		struct amd_iommu *iommu;

		iommu              = amd_iommu_rlookup_table[dev_data->devid];
		dev_data->iommu_v2 = iommu->is_iommu_v2;
	}

	dev->archdata.iommu = dev_data;

	iommu_device_link(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
			  dev);

	return 0;
}

static void iommu_ignore_device(struct device *dev)
{
	u16 devid, alias;

	devid = get_device_id(dev);
	alias = amd_iommu_alias_table[devid];

	memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
	memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));

	amd_iommu_rlookup_table[devid] = NULL;
	amd_iommu_rlookup_table[alias] = NULL;
}

static void iommu_uninit_device(struct device *dev)
{
	struct iommu_dev_data *dev_data = search_dev_data(get_device_id(dev));

	if (!dev_data)
		return;

	iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
			    dev);

	iommu_group_remove_device(dev);

	/* Unlink from alias, it may change if another device is re-plugged */
	dev_data->alias_data = NULL;

	/*
	 * We keep dev_data around for unplugged devices and reuse it when the
	 * device is re-plugged - not doing so would introduce a ton of races.
	 */
}

void __init amd_iommu_uninit_devices(void)
{
	struct iommu_dev_data *dev_data, *n;
	struct pci_dev *pdev = NULL;

	for_each_pci_dev(pdev) {

		if (!check_device(&pdev->dev))
			continue;

		iommu_uninit_device(&pdev->dev);
	}

	/* Free all of our dev_data structures */
	list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list)
		free_dev_data(dev_data);
}

int __init amd_iommu_init_devices(void)
{
	struct pci_dev *pdev = NULL;
	int ret = 0;

	for_each_pci_dev(pdev) {

		if (!check_device(&pdev->dev))
			continue;

		ret = iommu_init_device(&pdev->dev);
		if (ret == -ENOTSUPP)
			iommu_ignore_device(&pdev->dev);
		else if (ret)
			goto out_free;
	}

	/*
	 * Initialize IOMMU groups only after iommu_init_device() has
	 * had a chance to populate any IVRS defined aliases.
	 */
	for_each_pci_dev(pdev) {
		if (check_device(&pdev->dev))
			init_iommu_group(&pdev->dev);
	}

	return 0;

out_free:

	amd_iommu_uninit_devices();

	return ret;
}

#ifdef CONFIG_AMD_IOMMU_STATS

/*
 * Initialization code for statistics collection
 */

DECLARE_STATS_COUNTER(compl_wait);
DECLARE_STATS_COUNTER(cnt_map_single);
DECLARE_STATS_COUNTER(cnt_unmap_single);
DECLARE_STATS_COUNTER(cnt_map_sg);
DECLARE_STATS_COUNTER(cnt_unmap_sg);
DECLARE_STATS_COUNTER(cnt_alloc_coherent);
DECLARE_STATS_COUNTER(cnt_free_coherent);
DECLARE_STATS_COUNTER(cross_page);
DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
DECLARE_STATS_COUNTER(complete_ppr);
DECLARE_STATS_COUNTER(invalidate_iotlb);
DECLARE_STATS_COUNTER(invalidate_iotlb_all);
DECLARE_STATS_COUNTER(pri_requests);

static struct dentry *stats_dir;
static struct dentry *de_fflush;

static void amd_iommu_stats_add(struct __iommu_counter *cnt)
{
	if (stats_dir == NULL)
		return;

	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
				       &cnt->value);
}

static void amd_iommu_stats_init(void)
{
	stats_dir = debugfs_create_dir("amd-iommu", NULL);
	if (stats_dir == NULL)
		return;

	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
					 &amd_iommu_unmap_flush);

	amd_iommu_stats_add(&compl_wait);
	amd_iommu_stats_add(&cnt_map_single);
	amd_iommu_stats_add(&cnt_unmap_single);
	amd_iommu_stats_add(&cnt_map_sg);
	amd_iommu_stats_add(&cnt_unmap_sg);
	amd_iommu_stats_add(&cnt_alloc_coherent);
	amd_iommu_stats_add(&cnt_free_coherent);
	amd_iommu_stats_add(&cross_page);
	amd_iommu_stats_add(&domain_flush_single);
	amd_iommu_stats_add(&domain_flush_all);
	amd_iommu_stats_add(&alloced_io_mem);
	amd_iommu_stats_add(&total_map_requests);
	amd_iommu_stats_add(&complete_ppr);
	amd_iommu_stats_add(&invalidate_iotlb);
	amd_iommu_stats_add(&invalidate_iotlb_all);
	amd_iommu_stats_add(&pri_requests);
}

#endif

/****************************************************************************
 *
 * Interrupt handling functions
 *
 ****************************************************************************/

static void dump_dte_entry(u16 devid)
{
	int i;

	for (i = 0; i < 4; ++i)
		pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
			amd_iommu_dev_table[devid].data[i]);
}

static void dump_command(unsigned long phys_addr)
{
	struct iommu_cmd *cmd = phys_to_virt(phys_addr);
	int i;

	for (i = 0; i < 4; ++i)
		pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
}

static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
	int type, devid, domid, flags;
	volatile u32 *event = __evt;
	int count = 0;
	u64 address;

retry:
	type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
	domid   = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
	address = (u64)(((u64)event[3]) << 32) | event[2];

	if (type == 0) {
		/* Did we hit the erratum? */
		if (++count == LOOP_TIMEOUT) {
			pr_err("AMD-Vi: No event written to event log\n");
			return;
		}
		udelay(1);
		goto retry;
	}

	printk(KERN_ERR "AMD-Vi: Event logged [");

	switch (type) {
	case EVENT_TYPE_ILL_DEV:
		printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		dump_dte_entry(devid);
		break;
	case EVENT_TYPE_IO_FAULT:
		printk("IO_PAGE_FAULT device=%02x:%02x.%x "
		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       domid, address, flags);
		break;
	case EVENT_TYPE_DEV_TAB_ERR:
		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		break;
	case EVENT_TYPE_PAGE_TAB_ERR:
		printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       domid, address, flags);
		break;
	case EVENT_TYPE_ILL_CMD:
		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
		dump_command(address);
		break;
	case EVENT_TYPE_CMD_HARD_ERR:
		printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
		       "flags=0x%04x]\n", address, flags);
		break;
	case EVENT_TYPE_IOTLB_INV_TO:
		printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
		       "address=0x%016llx]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address);
		break;
	case EVENT_TYPE_INV_DEV_REQ:
		printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		break;
	default:
		printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
	}

	memset(__evt, 0, 4 * sizeof(u32));
}

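/* Drain the event log ring buffer and print each logged event. */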
static void iommu_poll_events(struct amd_iommu *iommu)
{
	u32 head, tail;

	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

	while (head != tail) {
		iommu_print_event(iommu, iommu->evt_buf + head);
		head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
	}

	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
}

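/*
 * Convert a raw PPR log entry into an amd_iommu_fault and pass it to the
 * registered PPR notifiers.
 */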
static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
{
	struct amd_iommu_fault fault;

	INC_STATS_COUNTER(pri_requests);

	if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
		pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
		return;
	}

	fault.address   = raw[1];
	fault.pasid     = PPR_PASID(raw[0]);
	fault.device_id = PPR_DEVID(raw[0]);
	fault.tag       = PPR_TAG(raw[0]);
	fault.flags     = PPR_FLAGS(raw[0]);

	atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
}

static void iommu_poll_ppr_log(struct amd_iommu *iommu)
{
	u32 head, tail;

	if (iommu->ppr_log == NULL)
		return;

	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);

	while (head != tail) {
		volatile u64 *raw;
		u64 entry[2];
		int i;

		raw = (u64 *)(iommu->ppr_log + head);

		/*
		 * Hardware bug: Interrupt may arrive before the entry is
		 * written to memory. If this happens we need to wait for the
		 * entry to arrive.
		 */
		for (i = 0; i < LOOP_TIMEOUT; ++i) {
			if (PPR_REQ_TYPE(raw[0]) != 0)
				break;
			udelay(1);
		}

		/* Avoid memcpy function-call overhead */
		entry[0] = raw[0];
		entry[1] = raw[1];

		/*
		 * To detect the hardware bug we need to clear the entry
		 * back to zero.
		 */
		raw[0] = raw[1] = 0UL;

		/* Update head pointer of hardware ring-buffer */
		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
		writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);

		/* Handle PPR entry */
		iommu_handle_ppr_entry(iommu, entry);

		/* Refresh ring-buffer information */
		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
	}
}

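/*
 * Threaded part of the IOMMU interrupt handler: processes the event log and
 * the PPR log and re-checks the status register to work around erratum
 * ERBT1312.
 */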
irqreturn_t amd_iommu_int_thread(int irq, void *data)
{
	struct amd_iommu *iommu = (struct amd_iommu *) data;
	u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);

	while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) {
		/* Enable EVT and PPR interrupts again */
		writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK),
			iommu->mmio_base + MMIO_STATUS_OFFSET);

		if (status & MMIO_STATUS_EVT_INT_MASK) {
			pr_devel("AMD-Vi: Processing IOMMU Event Log\n");
			iommu_poll_events(iommu);
		}

		if (status & MMIO_STATUS_PPR_INT_MASK) {
			pr_devel("AMD-Vi: Processing IOMMU PPR Log\n");
			iommu_poll_ppr_log(iommu);
		}

		/*
		 * Hardware bug: ERBT1312
		 * When re-enabling the interrupt (by writing 1
		 * to clear the bit), the hardware might also try to set
		 * the interrupt bit in the event status register.
		 * In this scenario, the bit will be set and will disable
		 * subsequent interrupts.
		 *
		 * Workaround: The IOMMU driver should read back the
		 * status register and check if the interrupt bits are cleared.
		 * If not, the driver will need to go through the interrupt
		 * handler again and re-clear the bits.
		 */
		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
	}

	return IRQ_HANDLED;
}

irqreturn_t amd_iommu_int_handler(int irq, void *data)
{
	return IRQ_WAKE_THREAD;
}

/****************************************************************************
 *
 * IOMMU command queuing functions
 *
 ****************************************************************************/

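/*
 * Busy-wait until the IOMMU has written the completion-wait semaphore, or
 * give up after LOOP_TIMEOUT iterations.
 */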
static int wait_on_sem(volatile u64 *sem)
{
	int i = 0;

	while (*sem == 0 && i < LOOP_TIMEOUT) {
		udelay(1);
		i += 1;
	}

	if (i == LOOP_TIMEOUT) {
		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
		return -EIO;
	}

	return 0;
}

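/*
 * Copy a command into the command ring buffer and move the tail pointer so
 * the IOMMU starts fetching it.
 */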
static void copy_cmd_to_buffer(struct amd_iommu *iommu,
			       struct iommu_cmd *cmd,
			       u32 tail)
{
	u8 *target;

	target = iommu->cmd_buf + tail;
	tail   = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;

	/* Copy command to buffer */
	memcpy(target, cmd, sizeof(*cmd));

	/* Tell the IOMMU about it */
	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
}

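/*
 * Build a COMPLETION_WAIT command which makes the IOMMU store a non-zero
 * value to @address once all preceding commands have completed.
 */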
static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
{
	WARN_ON(address & 0x7ULL);

	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
	cmd->data[1] = upper_32_bits(__pa(address));
	cmd->data[2] = 1;
	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
}

static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0] = devid;
	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
}

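/*
 * Build an INVALIDATE_IOMMU_PAGES command to flush the IOMMU TLB entries of
 * a protection domain for the given address range.
 */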
static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
				  size_t size, u16 domid, int pde)
{
	u64 pages;
	bool s;

	pages = iommu_num_pages(address, size, PAGE_SIZE);
	s     = false;

	if (pages > 1) {
		/*
		 * If we have to flush more than one page, flush all
		 * TLB entries for this domain
		 */
		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
		s = true;
	}

	address &= PAGE_MASK;

	memset(cmd, 0, sizeof(*cmd));
	cmd->data[1] |= domid;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[3]  = upper_32_bits(address);
	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
	if (s) /* size bit - we flush more than one 4kb page */
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
}

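/*
 * Build an INVALIDATE_IOTLB_PAGES command to flush a range from the remote
 * IOTLB of an ATS-capable device.
 */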
static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
				  u64 address, size_t size)
{
	u64 pages;
	bool s;

	pages = iommu_num_pages(address, size, PAGE_SIZE);
	s     = false;

	if (pages > 1) {
		/*
		 * If we have to flush more than one page, flush all
		 * TLB entries for this domain
		 */
		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
		s = true;
	}

	address &= PAGE_MASK;

	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0]  = devid;
	cmd->data[0] |= (qdep & 0xff) << 24;
	cmd->data[1]  = devid;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[3]  = upper_32_bits(address);
	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
	if (s)
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}

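/*
 * PASID variant of INVALIDATE_IOMMU_PAGES: flush the translations of a
 * single PASID within the given domain.
 */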
static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
				  u64 address, bool size)
{
	memset(cmd, 0, sizeof(*cmd));

	address &= ~(0xfffULL);

	cmd->data[0]  = pasid;
	cmd->data[1]  = domid;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[3]  = upper_32_bits(address);
	cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
	if (size)
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
}

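/*
 * PASID variant of INVALIDATE_IOTLB_PAGES: flush PASID-tagged entries from
 * the remote IOTLB of a device.
 */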
static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
				  int qdep, u64 address, bool size)
{
	memset(cmd, 0, sizeof(*cmd));

	address &= ~(0xfffULL);

	cmd->data[0]  = devid;
	cmd->data[0] |= ((pasid >> 8) & 0xff) << 16;
	cmd->data[0] |= (qdep  & 0xff) << 24;
	cmd->data[1]  = devid;
	cmd->data[1] |= (pasid & 0xff) << 16;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
	cmd->data[3]  = upper_32_bits(address);
	if (size)
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
}

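/*
 * Build a COMPLETE_PPR command to report the handling result of a PPR
 * request back to the device.
 */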
static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
			       int status, int tag, bool gn)
{
	memset(cmd, 0, sizeof(*cmd));

	cmd->data[0]  = devid;
	if (gn) {
		cmd->data[1]  = pasid;
		cmd->data[2]  = CMD_INV_IOMMU_PAGES_GN_MASK;
	}
	cmd->data[3]  = tag & 0x1ff;
	cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;

	CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
}

static void build_inv_all(struct iommu_cmd *cmd)
{
	memset(cmd, 0, sizeof(*cmd));
	CMD_SET_TYPE(cmd, CMD_INV_ALL);
}

static void build_inv_irt(struct iommu_cmd *cmd, u16 devid)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0] = devid;
	CMD_SET_TYPE(cmd, CMD_INV_IRT);
}

/*
 * Writes the command to the IOMMU's command buffer and informs the
 * hardware about the new command.
 */
static int iommu_queue_command_sync(struct amd_iommu *iommu,
				    struct iommu_cmd *cmd,
				    bool sync)
{
	u32 left, tail, head, next_tail;
	unsigned long flags;

	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);

again:
	spin_lock_irqsave(&iommu->lock, flags);

	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
	next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
	left      = (head - next_tail) % iommu->cmd_buf_size;

	if (left <= 2) {
		struct iommu_cmd sync_cmd;
		volatile u64 sem = 0;
		int ret;

		build_completion_wait(&sync_cmd, (u64)&sem);
		copy_cmd_to_buffer(iommu, &sync_cmd, tail);

		spin_unlock_irqrestore(&iommu->lock, flags);

		if ((ret = wait_on_sem(&sem)) != 0)
			return ret;

		goto again;
	}

	copy_cmd_to_buffer(iommu, cmd, tail);

	/* We need to sync now to make sure all commands are processed */
	iommu->need_sync = sync;

	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
{
	return iommu_queue_command_sync(iommu, cmd, true);
}

/*
 * This function queues a completion wait command into the command
 * buffer of an IOMMU
 */
static int iommu_completion_wait(struct amd_iommu *iommu)
{
	struct iommu_cmd cmd;
	volatile u64 sem = 0;
	int ret;

	if (!iommu->need_sync)
		return 0;

	build_completion_wait(&cmd, (u64)&sem);

	ret = iommu_queue_command_sync(iommu, &cmd, false);
	if (ret)
		return ret;

	return wait_on_sem(&sem);
}

static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
{
	struct iommu_cmd cmd;

	build_inv_dte(&cmd, devid);

	return iommu_queue_command(iommu, &cmd);
}

static void iommu_flush_dte_all(struct amd_iommu *iommu)
{
	u32 devid;

	for (devid = 0; devid <= 0xffff; ++devid)
		iommu_flush_dte(iommu, devid);

	iommu_completion_wait(iommu);
}

/*
 * This function uses heavy locking and may disable irqs for some time. But
 * this is no issue because it is only called during resume.
 */
static void iommu_flush_tlb_all(struct amd_iommu *iommu)
{
	u32 dom_id;

	for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
		struct iommu_cmd cmd;
		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
				      dom_id, 1);
		iommu_queue_command(iommu, &cmd);
	}

	iommu_completion_wait(iommu);
}

static void iommu_flush_all(struct amd_iommu *iommu)
{
	struct iommu_cmd cmd;

	build_inv_all(&cmd);

	iommu_queue_command(iommu, &cmd);
	iommu_completion_wait(iommu);
}

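/* Flush the cached interrupt remapping table entries of a device. */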
static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid)
{
	struct iommu_cmd cmd;

	build_inv_irt(&cmd, devid);

	iommu_queue_command(iommu, &cmd);
}

static void iommu_flush_irt_all(struct amd_iommu *iommu)
{
	u32 devid;

	for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++)
		iommu_flush_irt(iommu, devid);

	iommu_completion_wait(iommu);
}

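/*
 * Flush everything the IOMMU caches: either with a single INVALIDATE_ALL
 * command if the hardware supports it, or by flushing DTEs, interrupt
 * remapping tables and TLBs individually.
 */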
void iommu_flush_all_caches(struct amd_iommu *iommu)
{
	if (iommu_feature(iommu, FEATURE_IA)) {
		iommu_flush_all(iommu);
	} else {
		iommu_flush_dte_all(iommu);
		iommu_flush_irt_all(iommu);
		iommu_flush_tlb_all(iommu);
	}
}

/*
 * Command send function for flushing on-device TLB
 */
static int device_flush_iotlb(struct iommu_dev_data *dev_data,
			      u64 address, size_t size)
{
	struct amd_iommu *iommu;
	struct iommu_cmd cmd;
	int qdep;

	qdep     = dev_data->ats.qdep;
	iommu    = amd_iommu_rlookup_table[dev_data->devid];

	build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);

	return iommu_queue_command(iommu, &cmd);
}

/*
 * Command send function for invalidating a device table entry
 */
static int device_flush_dte(struct iommu_dev_data *dev_data)
{
	struct amd_iommu *iommu;
	int ret;

	iommu = amd_iommu_rlookup_table[dev_data->devid];

	ret = iommu_flush_dte(iommu, dev_data->devid);
	if (ret)
		return ret;

	if (dev_data->ats.enabled)
		ret = device_flush_iotlb(dev_data, 0, ~0UL);

	return ret;
}

/*
 * TLB invalidation function which is called from the mapping functions.
 * It invalidates a single PTE if the range to flush is within a single
 * page. Otherwise it flushes the whole TLB of the IOMMU.
 */
static void __domain_flush_pages(struct protection_domain *domain,
				 u64 address, size_t size, int pde)
{
	struct iommu_dev_data *dev_data;
	struct iommu_cmd cmd;
	int ret = 0, i;

	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);

	for (i = 0; i < amd_iommus_present; ++i) {
		if (!domain->dev_iommu[i])
			continue;

		/*
		 * Devices of this domain are behind this IOMMU
		 * We need a TLB flush
		 */
		ret |= iommu_queue_command(amd_iommus[i], &cmd);
	}

	list_for_each_entry(dev_data, &domain->dev_list, list) {

		if (!dev_data->ats.enabled)
			continue;

		ret |= device_flush_iotlb(dev_data, address, size);
	}

	WARN_ON(ret);
}

static void domain_flush_pages(struct protection_domain *domain,
			       u64 address, size_t size)
{
	__domain_flush_pages(domain, address, size, 0);
}

/* Flush the whole IO/TLB for a given protection domain */
static void domain_flush_tlb(struct protection_domain *domain)
{
	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}

/* Flush the whole IO/TLB for a given protection domain - including PDE */
static void domain_flush_tlb_pde(struct protection_domain *domain)
{
	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}

static void domain_flush_complete(struct protection_domain *domain)
{
	int i;

	for (i = 0; i < amd_iommus_present; ++i) {
		if (!domain->dev_iommu[i])