Commit aaaff1a1 authored by Vikram Narayanan's avatar Vikram Narayanan

lcd/liblcd: Add dma API support (x86) for LCDs

Just bring in the normal nommu_ops for now which uses scatterlists for sg dma.
Kernel uses SWIOTLB by default if no iommu= option is specified in the command
line for x86_64 systems. Bring in SWIOTLB as well when it is absolutely
necessary.
Signed-off-by: Vikram Narayanan's avatarVikram Narayanan <vikram186@gmail.com>
parent c6590e79
#include <lcd_config/pre_hook.h>
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/dmar.h>
#include <linux/export.h>
#include <linux/bootmem.h>
#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/calgary.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
#include <lcd_config/post_hook.h>
static int forbid_dac __read_mostly;
struct dma_map_ops *dma_ops = &nommu_dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
#ifndef LCD_ISOLATE
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow __read_mostly = 1;
int force_iommu __read_mostly = 1;
#else
int panic_on_overflow __read_mostly = 0;
int force_iommu __read_mostly = 0;
#endif
int iommu_merge __read_mostly = 0;
int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
/*
* This variable becomes 1 if iommu=pt is passed on the kernel command line.
* If this variable is 1, IOMMU implementations do no DMA translation for
* devices and allow every device to access to whole physical memory. This is
* useful if a user wants to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation.
*/
int iommu_pass_through __read_mostly;
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
.coherent_dma_mask = ISA_DMA_BIT_MASK,
.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
/* Number of entries preallocated for DMA-API debugging */
#define PREALLOC_DMA_DEBUG_ENTRIES 65536
void __init pci_iommu_alloc(void)
{
struct iommu_table_entry *p;
sort_iommu_table(__iommu_table, __iommu_table_end);
check_iommu_entries(__iommu_table, __iommu_table_end);
for (p = __iommu_table; p < __iommu_table_end; p++) {
if (p && p->detect && p->detect() > 0) {
p->flags |= IOMMU_DETECTED;
if (p->early_init)
p->early_init();
if (p->flags & IOMMU_FINISH_IF_DETECTED)
break;
}
}
}
#endif /* LCD_ISOLATE */
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
unsigned long attrs)
{
unsigned long dma_mask;
struct page *page;
#ifndef LCD_ISOLATE
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
#endif
dma_addr_t addr;
dma_mask = dma_alloc_coherent_mask(dev, flag);
flag &= ~__GFP_ZERO;
again:
page = NULL;
#ifndef LCD_ISOLATE
/* CMA can be used only in the context which permits sleeping */
if (gfpflags_allow_blocking(flag)) {
page = dma_alloc_from_contiguous(dev, count, get_order(size));
if (page && page_to_phys(page) + size > dma_mask) {
dma_release_from_contiguous(dev, page, count);
page = NULL;
}
}
#endif /* LCD_ISOLATE */
/* Always fallback in case of LCD. We don't have CMA */
/* fallback */
if (!page)
page = lcd_alloc_pages_node(dev_to_node(dev), flag, get_order(size));
if (!page)
return NULL;
addr = page_to_phys(page);
if (addr + size > dma_mask) {
lcd_free_pages(page, get_order(size));
if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
flag = (flag & ~GFP_DMA32) | GFP_DMA;
goto again;
}
return NULL;
}
memset(lcd_page_address(page), 0, size);
*dma_addr = addr;
return lcd_page_address(page);
}
void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
#ifndef LCD_ISOLATE
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page = virt_to_page(vaddr);
if (!dma_release_from_contiguous(dev, page, count))
#endif /* LCD_ISOLATE */
__lcd_free_pages((unsigned long)vaddr, get_order(size));
}
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
#ifndef LCD_ISOLATE
if (!*dev)
*dev = &x86_dma_fallback_dev;
*gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
if (!is_device_dma_capable(*dev))
return false;
#endif
return true;
}
EXPORT_SYMBOL(arch_dma_alloc_attrs);
#ifndef LCD_ISOLATE
/*
* See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
* parameter documentation.
*/
static __init int iommu_setup(char *p)
{
iommu_merge = 1;
if (!p)
return -EINVAL;
while (*p) {
if (!strncmp(p, "off", 3))
no_iommu = 1;
/* gart_parse_options has more force support */
if (!strncmp(p, "force", 5))
force_iommu = 1;
if (!strncmp(p, "noforce", 7)) {
iommu_merge = 0;
force_iommu = 0;
}
if (!strncmp(p, "biomerge", 8)) {
iommu_merge = 1;
force_iommu = 1;
}
if (!strncmp(p, "panic", 5))
panic_on_overflow = 1;
if (!strncmp(p, "nopanic", 7))
panic_on_overflow = 0;
if (!strncmp(p, "merge", 5)) {
iommu_merge = 1;
force_iommu = 1;
}
if (!strncmp(p, "nomerge", 7))
iommu_merge = 0;
if (!strncmp(p, "forcesac", 8))
iommu_sac_force = 1;
if (!strncmp(p, "allowdac", 8))
forbid_dac = 0;
if (!strncmp(p, "nodac", 5))
forbid_dac = 1;
if (!strncmp(p, "usedac", 6)) {
forbid_dac = -1;
return 1;
}
#ifdef CONFIG_SWIOTLB
if (!strncmp(p, "soft", 4))
swiotlb = 1;
#endif
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
gart_parse_options(p);
#ifdef CONFIG_CALGARY_IOMMU
if (!strncmp(p, "calgary", 7))
use_calgary = 1;
#endif /* CONFIG_CALGARY_IOMMU */
p += strcspn(p, ",");
if (*p == ',')
++p;
}
return 0;
}
early_param("iommu", iommu_setup);
#endif /* LCD_ISOLATE */
int dma_supported(struct device *dev, u64 mask)
{
struct dma_map_ops *ops = get_dma_ops(dev);
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
dev_info(dev, "PCI: Disallowing DAC for device\n");
return 0;
}
#endif
if (ops->dma_supported)
return ops->dma_supported(dev, mask);
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
The caller just has to use GFP_DMA in this case. */
if (mask < DMA_BIT_MASK(24))
return 0;
/* Tell the device to use SAC when IOMMU force is on. This
allows the driver to use cheaper accesses in some cases.
Problem with this is that if we overflow the IOMMU area and
return DAC as fallback address the device may not handle it
correctly.
As a special case some controllers have a 39bit address
mode that is as efficient as 32bit (aic79xx). Don't force
SAC for these. Assume all masks <= 40 bits are of this
type. Normally this doesn't make any difference, but gives
more gentle handling of IOMMU overflow. */
if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
dev_info(dev, "Force SAC with mask %Lx\n", mask);
return 0;
}
return 1;
}
EXPORT_SYMBOL(dma_supported);
#ifndef LCD_ISOLATE
static int __init pci_iommu_init(void)
{
struct iommu_table_entry *p;
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
dma_debug_add_bus(&pci_bus_type);
#endif
x86_init.iommu.iommu_init();
for (p = __iommu_table; p < __iommu_table_end; p++) {
if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
p->late_init();
}
return 0;
}
/* Must execute after PCI subsystem */
rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
static void via_no_dac(struct pci_dev *dev)
{
if (forbid_dac == 0) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
forbid_dac = 1;
}
}
DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
PCI_CLASS_BRIDGE_PCI, 8, via_no_dac);
#endif /* CONFIG_PCI */
#endif /* LCD_ISOLATE */
/* Fallback functions when the main IOMMU code is not compiled in. This
code is roughly equivalent to i386. */
#include <lcd_config/pre_hook.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/iommu.h>
#include <asm/dma.h>
#include <lcd_config/post_hook.h>
static int
check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
{
if (hwdev && !dma_capable(hwdev, bus, size)) {
if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
printk(KERN_ERR
"nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
name, (long long)bus, size,
(long long)*hwdev->dma_mask);
return 0;
}
return 1;
}
static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
unsigned long attrs)
{
dma_addr_t bus = page_to_phys(page) + offset;
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return DMA_ERROR_CODE;
flush_write_buffers();
return bus;
}
/* Map a set of buffers described by scatterlist in streaming
* mode for DMA. This is the scatter-gather version of the
* above pci_map_single interface. Here the scatter gather list
* elements are each tagged with the appropriate dma address
* and length. They are obtained via sg_dma_{address,length}(SG).
*
* NOTE: An implementation may be able to use a smaller number of
* DMA address/length pairs than there are SG table elements.
* (for example via virtual mapping capabilities)
* The routine returns the number of addr/length pairs actually
* used, at most nents.
*
* Device ownership issues as mentioned above for pci_map_single are
* the same here.
*/
static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
unsigned long attrs)
{
struct scatterlist *s;
int i;
WARN_ON(nents == 0 || sg[0].length == 0);
for_each_sg(sg, s, nents, i) {
BUG_ON(!sg_page(s));
s->dma_address = sg_phys(s);
if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
return 0;
s->dma_length = s->length;
}
flush_write_buffers();
return nents;
}
static void nommu_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size,
enum dma_data_direction dir)
{
flush_write_buffers();
}
static void nommu_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nelems,
enum dma_data_direction dir)
{
flush_write_buffers();
}
struct dma_map_ops nommu_dma_ops = {
.alloc = dma_generic_alloc_coherent,
.free = dma_generic_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
.sync_single_for_device = nommu_sync_single_for_device,
.sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys = 1,
};
EXPORT_SYMBOL(nommu_dma_ops);
/*
* Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
*
* Scatterlist handling helpers.
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
#include <lcd_config/pre_hook.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
#include <liblcd/mem.h>
#include <lcd_config/post_hook.h>
/**
* sg_next - return the next scatterlist entry in a list
* @sg: The current sg entry
*
* Description:
* Usually the next entry will be @sg@ + 1, but if this sg element is part
* of a chained scatterlist, it could jump to the start of a new
* scatterlist array.
*
**/
struct scatterlist *sg_next(struct scatterlist *sg)
{
#ifdef CONFIG_DEBUG_SG
BUG_ON(sg->sg_magic != SG_MAGIC);
#endif
if (sg_is_last(sg))
return NULL;
sg++;
if (unlikely(sg_is_chain(sg)))
sg = sg_chain_ptr(sg);
return sg;
}
EXPORT_SYMBOL(sg_next);
/**
* sg_nents - return total count of entries in scatterlist
* @sg: The scatterlist
*
* Description:
* Allows to know how many entries are in sg, taking into acount
* chaining as well
*
**/
int sg_nents(struct scatterlist *sg)
{
int nents;
for (nents = 0; sg; sg = sg_next(sg))
nents++;
return nents;
}
EXPORT_SYMBOL(sg_nents);
/**
* sg_nents_for_len - return total count of entries in scatterlist
* needed to satisfy the supplied length
* @sg: The scatterlist
* @len: The total required length
*
* Description:
* Determines the number of entries in sg that are required to meet
* the supplied length, taking into acount chaining as well
*
* Returns:
* the number of sg entries needed, negative error on failure
*
**/
int sg_nents_for_len(struct scatterlist *sg, u64 len)
{
int nents;
u64 total;
if (!len)
return 0;
for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
nents++;
total += sg->length;
if (total >= len)
return nents;
}
return -EINVAL;
}
EXPORT_SYMBOL(sg_nents_for_len);
/**
* sg_last - return the last scatterlist entry in a list
* @sgl: First entry in the scatterlist
* @nents: Number of entries in the scatterlist
*
* Description:
* Should only be used casually, it (currently) scans the entire list
* to get the last entry.
*
* Note that the @sgl@ pointer passed in need not be the first one,
* the important bit is that @nents@ denotes the number of entries that
* exist from @sgl@.
*
**/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
struct scatterlist *sg, *ret = NULL;
unsigned int i;
for_each_sg(sgl, sg, nents, i)
ret = sg;
#ifdef CONFIG_DEBUG_SG
BUG_ON(sgl[0].sg_magic != SG_MAGIC);
BUG_ON(!sg_is_last(ret));
#endif
return ret;
}
EXPORT_SYMBOL(sg_last);
/**
* sg_init_table - Initialize SG table
* @sgl: The SG table
* @nents: Number of entries in table
*
* Notes:
* If this is part of a chained sg table, sg_mark_end() should be
* used only on the last table part.
*
**/
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
memset(sgl, 0, sizeof(*sgl) * nents);
#ifdef CONFIG_DEBUG_SG
{
unsigned int i;
for (i = 0; i < nents; i++)
sgl[i].sg_magic = SG_MAGIC;
}
#endif
sg_mark_end(&sgl[nents - 1]);
}
EXPORT_SYMBOL(sg_init_table);
/**
* sg_init_one - Initialize a single entry sg list
* @sg: SG entry
* @buf: Virtual address for IO
* @buflen: IO length
*
**/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
sg_init_table(sg, 1);
sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);
/*
* The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
* helpers.
*/
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
if (nents == SG_MAX_SINGLE_ALLOC) {
/*
* Kmemleak doesn't track page allocations as they are not
* commonly used (in a raw form) for kernel data structures.
* As we chain together a list of pages and then a normal
* kmalloc (tracked by kmemleak), in order to for that last
* allocation not to become decoupled (and thus a
* false-positive) we need to inform kmemleak of all the
* intermediate allocations.
*/
void *ptr = (void *) __get_free_page(gfp_mask);
kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
return ptr;
} else
return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
}
static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
if (nents == SG_MAX_SINGLE_ALLOC) {
kmemleak_free(sg);
free_page((unsigned long) sg);
} else
kfree(sg);
}
/**
* __sg_free_table - Free a previously mapped sg table
* @table: The sg table header to use
* @max_ents: The maximum number of entries per single scatterlist
* @skip_first_chunk: don't free the (preallocated) first scatterlist chunk
* @free_fn: Free function
*
* Description:
* Free an sg table previously allocated and setup with
* __sg_alloc_table(). The @max_ents value must be identical to
* that previously used with __sg_alloc_table().
*
**/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
bool skip_first_chunk, sg_free_fn *free_fn)
{
struct scatterlist *sgl, *next;
if (unlikely(!table->sgl))
return;
sgl = table->sgl;
while (table->orig_nents) {
unsigned int alloc_size = table->orig_nents;
unsigned int sg_size;
/*
* If we have more than max_ents segments left,
* then assign 'next' to the sg table after the current one.
* sg_size is then one less than alloc size, since the last
* element is the chain pointer.