Commit 2134a922 authored by Pierre Ossman's avatar Pierre Ossman

sdhci: scatter-gather (ADMA) support

Add support for the scatter-gather DMA mode present on newer controllers.
As the mode requires 32-bit alignment, non-aligned chunks are handled by
using a bounce buffer.

Also add some new quirks to handle controllers that have bugs in the
ADMA engine.
Signed-off-by: default avatarPierre Ossman <drzeus@drzeus.cx>
parent 93fc48c7
......@@ -142,6 +142,7 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
if (chip->pdev->revision == 0) {
chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR |
SDHCI_QUIRK_32BIT_DMA_SIZE |
SDHCI_QUIRK_32BIT_ADMA_SIZE |
SDHCI_QUIRK_RESET_AFTER_REQUEST;
}
......@@ -206,6 +207,22 @@ static void jmicron_enable_mmc(struct sdhci_host *host, int on)
static int jmicron_probe_slot(struct sdhci_pci_slot *slot)
{
if (slot->chip->pdev->revision == 0) {
u16 version;
version = readl(slot->host->ioaddr + SDHCI_HOST_VERSION);
version = (version & SDHCI_VENDOR_VER_MASK) >>
SDHCI_VENDOR_VER_SHIFT;
/*
* Older versions of the chip have lots of nasty glitches
* in the ADMA engine. It's best just to avoid it
* completely.
*/
if (version < 0xAC)
slot->host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
}
/*
* The secondary interface requires a bit set to get the
* interrupts.
......
......@@ -124,7 +124,8 @@ static void sdhci_init(struct sdhci_host *host)
SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT |
SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT |
SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL |
SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE;
SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE |
SDHCI_INT_ADMA_ERROR;
writel(intmask, host->ioaddr + SDHCI_INT_ENABLE);
writel(intmask, host->ioaddr + SDHCI_SIGNAL_ENABLE);
......@@ -314,6 +315,196 @@ static void sdhci_transfer_pio(struct sdhci_host *host)
DBG("PIO transfer complete.\n");
}
static char *sdhci_kmap_atomic(struct scatterlist *sg, unsigned long *flags)
{
local_irq_save(*flags);
return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
}
static void sdhci_kunmap_atomic(void *buffer, unsigned long *flags)
{
kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
local_irq_restore(*flags);
}
static void sdhci_adma_table_pre(struct sdhci_host *host,
struct mmc_data *data)
{
int direction;
u8 *desc;
u8 *align;
dma_addr_t addr;
dma_addr_t align_addr;
int len, offset;
struct scatterlist *sg;
int i;
char *buffer;
unsigned long flags;
/*
* The spec does not specify endianness of descriptor table.
* We currently guess that it is LE.
*/
if (data->flags & MMC_DATA_READ)
direction = DMA_FROM_DEVICE;
else
direction = DMA_TO_DEVICE;
/*
* The ADMA descriptor table is mapped further down as we
* need to fill it with data first.
*/
host->align_addr = dma_map_single(mmc_dev(host->mmc),
host->align_buffer, 128 * 4, direction);
BUG_ON(host->align_addr & 0x3);
host->sg_count = dma_map_sg(mmc_dev(host->mmc),
data->sg, data->sg_len, direction);
desc = host->adma_desc;
align = host->align_buffer;
align_addr = host->align_addr;
for_each_sg(data->sg, sg, host->sg_count, i) {
addr = sg_dma_address(sg);
len = sg_dma_len(sg);
/*
* The SDHCI specification states that ADMA
* addresses must be 32-bit aligned. If they
* aren't, then we use a bounce buffer for
* the (up to three) bytes that screw up the
* alignment.
*/
offset = (4 - (addr & 0x3)) & 0x3;
if (offset) {
if (data->flags & MMC_DATA_WRITE) {
buffer = sdhci_kmap_atomic(sg, &flags);
memcpy(align, buffer, offset);
sdhci_kunmap_atomic(buffer, &flags);
}
desc[7] = (align_addr >> 24) & 0xff;
desc[6] = (align_addr >> 16) & 0xff;
desc[5] = (align_addr >> 8) & 0xff;
desc[4] = (align_addr >> 0) & 0xff;
BUG_ON(offset > 65536);
desc[3] = (offset >> 8) & 0xff;
desc[2] = (offset >> 0) & 0xff;
desc[1] = 0x00;
desc[0] = 0x21; /* tran, valid */
align += 4;
align_addr += 4;
desc += 8;
addr += offset;
len -= offset;
}
desc[7] = (addr >> 24) & 0xff;
desc[6] = (addr >> 16) & 0xff;
desc[5] = (addr >> 8) & 0xff;
desc[4] = (addr >> 0) & 0xff;
BUG_ON(len > 65536);
desc[3] = (len >> 8) & 0xff;
desc[2] = (len >> 0) & 0xff;
desc[1] = 0x00;
desc[0] = 0x21; /* tran, valid */
desc += 8;
/*
* If this triggers then we have a calculation bug
* somewhere. :/
*/
WARN_ON((desc - host->adma_desc) > (128 * 2 + 1) * 4);
}
/*
* Add a terminating entry.
*/
desc[7] = 0;
desc[6] = 0;
desc[5] = 0;
desc[4] = 0;
desc[3] = 0;
desc[2] = 0;
desc[1] = 0x00;
desc[0] = 0x03; /* nop, end, valid */
/*
* Resync align buffer as we might have changed it.
*/
if (data->flags & MMC_DATA_WRITE) {
dma_sync_single_for_device(mmc_dev(host->mmc),
host->align_addr, 128 * 4, direction);
}
host->adma_addr = dma_map_single(mmc_dev(host->mmc),
host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE);
BUG_ON(host->adma_addr & 0x3);
}
static void sdhci_adma_table_post(struct sdhci_host *host,
struct mmc_data *data)
{
int direction;
struct scatterlist *sg;
int i, size;
u8 *align;
char *buffer;
unsigned long flags;
if (data->flags & MMC_DATA_READ)
direction = DMA_FROM_DEVICE;
else
direction = DMA_TO_DEVICE;
dma_unmap_single(mmc_dev(host->mmc), host->adma_addr,
(128 * 2 + 1) * 4, DMA_TO_DEVICE);
dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
128 * 4, direction);
if (data->flags & MMC_DATA_READ) {
dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg,
data->sg_len, direction);
align = host->align_buffer;
for_each_sg(data->sg, sg, host->sg_count, i) {
if (sg_dma_address(sg) & 0x3) {
size = 4 - (sg_dma_address(sg) & 0x3);
buffer = sdhci_kmap_atomic(sg, &flags);
memcpy(buffer, align, size);
sdhci_kunmap_atomic(buffer, &flags);
align += 4;
}
}
}
dma_unmap_sg(mmc_dev(host->mmc), data->sg,
data->sg_len, direction);
}
static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data)
{
u8 count;
......@@ -363,6 +554,7 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_data *data)
static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
{
u8 count;
u8 ctrl;
WARN_ON(host->data);
......@@ -383,35 +575,104 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
if (host->flags & SDHCI_USE_DMA)
host->flags |= SDHCI_REQ_USE_DMA;
if (unlikely((host->flags & SDHCI_REQ_USE_DMA) &&
(host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) &&
((data->blksz * data->blocks) & 0x3))) {
DBG("Reverting to PIO because of transfer size (%d)\n",
data->blksz * data->blocks);
host->flags &= ~SDHCI_REQ_USE_DMA;
/*
* FIXME: This doesn't account for merging when mapping the
* scatterlist.
*/
if (host->flags & SDHCI_REQ_USE_DMA) {
int broken, i;
struct scatterlist *sg;
broken = 0;
if (host->flags & SDHCI_USE_ADMA) {
if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE)
broken = 1;
} else {
if (host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE)
broken = 1;
}
if (unlikely(broken)) {
for_each_sg(data->sg, sg, data->sg_len, i) {
if (sg->length & 0x3) {
DBG("Reverting to PIO because of "
"transfer size (%d)\n",
sg->length);
host->flags &= ~SDHCI_REQ_USE_DMA;
break;
}
}
}
}
/*
* The assumption here being that alignment is the same after
* translation to device address space.
*/
if (unlikely((host->flags & SDHCI_REQ_USE_DMA) &&
(host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR) &&
(data->sg->offset & 0x3))) {
DBG("Reverting to PIO because of bad alignment\n");
host->flags &= ~SDHCI_REQ_USE_DMA;
if (host->flags & SDHCI_REQ_USE_DMA) {
int broken, i;
struct scatterlist *sg;
broken = 0;
if (host->flags & SDHCI_USE_ADMA) {
/*
* As we use 3 byte chunks to work around
* alignment problems, we need to check this
* quirk.
*/
if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE)
broken = 1;
} else {
if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR)
broken = 1;
}
if (unlikely(broken)) {
for_each_sg(data->sg, sg, data->sg_len, i) {
if (sg->offset & 0x3) {
DBG("Reverting to PIO because of "
"bad alignment\n");
host->flags &= ~SDHCI_REQ_USE_DMA;
break;
}
}
}
}
/*
* Always adjust the DMA selection as some controllers
* (e.g. JMicron) can't do PIO properly when the selection
* is ADMA.
*/
if (host->version >= SDHCI_SPEC_200) {
ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL);
ctrl &= ~SDHCI_CTRL_DMA_MASK;
if ((host->flags & SDHCI_REQ_USE_DMA) &&
(host->flags & SDHCI_USE_ADMA))
ctrl |= SDHCI_CTRL_ADMA32;
else
ctrl |= SDHCI_CTRL_SDMA;
writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
}
if (host->flags & SDHCI_REQ_USE_DMA) {
int count;
if (host->flags & SDHCI_USE_ADMA) {
sdhci_adma_table_pre(host, data);
writel(host->adma_addr,
host->ioaddr + SDHCI_ADMA_ADDRESS);
} else {
int count;
count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
(data->flags & MMC_DATA_READ) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
WARN_ON(count != 1);
count = dma_map_sg(mmc_dev(host->mmc),
data->sg, data->sg_len,
(data->flags & MMC_DATA_READ) ?
DMA_FROM_DEVICE :
DMA_TO_DEVICE);
WARN_ON(count != 1);
writel(sg_dma_address(data->sg),
host->ioaddr + SDHCI_DMA_ADDRESS);
writel(sg_dma_address(data->sg),
host->ioaddr + SDHCI_DMA_ADDRESS);
}
} else {
host->cur_sg = data->sg;
host->num_sg = data->sg_len;
......@@ -457,9 +718,13 @@ static void sdhci_finish_data(struct sdhci_host *host)
host->data = NULL;
if (host->flags & SDHCI_REQ_USE_DMA) {
dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
(data->flags & MMC_DATA_READ) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
if (host->flags & SDHCI_USE_ADMA)
sdhci_adma_table_post(host, data);
else {
dma_unmap_sg(mmc_dev(host->mmc), data->sg,
data->sg_len, (data->flags & MMC_DATA_READ) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
}
}
/*
......@@ -1008,6 +1273,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
host->data->error = -ETIMEDOUT;
else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT))
host->data->error = -EILSEQ;
else if (intmask & SDHCI_INT_ADMA_ERROR)
host->data->error = -EIO;
if (host->data->error)
sdhci_finish_data(host);
......@@ -1199,7 +1466,6 @@ int sdhci_add_host(struct sdhci_host *host)
{
struct mmc_host *mmc;
unsigned int caps;
unsigned int version;
int ret;
WARN_ON(host == NULL);
......@@ -1213,12 +1479,13 @@ int sdhci_add_host(struct sdhci_host *host)
sdhci_reset(host, SDHCI_RESET_ALL);
version = readw(host->ioaddr + SDHCI_HOST_VERSION);
version = (version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT;
if (version > 1) {
host->version = readw(host->ioaddr + SDHCI_HOST_VERSION);
host->version = (host->version & SDHCI_SPEC_VER_MASK)
>> SDHCI_SPEC_VER_SHIFT;
if (host->version > SDHCI_SPEC_200) {
printk(KERN_ERR "%s: Unknown controller version (%d). "
"You may experience problems.\n", mmc_hostname(mmc),
version);
host->version);
}
caps = readl(host->ioaddr + SDHCI_CAPABILITIES);
......@@ -1236,17 +1503,47 @@ int sdhci_add_host(struct sdhci_host *host)
host->flags &= ~SDHCI_USE_DMA;
}
if (host->flags & SDHCI_USE_DMA) {
if ((host->version >= SDHCI_SPEC_200) &&
(caps & SDHCI_CAN_DO_ADMA2))
host->flags |= SDHCI_USE_ADMA;
}
if ((host->quirks & SDHCI_QUIRK_BROKEN_ADMA) &&
(host->flags & SDHCI_USE_ADMA)) {
DBG("Disabling ADMA as it is marked broken\n");
host->flags &= ~SDHCI_USE_ADMA;
}
if (host->flags & SDHCI_USE_DMA) {
if (host->ops->enable_dma) {
if (host->ops->enable_dma(host)) {
printk(KERN_WARNING "%s: No suitable DMA "
"available. Falling back to PIO.\n",
mmc_hostname(mmc));
host->flags &= ~SDHCI_USE_DMA;
host->flags &= ~(SDHCI_USE_DMA | SDHCI_USE_ADMA);
}
}
}
if (host->flags & SDHCI_USE_ADMA) {
/*
* We need to allocate descriptors for all sg entries
* (128) and potentially one alignment transfer for
* each of those entries.
*/
host->adma_desc = kmalloc((128 * 2 + 1) * 4, GFP_KERNEL);
host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
if (!host->adma_desc || !host->align_buffer) {
kfree(host->adma_desc);
kfree(host->align_buffer);
printk(KERN_WARNING "%s: Unable to allocate ADMA "
"buffers. Falling back to standard DMA.\n",
mmc_hostname(mmc));
host->flags &= ~SDHCI_USE_ADMA;
}
}
/* XXX: Hack to get MMC layer to avoid highmem */
if (!(host->flags & SDHCI_USE_DMA))
mmc_dev(host->mmc)->dma_mask = 0;
......@@ -1298,13 +1595,16 @@ int sdhci_add_host(struct sdhci_host *host)
spin_lock_init(&host->lock);
/*
* Maximum number of segments. Hardware cannot do scatter lists.
* Maximum number of segments. Depends on if the hardware
* can do scatter/gather or not.
*/
if (host->flags & SDHCI_USE_DMA)
if (host->flags & SDHCI_USE_ADMA)
mmc->max_hw_segs = 128;
else if (host->flags & SDHCI_USE_DMA)
mmc->max_hw_segs = 1;
else
mmc->max_hw_segs = 16;
mmc->max_phys_segs = 16;
else /* PIO */
mmc->max_hw_segs = 128;
mmc->max_phys_segs = 128;
/*
* Maximum number of sectors in one transfer. Limited by DMA boundary
......@@ -1314,9 +1614,13 @@ int sdhci_add_host(struct sdhci_host *host)
/*
* Maximum segment size. Could be one segment with the maximum number
* of bytes.
* of bytes. When doing hardware scatter/gather, each entry cannot
* be larger than 64 KiB though.
*/
mmc->max_seg_size = mmc->max_req_size;
if (host->flags & SDHCI_USE_ADMA)
mmc->max_seg_size = 65536;
else
mmc->max_seg_size = mmc->max_req_size;
/*
* Maximum block size. This varies from controller to controller and
......@@ -1371,8 +1675,9 @@ int sdhci_add_host(struct sdhci_host *host)
mmc_add_host(mmc);
printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s\n",
printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n",
mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id,
(host->flags & SDHCI_USE_ADMA)?"A":"",
(host->flags & SDHCI_USE_DMA)?"DMA":"PIO");
return 0;
......@@ -1426,6 +1731,12 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
tasklet_kill(&host->card_tasklet);
tasklet_kill(&host->finish_tasklet);
kfree(host->adma_desc);
kfree(host->align_buffer);
host->adma_desc = NULL;
host->align_buffer = NULL;
}
EXPORT_SYMBOL_GPL(sdhci_remove_host);
......
......@@ -60,6 +60,11 @@
#define SDHCI_CTRL_LED 0x01
#define SDHCI_CTRL_4BITBUS 0x02
#define SDHCI_CTRL_HISPD 0x04
#define SDHCI_CTRL_DMA_MASK 0x18
#define SDHCI_CTRL_SDMA 0x00
#define SDHCI_CTRL_ADMA1 0x08
#define SDHCI_CTRL_ADMA32 0x10
#define SDHCI_CTRL_ADMA64 0x18
#define SDHCI_POWER_CONTROL 0x29
#define SDHCI_POWER_ON 0x01
......@@ -105,6 +110,7 @@
#define SDHCI_INT_DATA_END_BIT 0x00400000
#define SDHCI_INT_BUS_POWER 0x00800000
#define SDHCI_INT_ACMD12ERR 0x01000000
#define SDHCI_INT_ADMA_ERROR 0x02000000
#define SDHCI_INT_NORMAL_MASK 0x00007FFF
#define SDHCI_INT_ERROR_MASK 0xFFFF8000
......@@ -128,11 +134,14 @@
#define SDHCI_CLOCK_BASE_SHIFT 8
#define SDHCI_MAX_BLOCK_MASK 0x00030000
#define SDHCI_MAX_BLOCK_SHIFT 16
#define SDHCI_CAN_DO_ADMA2 0x00080000
#define SDHCI_CAN_DO_ADMA1 0x00100000
#define SDHCI_CAN_DO_HISPD 0x00200000
#define SDHCI_CAN_DO_DMA 0x00400000
#define SDHCI_CAN_VDD_330 0x01000000
#define SDHCI_CAN_VDD_300 0x02000000
#define SDHCI_CAN_VDD_180 0x04000000
#define SDHCI_CAN_64BIT 0x10000000
/* 44-47 reserved for more caps */
......@@ -140,7 +149,16 @@
/* 4C-4F reserved for more max current */
/* 50-FB reserved */
#define SDHCI_SET_ACMD12_ERROR 0x50
#define SDHCI_SET_INT_ERROR 0x52
#define SDHCI_ADMA_ERROR 0x54
/* 55-57 reserved */
#define SDHCI_ADMA_ADDRESS 0x58
/* 60-FB reserved */
#define SDHCI_SLOT_INT_STATUS 0xFC
......@@ -149,6 +167,8 @@
#define SDHCI_VENDOR_VER_SHIFT 8
#define SDHCI_SPEC_VER_MASK 0x00FF
#define SDHCI_SPEC_VER_SHIFT 0
#define SDHCI_SPEC_100 0
#define SDHCI_SPEC_200 1
struct sdhci_ops;
......@@ -170,16 +190,20 @@ struct sdhci_host {
#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS (1<<4)
/* Controller has an unusable DMA engine */
#define SDHCI_QUIRK_BROKEN_DMA (1<<5)
/* Controller has an unusable ADMA engine */
#define SDHCI_QUIRK_BROKEN_ADMA (1<<6)
/* Controller can only DMA from 32-bit aligned addresses */
#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<6)
#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<7)
/* Controller can only DMA chunk sizes that are a multiple of 32 bits */
#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<7)
#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<8)
/* Controller can only ADMA chunks that are a multiple of 32 bits */
#define SDHCI_QUIRK_32BIT_ADMA_SIZE (1<<9)
/* Controller needs to be reset after each request to stay stable */
#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<8)
#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<10)
/* Controller needs voltage and power writes to happen separately */
#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<9)
#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<11)
/* Controller provides an incorrect timeout value for transfers */
#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<10)
#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12)
int irq; /* Device IRQ */
void __iomem * ioaddr; /* Mapped address */
......@@ -197,8 +221,11 @@ struct sdhci_host {
int flags; /* Host attributes */
#define SDHCI_USE_DMA (1<<0) /* Host is DMA capable */
#define SDHCI_REQ_USE_DMA (1<<1) /* Use DMA for this req. */
#define SDHCI_DEVICE_DEAD (1<<2) /* Device unresponsive */
#define SDHCI_USE_ADMA (1<<1) /* Host is ADMA capable */
#define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */
#define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */
unsigned int version; /* SDHCI spec. version */
unsigned int max_clk; /* Max possible freq (MHz) */
unsigned int timeout_clk; /* Timeout freq (KHz) */
......@@ -216,6 +243,14 @@ struct sdhci_host {
int offset; /* Offset into current sg */
int remain;